6254029 memcntl() MC_HAT_ADVISE with page size 0 may cause segment page sizes to be demoted
6325885 map_pgszstk() uses p->p_brkpageszc rather than p->p_stkpageszc
6371967 assign large pages to anon segment created using mmap /dev/zero
6483208 unify and cleanup OOB (out of the box) large pagesize selection code
6483216 use intermediate pagesizes to map the beginning of bss/heap and stack when it may help performance
6483226 bss size is not properly taken into account by LP OOB policy at exec() time
6483230 grow_internal() doesn't properly align stack bottom for large pages
6483231 memcntl.c: ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz));
6483233 provide a mechanism to enable the use of 32M text pages on OPL by default
6485171 memcntl() shouldn't silently fail when stack space is unavailable with requested pagesize
--- a/usr/src/uts/common/exec/aout/aout.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/exec/aout/aout.c Thu Oct 26 16:44:53 2006 -0700
@@ -226,6 +226,7 @@
edp.ux_bsize, edp.ux_doffset, dataprot, pagedata, 0))
goto done;
+ exenv.ex_bssbase = (caddr_t)edp.ux_datorg;
exenv.ex_brkbase = (caddr_t)edp.ux_datorg;
exenv.ex_brksize = edp.ux_dsize + edp.ux_bsize;
exenv.ex_magic = edp.ux_mag;
--- a/usr/src/uts/common/exec/elf/elf.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/exec/elf/elf.c Thu Oct 26 16:44:53 2006 -0700
@@ -1096,6 +1096,7 @@
off_t offset;
int hsize = ehdr->e_phentsize;
caddr_t mintmp = (caddr_t)-1;
+ extern int use_brk_lpg;
if (ehdr->e_type == ET_DYN) {
/*
@@ -1145,47 +1146,41 @@
page = 0;
}
+ /*
+ * Set the heap pagesize for OOB when the bss size
+ * is known and use_brk_lpg is not 0.
+ */
+ if (brksize != NULL && use_brk_lpg &&
+ zfodsz != 0 && phdr == dataphdrp &&
+ (prot & PROT_WRITE)) {
+ size_t tlen = P2NPHASE((uintptr_t)addr +
+ phdr->p_filesz, PAGESIZE);
+
+ if (zfodsz > tlen) {
+ curproc->p_brkpageszc =
+ page_szc(map_pgsz(MAPPGSZ_HEAP,
+ curproc, addr + phdr->p_filesz +
+ tlen, zfodsz - tlen, 0));
+ }
+ }
+
if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
(prot & PROT_WRITE)) {
- /*
- * segvn only uses large pages for segments
- * that have the requested large page size
- * aligned base and size. To insure the part
- * of bss that starts at heap large page size
- * boundary gets mapped by large pages create
- * 2 bss segvn segments which is accomplished
- * by calling execmap twice. First execmap
- * will create the bss segvn segment that is
- * before the large page boundary and it will
- * be mapped with base pages. If bss start is
- * already large page aligned only 1 bss
- * segment will be created. The second bss
- * segment's size is large page size aligned
- * so that segvn uses large pages for that
- * segment and it also makes the heap that
- * starts right after bss to start at large
- * page boundary.
- */
uint_t szc = curproc->p_brkpageszc;
size_t pgsz = page_get_pagesize(szc);
- caddr_t zaddr = addr + phdr->p_filesz;
- size_t zlen = P2NPHASE((uintptr_t)zaddr, pgsz);
+ caddr_t ebss = addr + phdr->p_memsz;
+ size_t extra_zfodsz;
ASSERT(pgsz > PAGESIZE);
+ extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
+
if (error = execmap(vp, addr, phdr->p_filesz,
- zlen, phdr->p_offset, prot, page, szc))
+ zfodsz + extra_zfodsz, phdr->p_offset,
+ prot, page, szc))
goto bad;
- if (zfodsz > zlen) {
- zfodsz -= zlen;
- zaddr += zlen;
- zlen = P2ROUNDUP(zfodsz, pgsz);
- if (error = execmap(vp, zaddr, 0, zlen,
- phdr->p_offset, prot, page, szc))
- goto bad;
- }
if (brksize != NULL)
- *brksize = zlen - zfodsz;
+ *brksize = extra_zfodsz;
} else {
if (error = execmap(vp, addr, phdr->p_filesz,
zfodsz, phdr->p_offset, prot, page, 0))
--- a/usr/src/uts/common/os/exec.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/os/exec.c Thu Oct 26 16:44:53 2006 -0700
@@ -89,7 +89,6 @@
uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */
#endif
-int exec_lpg_disable = 0;
#define PSUIDFLAGS (SNOCD|SUGID)
/*
@@ -1114,7 +1113,23 @@
error = ENOMEM;
goto bad;
}
- crargs.szc = szc;
+ if (szc > 0) {
+ /*
+ * ASSERT alignment because the mapelfexec()
+ * caller for the szc > 0 case extended zfod
+					 * so its end is pgsz aligned.
+ */
+ size_t pgsz = page_get_pagesize(szc);
+ ASSERT(IS_P2ALIGNED(zfodbase + zfodlen, pgsz));
+
+ if (IS_P2ALIGNED(zfodbase, pgsz)) {
+ crargs.szc = szc;
+ } else {
+ crargs.szc = AS_MAP_HEAP;
+ }
+ } else {
+ crargs.szc = AS_MAP_NO_LPOOB;
+ }
if (error = as_map(p->p_as, (caddr_t)zfodbase,
zfodlen, segvn_create, &crargs))
goto bad;
@@ -1555,11 +1570,6 @@
return (0);
}
-#ifdef DEBUG
-int mpss_brkpgszsel = 0;
-int mpss_stkpgszsel = 0;
-#endif
-
/*
* Initialize a new user stack with the specified arguments and environment.
* The initial user stack layout is as follows:
@@ -1614,6 +1624,7 @@
rctl_entity_p_t e;
struct as *as;
+ extern int use_stk_lpg;
args->from_model = p->p_model;
if (p->p_model == DATAMODEL_NATIVE) {
@@ -1751,7 +1762,9 @@
p->p_brkbase = NULL;
p->p_brksize = 0;
+ p->p_brkpageszc = 0;
p->p_stksize = 0;
+ p->p_stkpageszc = 0;
p->p_model = args->to_model;
p->p_usrstack = usrstack;
p->p_stkprot = args->stk_prot;
@@ -1766,51 +1779,14 @@
e.rcep_t = RCENTITY_PROCESS;
rctl_set_reset(p->p_rctls, p, &e);
- if (exec_lpg_disable == 0) {
-#ifdef DEBUG
- uint_t pgsizes = page_num_pagesizes();
- uint_t szc;
-#endif
- p->p_brkpageszc = args->brkpageszc;
- p->p_stkpageszc = args->stkpageszc;
-
- if (p->p_brkpageszc == 0) {
- p->p_brkpageszc = page_szc(map_pgsz(MAPPGSZ_HEAP,
- p, 0, 0, NULL));
- }
- if (p->p_stkpageszc == 0) {
- p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK,
- p, 0, 0, NULL));
- }
+ /* Too early to call map_pgsz for the heap */
+ if (use_stk_lpg) {
+ p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
+ }
-#ifdef DEBUG
- if (mpss_brkpgszsel != 0) {
- if (mpss_brkpgszsel == -1) {
- szc = ((uint_t)gethrtime() >> 8) % pgsizes;
- } else {
- szc = mpss_brkpgszsel % pgsizes;
- }
- p->p_brkpageszc = szc;
- }
-
- if (mpss_stkpgszsel != 0) {
- if (mpss_stkpgszsel == -1) {
- szc = ((uint_t)gethrtime() >> 7) % pgsizes;
- } else {
- szc = mpss_stkpgszsel % pgsizes;
- }
- p->p_stkpageszc = szc;
- }
-
-#endif
- mutex_enter(&p->p_lock);
- p->p_flag |= SAUTOLPG; /* kernel controls page sizes */
- mutex_exit(&p->p_lock);
-
- } else {
- p->p_brkpageszc = 0;
- p->p_stkpageszc = 0;
- }
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SAUTOLPG; /* kernel controls page sizes */
+ mutex_exit(&p->p_lock);
exec_set_sp(size);
--- a/usr/src/uts/common/os/grow.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/os/grow.c Thu Oct 26 16:44:53 2006 -0700
@@ -60,7 +60,6 @@
int use_brk_lpg = 1;
int use_stk_lpg = 1;
-int use_zmap_lpg = 1;
static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);
@@ -96,12 +95,11 @@
{
struct proc *p = curproc;
size_t pgsz, len;
- caddr_t addr;
+ caddr_t addr, brkend;
caddr_t bssbase = p->p_bssbase;
caddr_t brkbase = p->p_brkbase;
int oszc, szc;
int err;
- int remap = 0;
oszc = p->p_brkpageszc;
@@ -115,7 +113,7 @@
len = nva - bssbase;
- pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap);
+ pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
szc = page_szc(pgsz);
/*
@@ -133,28 +131,6 @@
return (err);
}
- if (remap == 0) {
- /*
- * Map from the current brk end up to the new page size
- * alignment using the current page size.
- */
- addr = brkbase + p->p_brksize;
- addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
- if (addr < nva) {
- err = brk_internal(addr, oszc);
- /*
- * In failure case, try again if oszc is not base page
- * size, then return err.
- */
- if (err != 0) {
- if (oszc != 0) {
- err = brk_internal(nva, 0);
- }
- return (err);
- }
- }
- }
-
err = brk_internal(nva, szc);
/* If using szc failed, map with base page size and return. */
if (err != 0) {
@@ -164,16 +140,18 @@
return (err);
}
- if (remap != 0) {
- /*
- * Round up brk base to a large page boundary and remap
- * anything in the segment already faulted in beyond that
- * point.
- */
- addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
- len = (brkbase + p->p_brksize) - addr;
- /* advisory, so ignore errors */
+ /*
+ * Round up brk base to a large page boundary and remap
+ * anything in the segment already faulted in beyond that
+ * point.
+ */
+ addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
+ brkend = brkbase + p->p_brksize;
+ len = brkend - addr;
+ /* Check that len is not negative. Update page size code for heap. */
+ if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
+ p->p_brkpageszc = szc;
}
ASSERT(err == 0);
@@ -272,8 +250,26 @@
/*
* Add new zfod mapping to extend UNIX data segment
+ * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
+ * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
+ * page sizes if ova is not aligned to szc's pgsz.
*/
- crargs.szc = szc;
+ if (szc > 0) {
+ caddr_t rbss;
+
+ rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
+ pgsz);
+ if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
+ crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
+ AS_MAP_NO_LPOOB;
+ } else if (ova == rbss) {
+ crargs.szc = szc;
+ } else {
+ crargs.szc = AS_MAP_HEAP;
+ }
+ } else {
+ crargs.szc = AS_MAP_NO_LPOOB;
+ }
crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
&crargs);
@@ -288,7 +284,6 @@
(void) as_unmap(as, nva, (size_t)(ova - nva));
}
p->p_brksize = size;
- p->p_brkpageszc = szc;
return (0);
}
@@ -300,6 +295,9 @@
grow(caddr_t sp)
{
struct proc *p = curproc;
+ struct as *as = p->p_as;
+ size_t oldsize = p->p_stksize;
+ size_t newsize;
int err;
/*
@@ -307,13 +305,24 @@
* This also serves as the lock protecting p_stksize
* and p_stkpageszc.
*/
- as_rangelock(p->p_as);
+ as_rangelock(as);
if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
err = grow_lpg(sp);
} else {
err = grow_internal(sp, p->p_stkpageszc);
}
- as_rangeunlock(p->p_as);
+ as_rangeunlock(as);
+
+ if (err == 0 && (newsize = p->p_stksize) > oldsize) {
+ ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
+ ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
+ /*
+ * Set up translations so the process doesn't have to fault in
+ * the stack pages we just gave it.
+ */
+ (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
+ newsize - oldsize, F_INVAL, S_WRITE);
+ }
return ((err == 0 ? 1 : 0));
}
@@ -328,15 +337,15 @@
struct proc *p = curproc;
size_t pgsz;
size_t len, newsize;
- caddr_t addr, oldsp;
+ caddr_t addr, saddr;
+ caddr_t growend;
int oszc, szc;
int err;
- int remap = 0;
newsize = p->p_usrstack - sp;
oszc = p->p_stkpageszc;
- pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap);
+ pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
szc = page_szc(pgsz);
/*
@@ -357,30 +366,8 @@
/*
* We've grown sufficiently to switch to a new page size.
- * If we're not going to remap the whole segment with the new
- * page size, split the grow into two operations: map to the new
- * page size alignment boundary with the existing page size, then
- * map the rest with the new page size.
+ * So we are going to remap the whole segment with the new page size.
*/
- err = 0;
- if (remap == 0) {
- oldsp = p->p_usrstack - p->p_stksize;
- addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz);
- if (addr > sp) {
- err = grow_internal(addr, oszc);
- /*
- * In this case, grow with oszc failed, so grow all the
- * way to sp with base page size.
- */
- if (err != 0) {
- if (oszc != 0) {
- err = grow_internal(sp, 0);
- }
- return (err);
- }
- }
- }
-
err = grow_internal(sp, szc);
/* The grow with szc failed, so fall back to base page size. */
if (err != 0) {
@@ -390,22 +377,21 @@
return (err);
}
- if (remap) {
- /*
- * Round up stack pointer to a large page boundary and remap
- * any pgsz pages in the segment already faulted in beyond that
- * point.
- */
- addr = p->p_usrstack - p->p_stksize;
- addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
- len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr;
- /* advisory, so ignore errors */
+ /*
+ * Round up stack pointer to a large page boundary and remap
+ * any pgsz pages in the segment already faulted in beyond that
+ * point.
+ */
+ saddr = p->p_usrstack - p->p_stksize;
+ addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
+ growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
+ len = growend - addr;
+ /* Check that len is not negative. Update page size code for stack. */
+ if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
+ p->p_stkpageszc = szc;
}
- /* Update page size code for stack. */
- p->p_stkpageszc = szc;
-
ASSERT(err == 0);
return (err); /* should always be 0 */
}
@@ -418,8 +404,7 @@
grow_internal(caddr_t sp, uint_t growszc)
{
struct proc *p = curproc;
- struct as *as = p->p_as;
- size_t newsize = p->p_usrstack - sp;
+ size_t newsize;
size_t oldsize;
int error;
size_t pgsz;
@@ -427,6 +412,7 @@
struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
ASSERT(sp < p->p_usrstack);
+ sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
/*
* grow to growszc alignment but use current p->p_stkpageszc for
@@ -437,7 +423,7 @@
if ((szc = growszc) != 0) {
pgsz = page_get_pagesize(szc);
ASSERT(pgsz > PAGESIZE);
- newsize = P2ROUNDUP(newsize, pgsz);
+ newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
if (newsize > (size_t)p->p_stk_ctl) {
szc = 0;
pgsz = PAGESIZE;
@@ -445,6 +431,7 @@
}
} else {
pgsz = PAGESIZE;
+ newsize = p->p_usrstack - sp;
}
if (newsize > (size_t)p->p_stk_ctl) {
@@ -455,7 +442,6 @@
}
oldsize = p->p_stksize;
- newsize = P2ROUNDUP(newsize, pgsz);
ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
if (newsize <= oldsize) { /* prevent the stack from shrinking */
@@ -466,13 +452,31 @@
crargs.prot &= ~PROT_EXEC;
}
/*
- * extend stack with the p_stkpageszc. growszc is different than
- * p_stkpageszc only on a memcntl to increase the stack pagesize.
+ * extend stack with the proposed new growszc, which is different
+ * than p_stkpageszc only on a memcntl to increase the stack pagesize.
+ * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
+ * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
+ * if not aligned to szc's pgsz.
*/
- crargs.szc = p->p_stkpageszc;
+ if (szc > 0) {
+ caddr_t oldsp = p->p_usrstack - oldsize;
+ caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
+ pgsz);
+
+ if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
+ crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
+ AS_MAP_NO_LPOOB;
+ } else if (oldsp == austk) {
+ crargs.szc = szc;
+ } else {
+ crargs.szc = AS_MAP_STACK;
+ }
+ } else {
+ crargs.szc = AS_MAP_NO_LPOOB;
+ }
crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
- if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize,
+ if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
segvn_create, &crargs)) != 0) {
if (error == EAGAIN) {
cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
@@ -481,15 +485,6 @@
return (error);
}
p->p_stksize = newsize;
-
-
- /*
- * Set up translations so the process doesn't have to fault in
- * the stack pages we just gave it.
- */
- (void) as_fault(as->a_hat, as,
- p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE);
-
return (0);
}
@@ -500,13 +495,7 @@
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
offset_t pos)
{
- struct segvn_crargs a, b;
- struct proc *p = curproc;
- int err;
- size_t pgsz;
- size_t l0, l1, l2, l3, l4; /* 0th through 5th chunks */
- caddr_t ruaddr, ruaddr0; /* rounded up addresses */
- extern size_t auto_lpg_va_default;
+ struct segvn_crargs vn_a;
if (((PROT_ALL & uprot) != uprot))
return (EACCES);
@@ -549,130 +538,18 @@
* Use the seg_vn segment driver; passing in the NULL amp
* gives the desired "cloning" effect.
*/
- a.vp = NULL;
- a.offset = 0;
- a.type = flags & MAP_TYPE;
- a.prot = uprot;
- a.maxprot = PROT_ALL;
- a.flags = flags & ~MAP_TYPE;
- a.cred = CRED();
- a.amp = NULL;
- a.szc = 0;
- a.lgrp_mem_policy_flags = 0;
-
- /*
- * Call arch-specific map_pgsz routine to pick best page size to map
- * this segment, and break the mapping up into parts if required.
- *
- * The parts work like this:
- *
- * addr ---------
- * | | l0
- * ---------
- * | | l1
- * ---------
- * | | l2
- * ---------
- * | | l3
- * ---------
- * | | l4
- * ---------
- * addr+len
- *
- * Starting from the middle, l2 is the number of bytes mapped by the
- * selected large page. l1 and l3 are mapped by auto_lpg_va_default
- * page size pages, and l0 and l4 are mapped by base page size pages.
- * If auto_lpg_va_default is the base page size, then l0 == l4 == 0.
- * If the requested address or length are aligned to the selected large
- * page size, l1 or l3 may also be 0.
- */
- if (use_zmap_lpg && a.type == MAP_PRIVATE) {
-
- pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL);
- if (pgsz <= PAGESIZE || len < pgsz) {
- return (as_map(as, *addrp, len, segvn_create, &a));
- }
+ vn_a.vp = NULL;
+ vn_a.offset = 0;
+ vn_a.type = flags & MAP_TYPE;
+ vn_a.prot = uprot;
+ vn_a.maxprot = PROT_ALL;
+ vn_a.flags = flags & ~MAP_TYPE;
+ vn_a.cred = CRED();
+ vn_a.amp = NULL;
+ vn_a.szc = 0;
+ vn_a.lgrp_mem_policy_flags = 0;
- ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz);
- if (auto_lpg_va_default != MMU_PAGESIZE) {
- ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp,
- auto_lpg_va_default);
- l0 = ruaddr0 - *addrp;
- } else {
- l0 = 0;
- ruaddr0 = *addrp;
- }
- l1 = ruaddr - ruaddr0;
- l3 = P2PHASE(len - l0 - l1, pgsz);
- if (auto_lpg_va_default == MMU_PAGESIZE) {
- l4 = 0;
- } else {
- l4 = P2PHASE(l3, auto_lpg_va_default);
- l3 -= l4;
- }
- l2 = len - l0 - l1 - l3 - l4;
-
- if (l0) {
- b = a;
- err = as_map(as, *addrp, l0, segvn_create, &b);
- if (err) {
- return (err);
- }
- }
-
- if (l1) {
- b = a;
- b.szc = page_szc(auto_lpg_va_default);
- err = as_map(as, ruaddr0, l1, segvn_create, &b);
- if (err) {
- goto error1;
- }
- }
-
- if (l2) {
- b = a;
- b.szc = page_szc(pgsz);
- err = as_map(as, ruaddr, l2, segvn_create, &b);
- if (err) {
- goto error2;
- }
- }
-
- if (l3) {
- b = a;
- b.szc = page_szc(auto_lpg_va_default);
- err = as_map(as, ruaddr + l2, l3, segvn_create, &b);
- if (err) {
- goto error3;
- }
- }
- if (l4) {
- err = as_map(as, ruaddr + l2 + l3, l4, segvn_create,
- &a);
- if (err) {
-error3:
- if (l3) {
- (void) as_unmap(as, ruaddr + l2, l3);
- }
-error2:
- if (l2) {
- (void) as_unmap(as, ruaddr, l2);
- }
-error1:
- if (l1) {
- (void) as_unmap(as, ruaddr0, l1);
- }
- if (l0) {
- (void) as_unmap(as, *addrp, l0);
- }
- return (err);
- }
- }
-
- return (0);
- }
-
- return (as_map(as, *addrp, len, segvn_create, &a));
+ return (as_map(as, *addrp, len, segvn_create, &vn_a));
}
static int
--- a/usr/src/uts/common/os/shm.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/os/shm.c Thu Oct 26 16:44:53 2006 -0700
@@ -341,8 +341,7 @@
* [D]ISM segment, then use the previously selected page size.
*/
if (!isspt(sp)) {
- share_size = map_pgsz(MAPPGSZ_ISM,
- pp, addr, size, NULL);
+ share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
if (share_size == 0) {
as_rangeunlock(as);
error = EINVAL;
--- a/usr/src/uts/common/sys/exec.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/sys/exec.h Thu Oct 26 16:44:53 2006 -0700
@@ -101,8 +101,6 @@
size_t from_ptrsize;
size_t ncargs;
struct execsw *execswp;
- uint_t stkpageszc;
- uint_t brkpageszc;
uintptr_t entry;
uintptr_t thrptr;
char *emulator;
--- a/usr/src/uts/common/sys/vmsystm.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/sys/vmsystm.h Thu Oct 26 16:44:53 2006 -0700
@@ -105,6 +105,14 @@
#define MAPPGSZ_HEAP 0x04
#define MAPPGSZ_ISM 0x08
+/*
+ * Flags for map_pgszcvec
+ */
+#define MAPPGSZC_SHM 0x01
+#define MAPPGSZC_PRIVM 0x02
+#define MAPPGSZC_STACK 0x04
+#define MAPPGSZC_HEAP 0x08
+
struct as;
struct page;
struct anon;
@@ -118,10 +126,10 @@
int dir);
extern int valid_usr_range(caddr_t, size_t, uint_t, struct as *, caddr_t);
extern int useracc(void *, size_t, int);
-extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr,
- size_t len, int *remap);
-extern uint_t map_execseg_pgszcvec(int, caddr_t, size_t);
-extern uint_t map_shm_pgszcvec(caddr_t, size_t, uintptr_t);
+extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len,
+ int memcntl);
+extern uint_t map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags,
+ int type, int memcntl);
extern void map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign,
uint_t flags);
extern int map_addr_vacalign_check(caddr_t, u_offset_t);
--- a/usr/src/uts/common/syscall/memcntl.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/syscall/memcntl.c Thu Oct 26 16:44:53 2006 -0700
@@ -199,31 +199,36 @@
else
type = MAPPGSZ_STK;
- pgsz = map_pgsz(type, p, 0, 0, NULL);
+ pgsz = map_pgsz(type, p, 0, 0, 1);
}
} else {
/*
+ * addr and len must be valid for range specified.
+ */
+ if (valid_usr_range(addr, len, 0, as,
+ as->a_userlimit) != RANGE_OKAY) {
+ return (set_errno(ENOMEM));
+ }
+ /*
* Note that we don't disable automatic large page
* selection for anon segments based on use of
* memcntl().
*/
if (pgsz == 0) {
- pgsz = map_pgsz(MAPPGSZ_VA, p, addr, len,
- NULL);
+ error = as_set_default_lpsize(as, addr, len);
+ if (error) {
+ (void) set_errno(error);
+ }
+ return (error);
}
/*
* addr and len must be prefered page size aligned
- * and valid for range specified.
*/
if (!IS_P2ALIGNED(addr, pgsz) ||
!IS_P2ALIGNED(len, pgsz)) {
return (set_errno(EINVAL));
}
- if (valid_usr_range(addr, len, 0, as,
- as->a_userlimit) != RANGE_OKAY) {
- return (set_errno(ENOMEM));
- }
}
szc = mem_getpgszc(pgsz);
@@ -257,10 +262,17 @@
return (set_errno(error));
}
}
+ /*
+ * It is possible for brk_internal to silently fail to
+ * promote the heap size, so don't panic or ASSERT.
+ */
+ if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
+ as_rangeunlock(as);
+ return (set_errno(ENOMEM));
+ }
oszc = p->p_brkpageszc;
p->p_brkpageszc = szc;
- ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz));
addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
pgsz);
len = (p->p_brkbase + p->p_brksize) - addr;
@@ -292,17 +304,24 @@
}
if (szc > p->p_stkpageszc) {
- error = grow_internal(p->p_usrstack
- - p->p_stksize, szc);
+ error = grow_internal(p->p_usrstack -
+ p->p_stksize, szc);
if (error) {
as_rangeunlock(as);
return (set_errno(error));
}
}
+ /*
+ * It is possible for grow_internal to silently fail to
+ * promote the stack size, so don't panic or ASSERT.
+ */
+ if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
+ as_rangeunlock(as);
+ return (set_errno(ENOMEM));
+ }
oszc = p->p_stkpageszc;
p->p_stkpageszc = szc;
- ASSERT(IS_P2ALIGNED(p->p_usrstack, pgsz));
addr = p->p_usrstack - p->p_stksize;
len = p->p_stksize;
--- a/usr/src/uts/common/vm/as.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/vm/as.h Thu Oct 26 16:44:53 2006 -0700
@@ -153,6 +153,13 @@
(((as)->a_userlimit > (caddr_t)UINT32_MAX) ? 1 : 0)
/*
+ * Flags for as_map/as_map_ansegs
+ */
+#define AS_MAP_NO_LPOOB ((uint_t)-1)
+#define AS_MAP_HEAP ((uint_t)-2)
+#define AS_MAP_STACK ((uint_t)-3)
+
+/*
* The as_callback is the basic structure which supports the ability to
* inform clients of specific events pertaining to address space management.
* A user calls as_add_callback to register an address space callback
@@ -274,6 +281,7 @@
size_t size, enum seg_rw rw);
int as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
boolean_t wait);
+int as_set_default_lpsize(struct as *as, caddr_t addr, size_t size);
void as_setwatch(struct as *as);
void as_clearwatch(struct as *as);
int as_getmemid(struct as *, caddr_t, memid_t *);
--- a/usr/src/uts/common/vm/hat.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/vm/hat.h Thu Oct 26 16:44:53 2006 -0700
@@ -345,7 +345,7 @@
* hat layer data structures. This flag forces hat layer
* to tap its reserves in order to prevent infinite
* recursion.
- * HAT_LOAD_AUTOLPG Get MMU specific disable_auto_large_pages
+ * HAT_LOAD_TEXT A flag to hat_memload() to indicate loading text pages.
*/
/*
@@ -362,7 +362,15 @@
#define HAT_RELOAD_SHARE 0x100
#define HAT_NO_KALLOC 0x200
#define HAT_LOAD_TEXT 0x400
-#define HAT_LOAD_AUTOLPG 0x800
+
+/*
+ * Flags for initializing disable_*large_pages.
+ *
+ * HAT_AUTO_TEXT Get MMU specific disable_auto_text_large_pages
+ * HAT_AUTO_DATA Get MMU specific disable_auto_data_large_pages
+ */
+#define HAT_AUTO_TEXT 0x800
+#define HAT_AUTO_DATA 0x1000
/*
* Attributes for hat_memload/hat_devload/hat_*attr
--- a/usr/src/uts/common/vm/seg_vn.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/vm/seg_vn.c Thu Oct 26 16:44:53 2006 -0700
@@ -395,7 +395,7 @@
a->flags &= ~MAP_NORESERVE;
if (a->szc != 0) {
- if (segvn_lpg_disable != 0 ||
+ if (segvn_lpg_disable != 0 || (a->szc == AS_MAP_NO_LPOOB) ||
(a->amp != NULL && a->type == MAP_PRIVATE) ||
(a->flags & MAP_NORESERVE) || seg->s_as == &kas) {
a->szc = 0;
@@ -5270,8 +5270,9 @@
err = segvn_demote_range(seg, addr, len,
SDR_END, 0);
} else {
- uint_t szcvec = map_shm_pgszcvec(seg->s_base,
- pgsz, (uintptr_t)seg->s_base);
+ uint_t szcvec = map_pgszcvec(seg->s_base,
+ pgsz, (uintptr_t)seg->s_base,
+ (svd->flags & MAP_TEXT), MAPPGSZC_SHM, 0);
err = segvn_demote_range(seg, addr, len,
SDR_END, szcvec);
}
@@ -6267,7 +6268,8 @@
ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
- return (svd->type | (svd->flags & MAP_NORESERVE));
+ return (svd->type | (svd->flags & (MAP_NORESERVE | MAP_TEXT |
+ MAP_INITDATA)));
}
/*ARGSUSED*/
--- a/usr/src/uts/common/vm/seg_vn.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/vm/seg_vn.h Thu Oct 26 16:44:53 2006 -0700
@@ -137,16 +137,18 @@
#define SEGVN_ZFOD_ARGS(prot, max) \
{ NULL, NULL, 0, MAP_PRIVATE, prot, max, 0, NULL, 0, 0 }
-#define AS_MAP_VNSEGS_USELPGS(crfp, argsp) \
+#define AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp) \
((crfp) == (int (*)())segvn_create && \
(((struct segvn_crargs *)(argsp))->flags & \
(MAP_TEXT | MAP_INITDATA)) && \
- ((struct segvn_crargs *)(argsp))->vp != NULL && \
- ((struct segvn_crargs *)(argsp))->amp == NULL)
+ ((struct segvn_crargs *)(argsp))->szc == 0 && \
+ ((struct segvn_crargs *)(argsp))->vp != NULL)
-#define AS_MAP_SHAMP(crfp, argsp) \
+#define AS_MAP_CHECK_ANON_LPOOB(crfp, argsp) \
((crfp) == (int (*)())segvn_create && \
- ((struct segvn_crargs *)(argsp))->type == MAP_SHARED && \
+ (((struct segvn_crargs *)(argsp))->szc == 0 || \
+ ((struct segvn_crargs *)(argsp))->szc == AS_MAP_HEAP || \
+ ((struct segvn_crargs *)(argsp))->szc == AS_MAP_STACK) && \
((struct segvn_crargs *)(argsp))->vp == NULL)
extern void segvn_init(void);
--- a/usr/src/uts/common/vm/vm_as.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/common/vm/vm_as.c Thu Oct 26 16:44:53 2006 -0700
@@ -1573,8 +1573,10 @@
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
- int text = vn_a->flags & MAP_TEXT;
- uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
+ uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
+ int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
+ uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
+ type, 0);
int error;
struct seg *seg;
struct vattr va;
@@ -1616,7 +1618,8 @@
save_size = size;
size = va.va_size - (vn_a->offset & PAGEMASK);
size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
- szcvec = map_execseg_pgszcvec(text, addr, size);
+ szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
+ type, 0);
if (szcvec <= 1) {
size = save_size;
goto again;
@@ -1637,14 +1640,32 @@
return (0);
}
+/*
+ * as_map_ansegs: shared or private anonymous memory. Note that the flags
+ * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
+ */
static int
-as_map_sham(struct as *as, caddr_t addr, size_t size,
+as_map_ansegs(struct as *as, caddr_t addr, size_t size,
int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
- uint_t szcvec = map_shm_pgszcvec(addr, size,
- vn_a->amp == NULL ? (uintptr_t)addr :
- (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE));
-
+ uint_t szcvec;
+ uchar_t type;
+
+ ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
+ if (vn_a->type == MAP_SHARED) {
+ type = MAPPGSZC_SHM;
+ } else if (vn_a->type == MAP_PRIVATE) {
+ if (vn_a->szc == AS_MAP_HEAP) {
+ type = MAPPGSZC_HEAP;
+ } else if (vn_a->szc == AS_MAP_STACK) {
+ type = MAPPGSZC_STACK;
+ } else {
+ type = MAPPGSZC_PRIVM;
+ }
+ }
+ szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
+ (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
+ (vn_a->flags & MAP_TEXT), type, 0);
ASSERT(AS_WRITE_HELD(as, &as->a_lock));
ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
ASSERT(IS_P2ALIGNED(size, PAGESIZE));
@@ -1669,6 +1690,7 @@
caddr_t raddr; /* rounded down addr */
size_t rsize; /* rounded up size */
int error;
+ int unmap = 0;
struct proc *p = curproc;
raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
@@ -1695,15 +1717,19 @@
return (ENOMEM);
}
- if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) {
- int unmap = 0;
- if (AS_MAP_SHAMP(crfp, argsp)) {
- error = as_map_sham(as, raddr, rsize, crfp,
- (struct segvn_crargs *)argsp, &unmap);
- } else {
- error = as_map_vnsegs(as, raddr, rsize, crfp,
- (struct segvn_crargs *)argsp, &unmap);
+ if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
+ error = as_map_vnsegs(as, raddr, rsize, crfp,
+ (struct segvn_crargs *)argsp, &unmap);
+ if (error != 0) {
+ AS_LOCK_EXIT(as, &as->a_lock);
+ if (unmap) {
+ (void) as_unmap(as, addr, size);
+ }
+ return (error);
}
+ } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
+ error = as_map_ansegs(as, raddr, rsize, crfp,
+ (struct segvn_crargs *)argsp, &unmap);
if (error != 0) {
AS_LOCK_EXIT(as, &as->a_lock);
if (unmap) {
@@ -2741,6 +2767,377 @@
}
/*
+ * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
+ * in its chunk where s_szc is less than the szc we want to set.
+ */
+static int
+as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
+ int *retry)
+{
+ struct seg *seg;
+ size_t ssize;
+ int error;
+
+ seg = as_segat(as, raddr);
+ if (seg == NULL) {
+ panic("as_iset3_default_lpsize: no seg");
+ }
+
+ for (; rsize != 0; rsize -= ssize, raddr += ssize) {
+ if (raddr >= seg->s_base + seg->s_size) {
+ seg = AS_SEGNEXT(as, seg);
+ if (seg == NULL || raddr != seg->s_base) {
+ panic("as_iset3_default_lpsize: as changed");
+ }
+ }
+ if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
+ ssize = seg->s_base + seg->s_size - raddr;
+ } else {
+ ssize = rsize;
+ }
+
+ if (szc > seg->s_szc) {
+ error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
+ /* Only retry on EINVAL segments that have no vnode. */
+ if (error == EINVAL) {
+ vnode_t *vp = NULL;
+ if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) &&
+ (SEGOP_GETVP(seg, raddr, &vp) != 0 ||
+ vp == NULL)) {
+ *retry = 1;
+ } else {
+ *retry = 0;
+ }
+ }
+ if (error) {
+ return (error);
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
+ * pagesize on each segment in its range, but if any fails with EINVAL,
+ * then it reduces the pagesizes to the next size in the bitmap and
+ * retries as_iset3_default_lpsize(). The reason why the code retries
+ * smaller allowed sizes on EINVAL is because (a) the anon offset may not
+ * match the bigger sizes, and (b) it's hard to get this offset (to begin
+ * with) to pass to map_pgszcvec().
+ */
+static int
+as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
+ uint_t szcvec)
+{
+ int error;
+ int retry;
+
+ for (;;) {
+ error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
+ if (error == EINVAL && retry) {
+ szcvec &= ~(1 << szc);
+ if (szcvec <= 1) {
+ return (EINVAL);
+ }
+ szc = highbit(szcvec) - 1;
+ } else {
+ return (error);
+ }
+ }
+}
+
+/*
+ * as_iset1_default_lpsize() breaks its chunk into areas where existing
+ * segments have a smaller szc than we want to set. For each such area,
+ * it calls as_iset2_default_lpsize().
+ */
+static int
+as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
+ uint_t szcvec)
+{
+ struct seg *seg;
+ size_t ssize;
+ caddr_t setaddr = raddr;
+ size_t setsize = 0;
+ int set;
+ int error;
+
+ ASSERT(AS_WRITE_HELD(as, &as->a_lock));
+
+ seg = as_segat(as, raddr);
+ if (seg == NULL) {
+ panic("as_iset1_default_lpsize: no seg");
+ }
+ if (seg->s_szc < szc) {
+ set = 1;
+ } else {
+ set = 0;
+ }
+
+ for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
+ if (raddr >= seg->s_base + seg->s_size) {
+ seg = AS_SEGNEXT(as, seg);
+ if (seg == NULL || raddr != seg->s_base) {
+ panic("as_iset1_default_lpsize: as changed");
+ }
+ if (seg->s_szc >= szc && set) {
+ ASSERT(setsize != 0);
+ error = as_iset2_default_lpsize(as,
+ setaddr, setsize, szc, szcvec);
+ if (error) {
+ return (error);
+ }
+ set = 0;
+ } else if (seg->s_szc < szc && !set) {
+ setaddr = raddr;
+ setsize = 0;
+ set = 1;
+ }
+ }
+ if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
+ ssize = seg->s_base + seg->s_size - raddr;
+ } else {
+ ssize = rsize;
+ }
+ }
+ error = 0;
+ if (set) {
+ ASSERT(setsize != 0);
+ error = as_iset2_default_lpsize(as, setaddr, setsize,
+ szc, szcvec);
+ }
+ return (error);
+}
+
+/*
+ * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
+ * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
+ * chunk to as_iset1_default_lpsize().
+ */
+static int
+as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
+ int type)
+{
+ int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
+ uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
+ flags, rtype, 1);
+ uint_t szc;
+ uint_t nszc;
+ int error;
+ caddr_t a;
+ caddr_t eaddr;
+ size_t segsize;
+ size_t pgsz;
+ uint_t save_szcvec;
+
+ ASSERT(AS_WRITE_HELD(as, &as->a_lock));
+ ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
+ ASSERT(IS_P2ALIGNED(size, PAGESIZE));
+
+ szcvec &= ~1;
+ if (szcvec <= 1) { /* skip if base page size */
+ return (0);
+ }
+
+ /* Get the pagesize of the first larger page size. */
+ szc = lowbit(szcvec) - 1;
+ pgsz = page_get_pagesize(szc);
+ eaddr = addr + size;
+ addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+ eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
+
+ save_szcvec = szcvec;
+ szcvec >>= (szc + 1);
+ nszc = szc;
+ while (szcvec) {
+ if ((szcvec & 0x1) == 0) {
+ nszc++;
+ szcvec >>= 1;
+ continue;
+ }
+ nszc++;
+ pgsz = page_get_pagesize(nszc);
+ a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+ if (a != addr) {
+ ASSERT(szc > 0);
+ ASSERT(a < eaddr);
+ segsize = a - addr;
+ error = as_iset1_default_lpsize(as, addr, segsize, szc,
+ save_szcvec);
+ if (error) {
+ return (error);
+ }
+ addr = a;
+ }
+ szc = nszc;
+ szcvec >>= 1;
+ }
+
+ ASSERT(addr < eaddr);
+ szcvec = save_szcvec;
+ while (szcvec) {
+ a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
+ ASSERT(a >= addr);
+ if (a != addr) {
+ ASSERT(szc > 0);
+ segsize = a - addr;
+ error = as_iset1_default_lpsize(as, addr, segsize, szc,
+ save_szcvec);
+ if (error) {
+ return (error);
+ }
+ addr = a;
+ }
+ szcvec &= ~(1 << szc);
+ if (szcvec) {
+ szc = highbit(szcvec) - 1;
+ pgsz = page_get_pagesize(szc);
+ }
+ }
+ ASSERT(addr == eaddr);
+
+ return (0);
+}
+
+/*
+ * Set the default large page size for the range. Called via memcntl with
+ * page size set to 0. as_set_default_lpsize breaks the range down into
+ * chunks with the same type/flags, ignores non-segvn segments, and passes
+ * each chunk to as_iset_default_lpsize().
+ */
+int
+as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
+{
+ struct seg *seg;
+ caddr_t raddr;
+ size_t rsize;
+ size_t ssize;
+ int rtype, rflags;
+ int stype, sflags;
+ int error;
+ caddr_t setaddr;
+ size_t setsize;
+ int segvn;
+
+ if (size == 0)
+ return (0);
+
+ AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
+again:
+ error = 0;
+
+ raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
+ rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
+ (size_t)raddr;
+
+ if (raddr + rsize < raddr) { /* check for wraparound */
+ AS_LOCK_EXIT(as, &as->a_lock);
+ return (ENOMEM);
+ }
+ as_clearwatchprot(as, raddr, rsize);
+ seg = as_segat(as, raddr);
+ if (seg == NULL) {
+ as_setwatch(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ return (ENOMEM);
+ }
+ if (seg->s_ops == &segvn_ops) {
+ rtype = SEGOP_GETTYPE(seg, addr);
+ rflags = rtype & (MAP_TEXT | MAP_INITDATA);
+ rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
+ segvn = 1;
+ } else {
+ segvn = 0;
+ }
+ setaddr = raddr;
+ setsize = 0;
+
+ for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
+ if (raddr >= (seg->s_base + seg->s_size)) {
+ seg = AS_SEGNEXT(as, seg);
+ if (seg == NULL || raddr != seg->s_base) {
+ error = ENOMEM;
+ break;
+ }
+ if (seg->s_ops == &segvn_ops) {
+ stype = SEGOP_GETTYPE(seg, raddr);
+ sflags = stype & (MAP_TEXT | MAP_INITDATA);
+ stype &= (MAP_SHARED | MAP_PRIVATE);
+ if (segvn && (rflags != sflags ||
+ rtype != stype)) {
+ /*
+ * The next segment is also segvn but
+ * has different flags and/or type.
+ */
+ ASSERT(setsize != 0);
+ error = as_iset_default_lpsize(as,
+ setaddr, setsize, rflags, rtype);
+ if (error) {
+ break;
+ }
+ rflags = sflags;
+ rtype = stype;
+ setaddr = raddr;
+ setsize = 0;
+ } else if (!segvn) {
+ rflags = sflags;
+ rtype = stype;
+ setaddr = raddr;
+ setsize = 0;
+ segvn = 1;
+ }
+ } else if (segvn) {
+ /* The next segment is not segvn. */
+ ASSERT(setsize != 0);
+ error = as_iset_default_lpsize(as,
+ setaddr, setsize, rflags, rtype);
+ if (error) {
+ break;
+ }
+ segvn = 0;
+ }
+ }
+ if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
+ ssize = seg->s_base + seg->s_size - raddr;
+ } else {
+ ssize = rsize;
+ }
+ }
+ if (error == 0 && segvn) {
+ /* The last chunk when rsize == 0. */
+ ASSERT(setsize != 0);
+ error = as_iset_default_lpsize(as, setaddr, setsize,
+ rflags, rtype);
+ }
+
+ if (error == IE_RETRY) {
+ goto again;
+ } else if (error == IE_NOMEM) {
+ error = EAGAIN;
+ } else if (error == ENOTSUP) {
+ error = EINVAL;
+ } else if (error == EAGAIN) {
+ mutex_enter(&as->a_contents);
+ if (AS_ISUNMAPWAIT(as) == 0) {
+ cv_broadcast(&as->a_cv);
+ }
+ AS_SETUNMAPWAIT(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ while (AS_ISUNMAPWAIT(as)) {
+ cv_wait(&as->a_cv, &as->a_contents);
+ }
+ mutex_exit(&as->a_contents);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
+ goto again;
+ }
+
+ as_setwatch(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ return (error);
+}
+
+/*
* Setup all of the uninitialized watched pages that we can.
*/
void
--- a/usr/src/uts/i86pc/os/startup.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/i86pc/os/startup.c Thu Oct 26 16:44:53 2006 -0700
@@ -1475,8 +1475,7 @@
extern void hat_kern_setup(void);
pgcnt_t pages_left;
- extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg;
- extern pgcnt_t auto_lpg_min_physmem;
+ extern int use_brk_lpg, use_stk_lpg;
PRM_POINT("startup_vm() starting...");
@@ -1729,11 +1728,21 @@
* disable automatic large pages for small memory systems or
* when the disable flag is set.
*/
- if (physmem < auto_lpg_min_physmem || auto_lpg_disable) {
- exec_lpg_disable = 1;
+ if (!auto_lpg_disable && mmu.max_page_level > 0) {
+ max_uheap_lpsize = LEVEL_SIZE(1);
+ max_ustack_lpsize = LEVEL_SIZE(1);
+ max_privmap_lpsize = LEVEL_SIZE(1);
+ max_uidata_lpsize = LEVEL_SIZE(1);
+ max_utext_lpsize = LEVEL_SIZE(1);
+ max_shm_lpsize = LEVEL_SIZE(1);
+ }
+ if (physmem < privm_lpg_min_physmem || mmu.max_page_level == 0 ||
+ auto_lpg_disable) {
use_brk_lpg = 0;
use_stk_lpg = 0;
- use_zmap_lpg = 0;
+ }
+ if (mmu.max_page_level > 0) {
+ mcntl0_lpsize = LEVEL_SIZE(1);
}
PRM_POINT("Calling hat_init_finish()...");
--- a/usr/src/uts/i86pc/vm/vm_dep.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/i86pc/vm/vm_dep.h Thu Oct 26 16:44:53 2006 -0700
@@ -569,6 +569,29 @@
#define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G)
/*
+ * Maximum and default values for user heap, stack, private and shared
+ * anonymous memory, and user text and initialized data.
+ * Used by map_pgsz*() routines.
+ */
+extern size_t max_uheap_lpsize;
+extern size_t default_uheap_lpsize;
+extern size_t max_ustack_lpsize;
+extern size_t default_ustack_lpsize;
+extern size_t max_privmap_lpsize;
+extern size_t max_uidata_lpsize;
+extern size_t max_utext_lpsize;
+extern size_t max_shm_lpsize;
+extern size_t mcntl0_lpsize;
+
+/*
+ * Sanity control. Don't use large pages regardless of user
+ * settings if there's less than priv or shm_lpg_min_physmem memory installed.
+ * The units for these variables are 8K pages.
+ */
+extern pgcnt_t privm_lpg_min_physmem;
+extern pgcnt_t shm_lpg_min_physmem;
+
+/*
* hash as and addr to get a bin.
*/
--- a/usr/src/uts/i86pc/vm/vm_machdep.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c Thu Oct 26 16:44:53 2006 -0700
@@ -55,6 +55,7 @@
#include <sys/exec.h>
#include <sys/exechdr.h>
#include <sys/debug.h>
+#include <sys/vmsystm.h>
#include <vm/hat.h>
#include <vm/as.h>
@@ -122,39 +123,80 @@
/* How many page sizes the users can see */
uint_t mmu_exported_page_sizes;
-size_t auto_lpg_va_default = MMU_PAGESIZE; /* used by zmap() */
/*
* Number of pages in 1 GB. Don't enable automatic large pages if we have
* fewer than this many pages.
*/
-pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
+pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
+pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
+
+/*
+ * Maximum and default segment size tunables for user private
+ * and shared anon memory, and user text and initialized data.
+ * These can be patched via /etc/system to allow large pages
+ * to be used for mapping application private and shared anon memory.
+ */
+size_t mcntl0_lpsize = MMU_PAGESIZE;
+size_t max_uheap_lpsize = MMU_PAGESIZE;
+size_t default_uheap_lpsize = MMU_PAGESIZE;
+size_t max_ustack_lpsize = MMU_PAGESIZE;
+size_t default_ustack_lpsize = MMU_PAGESIZE;
+size_t max_privmap_lpsize = MMU_PAGESIZE;
+size_t max_uidata_lpsize = MMU_PAGESIZE;
+size_t max_utext_lpsize = MMU_PAGESIZE;
+size_t max_shm_lpsize = MMU_PAGESIZE;
/*
* Return the optimum page size for a given mapping
*/
/*ARGSUSED*/
size_t
-map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap)
+map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
- level_t l;
+ level_t l = 0;
+ size_t pgsz = MMU_PAGESIZE;
+ size_t max_lpsize;
+ uint_t mszc;
- if (remap)
- *remap = 0;
+ ASSERT(maptype != MAPPGSZ_VA);
+
+ if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
+ return (MMU_PAGESIZE);
+ }
switch (maptype) {
-
+ case MAPPGSZ_HEAP:
case MAPPGSZ_STK:
- case MAPPGSZ_HEAP:
- case MAPPGSZ_VA:
+ max_lpsize = memcntl ? mcntl0_lpsize : (maptype ==
+ MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize);
+ if (max_lpsize == MMU_PAGESIZE) {
+ return (MMU_PAGESIZE);
+ }
+ if (len == 0) {
+ len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase +
+ p->p_brksize - p->p_bssbase : p->p_stksize;
+ }
+ len = (maptype == MAPPGSZ_HEAP) ? MAX(len,
+ default_uheap_lpsize) : MAX(len, default_ustack_lpsize);
+
/*
* use the pages size that best fits len
*/
for (l = mmu.max_page_level; l > 0; --l) {
- if (len < LEVEL_SIZE(l))
+ if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) {
continue;
+ } else {
+ pgsz = LEVEL_SIZE(l);
+ }
break;
}
- return (LEVEL_SIZE(l));
+
+ mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc :
+ p->p_stkpageszc);
+ if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) {
+ pgsz = hw_page_array[mszc].hp_size;
+ }
+ return (pgsz);
/*
* for ISM use the 1st large page size.
@@ -164,65 +206,96 @@
return (MMU_PAGESIZE);
return (LEVEL_SIZE(1));
}
- return (0);
+ return (pgsz);
}
-/*
- * This can be patched via /etc/system to allow large pages
- * to be used for mapping application and libraries text segments.
- */
-int use_text_largepages = 0;
-int use_shm_largepages = 0;
+static uint_t
+map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize,
+ size_t min_physmem)
+{
+ caddr_t eaddr = addr + size;
+ uint_t szcvec = 0;
+ caddr_t raddr;
+ caddr_t readdr;
+ size_t pgsz;
+ int i;
+
+ if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
+ return (0);
+ }
+
+ for (i = mmu_page_sizes - 1; i > 0; i--) {
+ pgsz = page_get_pagesize(i);
+ if (pgsz > max_lpsize) {
+ continue;
+ }
+ raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+ readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
+ if (raddr < addr || raddr >= readdr) {
+ continue;
+ }
+ if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
+ continue;
+ }
+ /*
+ * Set szcvec to the remaining page sizes.
+ */
+ szcvec = ((1 << (i + 1)) - 1) & ~1;
+ break;
+ }
+ return (szcvec);
+}
/*
* Return a bit vector of large page size codes that
* can be used to map [addr, addr + len) region.
*/
-
/*ARGSUSED*/
uint_t
-map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
+map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
+ int memcntl)
{
- size_t pgsz;
- caddr_t a;
+ size_t max_lpsize = mcntl0_lpsize;
- if (!text || !use_text_largepages ||
- mmu.max_page_level == 0)
+ if (mmu.max_page_level == 0)
return (0);
- pgsz = LEVEL_SIZE(1);
- a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
- if (a < addr || a >= addr + len) {
- return (0);
- }
- len -= (a - addr);
- if (len < pgsz) {
- return (0);
- }
- return (1 << 1);
-}
+ if (flags & MAP_TEXT) {
+ if (!memcntl)
+ max_lpsize = max_utext_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ shm_lpg_min_physmem));
+
+ } else if (flags & MAP_INITDATA) {
+ if (!memcntl)
+ max_lpsize = max_uidata_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ privm_lpg_min_physmem));
+
+ } else if (type == MAPPGSZC_SHM) {
+ if (!memcntl)
+ max_lpsize = max_shm_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ shm_lpg_min_physmem));
-uint_t
-map_shm_pgszcvec(caddr_t addr, size_t len, uintptr_t off)
-{
- size_t pgsz;
- caddr_t a;
-
- if (!use_shm_largepages || mmu.max_page_level == 0) {
- return (0);
- }
+ } else if (type == MAPPGSZC_HEAP) {
+ if (!memcntl)
+ max_lpsize = max_uheap_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ privm_lpg_min_physmem));
- pgsz = LEVEL_SIZE(1);
- a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
- if (a < addr || a >= addr + len ||
- P2PHASE((uintptr_t)addr ^ off, pgsz)) {
- return (0);
+ } else if (type == MAPPGSZC_STACK) {
+ if (!memcntl)
+ max_lpsize = max_ustack_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ privm_lpg_min_physmem));
+
+ } else {
+ if (!memcntl)
+ max_lpsize = max_privmap_lpsize;
+ return (map_szcvec(addr, size, off, max_lpsize,
+ privm_lpg_min_physmem));
}
- len -= (a - addr);
- if (len < pgsz) {
- return (0);
- }
- return (1 << 1);
}
/*
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c Thu Oct 26 16:44:53 2006 -0700
@@ -139,14 +139,21 @@
#define LARGE_PAGES_OFF 0x1
/*
- * WARNING: 512K pages MUST be disabled for ISM/DISM. If not
- * a process would page fault indefinitely if it tried to
- * access a 512K page.
- */
-int disable_ism_large_pages = (1 << TTE512K);
-int disable_large_pages = 0;
-int disable_auto_large_pages = 0;
-int disable_shm_large_pages = 0;
+ * The disable_large_pages and disable_ism_large_pages variables control
+ * hat_memload_array and the page sizes to be used by ISM and the kernel.
+ *
+ * The disable_auto_data_large_pages and disable_auto_text_large_pages variables
+ * are only used to control which OOB pages to use at upper VM segment creation
+ * time, and are set in hat_init_pagesizes and used in the map_pgsz* routines.
+ * Their values may come from platform or CPU specific code to disable page
+ * sizes that should not be used.
+ *
+ * WARNING: 512K pages are currently not supported for ISM/DISM.
+ */
+uint_t disable_large_pages = 0;
+uint_t disable_ism_large_pages = (1 << TTE512K);
+uint_t disable_auto_data_large_pages = 0;
+uint_t disable_auto_text_large_pages = 0;
/*
* Private sfmmu data structures for hat management
@@ -891,17 +898,12 @@
mmu_exported_page_sizes = 0;
for (i = TTE8K; i < max_mmu_page_sizes; i++) {
- extern int disable_text_largepages;
- extern int disable_initdata_largepages;
szc_2_userszc[i] = (uint_t)-1;
userszc_2_szc[i] = (uint_t)-1;
if ((mmu_exported_pagesize_mask & (1 << i)) == 0) {
disable_large_pages |= (1 << i);
- disable_ism_large_pages |= (1 << i);
- disable_text_largepages |= (1 << i);
- disable_initdata_largepages |= (1 << i);
} else {
szc_2_userszc[i] = mmu_exported_page_sizes;
userszc_2_szc[mmu_exported_page_sizes] = i;
@@ -909,7 +911,9 @@
}
}
- disable_auto_large_pages = disable_large_pages;
+ disable_ism_large_pages |= disable_large_pages;
+ disable_auto_data_large_pages = disable_large_pages;
+ disable_auto_text_large_pages = disable_large_pages;
/*
* Initialize mmu-specific large page sizes.
@@ -918,11 +922,11 @@
disable_large_pages |= mmu_large_pages_disabled(HAT_LOAD);
disable_ism_large_pages |=
mmu_large_pages_disabled(HAT_LOAD_SHARE);
- disable_auto_large_pages |=
- mmu_large_pages_disabled(HAT_LOAD_AUTOLPG);
- }
-
- disable_shm_large_pages = disable_auto_large_pages;
+ disable_auto_data_large_pages |=
+ mmu_large_pages_disabled(HAT_AUTO_DATA);
+ disable_auto_text_large_pages |=
+ mmu_large_pages_disabled(HAT_AUTO_TEXT);
+ }
}
/*
@@ -1993,7 +1997,7 @@
pgcnt_t numpg, npgs;
tte_t tte;
page_t *pp;
- int large_pages_disable;
+ uint_t large_pages_disable;
ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h Thu Oct 26 16:44:53 2006 -0700
@@ -1782,7 +1782,7 @@
#pragma weak mmu_set_ctx_page_sizes
#pragma weak mmu_check_page_sizes
-extern int mmu_large_pages_disabled(uint_t);
+extern uint_t mmu_large_pages_disabled(uint_t);
extern void mmu_set_ctx_page_sizes(sfmmu_t *);
extern void mmu_check_page_sizes(sfmmu_t *, uint64_t *);
@@ -1822,6 +1822,11 @@
extern vmem_t *kmem_tsb_default_arena[];
extern int tsb_lgrp_affinity;
+extern uint_t disable_large_pages;
+extern uint_t disable_ism_large_pages;
+extern uint_t disable_auto_data_large_pages;
+extern uint_t disable_auto_text_large_pages;
+
/* kpm externals */
extern pfn_t sfmmu_kpm_vatopfn(caddr_t);
extern void sfmmu_kpm_patch_tlbm(void);
--- a/usr/src/uts/sun4/os/startup.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4/os/startup.c Thu Oct 26 16:44:53 2006 -0700
@@ -1878,7 +1878,7 @@
pgcnt_t max_phys_segkp;
int mnode;
- extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg;
+ extern int use_brk_lpg, use_stk_lpg;
/*
* get prom's mappings, create hments for them and switch
@@ -1974,12 +1974,12 @@
avmem = (uint64_t)freemem << PAGESHIFT;
cmn_err(CE_CONT, "?avail mem = %lld\n", (unsigned long long)avmem);
- /* For small memory systems disable automatic large pages. */
- if (physmem < auto_lpg_min_physmem) {
- exec_lpg_disable = 1;
+ /*
+ * For small memory systems disable automatic large pages.
+ */
+ if (physmem < privm_lpg_min_physmem) {
use_brk_lpg = 0;
use_stk_lpg = 0;
- use_zmap_lpg = 0;
}
/*
--- a/usr/src/uts/sun4/vm/vm_dep.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4/vm/vm_dep.c Thu Oct 26 16:44:53 2006 -0700
@@ -97,8 +97,6 @@
caddr_t errata57_limit;
#endif
-extern int disable_auto_large_pages; /* used by map_pgsz*() routines */
-
extern void page_relocate_hash(page_t *, page_t *);
/*
@@ -467,89 +465,56 @@
}
}
-#define MAP_PGSZ_COMMON(pgsz, n, upper, lower, len) \
- for ((n) = (upper); (n) > (lower); (n)--) { \
- if (disable_auto_large_pages & (1 << (n))) \
- continue; \
- if (hw_page_array[(n)].hp_size <= (len)) { \
- (pgsz) = hw_page_array[(n)].hp_size; \
- break; \
- } \
+/*
+ * Return non 0 value if the address may cause a VAC alias with KPM mappings.
+ * KPM selects an address such that it's equal offset modulo shm_alignment and
+ * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping.
+ */
+int
+map_addr_vacalign_check(caddr_t addr, u_offset_t off)
+{
+ if (vac) {
+ return (((uintptr_t)addr ^ off) & shm_alignment - 1);
+ } else {
+ return (0);
}
-
-
-/*ARGSUSED*/
-static size_t
-map_pgszva(struct proc *p, caddr_t addr, size_t len)
-{
- size_t pgsz = MMU_PAGESIZE;
- int n, upper;
+}
- /*
- * Select the best fit page size within the constraints of
- * auto_lpg_{min,max}szc.
- *
- * Note that we also take the heap size into account when
- * deciding if we've crossed the threshold at which we should
- * increase the page size. This isn't perfect since the heap
- * may not have reached its full size yet, but it's better than
- * not considering it at all.
- */
- len += p->p_brksize;
- if (ptob(auto_lpg_tlb_threshold) <= len) {
-
- upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);
-
- /*
- * Use auto_lpg_minszc - 1 as the limit so we never drop
- * below auto_lpg_minszc. We don't have a size code to refer
- * to like we have for bss and stack, so we assume 0.
- * auto_lpg_minszc should always be >= 0. Using
- * auto_lpg_minszc cuts off the loop.
- */
- MAP_PGSZ_COMMON(pgsz, n, upper, auto_lpg_minszc - 1, len);
- }
-
- return (pgsz);
-}
+/*
+ * Sanity control. Don't use large pages regardless of user
+ * settings if there's less than priv or shm_lpg_min_physmem memory installed.
+ * The units for these variables are 8K pages.
+ */
+pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */
+pgcnt_t privm_lpg_min_physmem = 131072; /* 1GB */
static size_t
map_pgszheap(struct proc *p, caddr_t addr, size_t len)
{
- size_t pgsz;
- int n, upper, lower;
+ size_t pgsz = MMU_PAGESIZE;
+ int szc;
/*
* If len is zero, retrieve from proc and don't demote the page size.
+ * Use at least the default pagesize.
*/
if (len == 0) {
- len = p->p_brksize;
+ len = p->p_brkbase + p->p_brksize - p->p_bssbase;
+ }
+ len = MAX(len, default_uheap_lpsize);
+
+ for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
+ pgsz = hw_page_array[szc].hp_size;
+ if ((disable_auto_data_large_pages & (1 << szc)) ||
+ pgsz > max_uheap_lpsize)
+ continue;
+ if (len >= pgsz) {
+ break;
+ }
}
/*
- * Still zero? Then we don't have a heap yet, so pick the default
- * heap size.
- */
- if (len == 0) {
- pgsz = auto_lpg_heap_default;
- } else {
- pgsz = hw_page_array[p->p_brkpageszc].hp_size;
- }
-
- if ((pgsz * auto_lpg_tlb_threshold) <= len) {
- /*
- * We're past the threshold, so select the best fit
- * page size within the constraints of
- * auto_lpg_{min,max}szc and the minimum required
- * alignment.
- */
- upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);
- lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc);
- MAP_PGSZ_COMMON(pgsz, n, upper, lower, len);
- }
-
- /*
- * If addr == 0 we were called by memcntl() or exec_args() when the
+ * If addr == 0 we were called by memcntl() when the
* size code is 0. Don't set pgsz less than current size.
*/
if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
@@ -562,36 +527,26 @@
static size_t
map_pgszstk(struct proc *p, caddr_t addr, size_t len)
{
- size_t pgsz;
- int n, upper, lower;
+ size_t pgsz = MMU_PAGESIZE;
+ int szc;
/*
* If len is zero, retrieve from proc and don't demote the page size.
+ * Use at least the default pagesize.
*/
if (len == 0) {
len = p->p_stksize;
}
-
- /*
- * Still zero? Then we don't have a heap yet, so pick the default
- * stack size.
- */
- if (len == 0) {
- pgsz = auto_lpg_stack_default;
- } else {
- pgsz = hw_page_array[p->p_stkpageszc].hp_size;
- }
+ len = MAX(len, default_ustack_lpsize);
- if ((pgsz * auto_lpg_tlb_threshold) <= len) {
- /*
- * We're past the threshold, so select the best fit
- * page size within the constraints of
- * auto_lpg_{min,max}szc and the minimum required
- * alignment.
- */
- upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc);
- lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc);
- MAP_PGSZ_COMMON(pgsz, n, upper, lower, len);
+ for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
+ pgsz = hw_page_array[szc].hp_size;
+ if ((disable_auto_data_large_pages & (1 << szc)) ||
+ pgsz > max_ustack_lpsize)
+ continue;
+ if (len >= pgsz) {
+ break;
+ }
}
/*
@@ -610,7 +565,6 @@
{
uint_t szc;
size_t pgsz;
- extern int disable_ism_large_pages;
for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
if (disable_ism_large_pages & (1 << szc))
@@ -620,234 +574,69 @@
if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
return (pgsz);
}
+
return (DEFAULT_ISM_PAGESIZE);
}
/*
* Suggest a page size to be used to map a segment of type maptype and length
* len. Returns a page size (not a size code).
- * If remap is non-NULL, fill in a value suggesting whether or not to remap
- * this segment.
*/
+/* ARGSUSED */
size_t
-map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap)
+map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
- size_t pgsz = 0;
+ size_t pgsz = MMU_PAGESIZE;
+
+ ASSERT(maptype != MAPPGSZ_VA);
- if (remap != NULL)
- *remap = (len > auto_lpg_remap_threshold);
+ if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
+ return (MMU_PAGESIZE);
+ }
switch (maptype) {
case MAPPGSZ_ISM:
pgsz = map_pgszism(addr, len);
break;
- case MAPPGSZ_VA:
- pgsz = map_pgszva(p, addr, len);
- break;
-
case MAPPGSZ_STK:
- pgsz = map_pgszstk(p, addr, len);
+ if (max_ustack_lpsize > MMU_PAGESIZE) {
+ pgsz = map_pgszstk(p, addr, len);
+ }
break;
case MAPPGSZ_HEAP:
- pgsz = map_pgszheap(p, addr, len);
+ if (max_uheap_lpsize > MMU_PAGESIZE) {
+ pgsz = map_pgszheap(p, addr, len);
+ }
break;
}
return (pgsz);
}
-/*
- * Return non 0 value if the address may cause a VAC alias with KPM mappings.
- * KPM selects an address such that it's equal offset modulo shm_alignment and
- * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping.
- */
-int
-map_addr_vacalign_check(caddr_t addr, u_offset_t off)
-{
- if (vac) {
- return (((uintptr_t)addr ^ off) & shm_alignment - 1);
- } else {
- return (0);
- }
-}
-
-/*
- * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m
- * can be set in platform or CPU specific code but user can change the
- * default values via /etc/system.
- *
- * Initial values are defined in architecture specific mach_vm_dep.c file.
- */
-extern int use_text_pgsz64k;
-extern int use_text_pgsz4m;
-extern int use_initdata_pgsz64k;
-
-/*
- * disable_text_largepages and disable_initdata_largepages bitmaks are set in
- * platform or CPU specific code to disable page sizes that should not be
- * used. These variables normally shouldn't be changed via /etc/system. A
- * particular page size for text or inititialized data will be used by default
- * if both one of use_* variables is set to 1 AND this page size is not
- * disabled in the corresponding disable_* bitmask variable.
- *
- * Initial values are defined in architecture specific mach_vm_dep.c file.
- */
-extern int disable_text_largepages;
-extern int disable_initdata_largepages;
-
-/*
- * Minimum segment size tunables before 64K or 4M large pages
- * should be used to map it.
- *
- * Initial values are defined in architecture specific mach_vm_dep.c file.
- */
-extern size_t text_pgsz64k_minsize;
-extern size_t text_pgsz4m_minsize;
-extern size_t initdata_pgsz64k_minsize;
-
-/*
- * Sanity control. Don't use large pages regardless of user
- * settings if there's less than execseg_lpg_min_physmem memory installed.
- * The units for this variable is 8K pages.
- */
-pgcnt_t execseg_lpg_min_physmem = 131072; /* 1GB */
-
-extern int disable_shm_large_pages;
-pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */
-extern size_t max_shm_lpsize;
-
/* assumes TTE8K...TTE4M == szc */
static uint_t
-map_text_pgsz4m(caddr_t addr, size_t len)
-{
- caddr_t a;
-
- if (len < text_pgsz4m_minsize) {
- return (0);
- }
-
- a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t);
- if (a < addr || a >= addr + len) {
- return (0);
- }
- len -= (a - addr);
- if (len < MMU_PAGESIZE4M) {
- return (0);
- }
-
- return (1 << TTE4M);
-}
-
-static uint_t
-map_text_pgsz64k(caddr_t addr, size_t len)
-{
- caddr_t a;
- size_t svlen = len;
-
- if (len < text_pgsz64k_minsize) {
- return (0);
- }
-
- a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t);
- if (a < addr || a >= addr + len) {
- return (0);
- }
- len -= (a - addr);
- if (len < MMU_PAGESIZE64K) {
- return (0);
- }
- if (!use_text_pgsz4m ||
- disable_text_largepages & (1 << TTE4M)) {
- return (1 << TTE64K);
- }
- if (svlen < text_pgsz4m_minsize) {
- return (1 << TTE64K);
- }
- addr = a;
- a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t);
- if (a < addr || a >= addr + len) {
- return (1 << TTE64K);
- }
- len -= (a - addr);
- if (len < MMU_PAGESIZE4M) {
- return (1 << TTE64K);
- }
- return ((1 << TTE4M) | (1 << TTE64K));
-}
-
-static uint_t
-map_initdata_pgsz64k(caddr_t addr, size_t len)
-{
- caddr_t a;
-
- if (len < initdata_pgsz64k_minsize) {
- return (0);
- }
-
- a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t);
- if (a < addr || a >= addr + len) {
- return (0);
- }
- len -= (a - addr);
- if (len < MMU_PAGESIZE64K) {
- return (0);
- }
- return (1 << TTE64K);
-}
-
-/*
- * Return a bit vector of large page size codes that
- * can be used to map [addr, addr + len) region.
- */
-uint_t
-map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
-{
- uint_t ret = 0;
-
- if (physmem < execseg_lpg_min_physmem) {
- return (0);
- }
-
- if (text) {
- if (use_text_pgsz64k &&
- !(disable_text_largepages & (1 << TTE64K))) {
- ret = map_text_pgsz64k(addr, len);
- } else if (use_text_pgsz4m &&
- !(disable_text_largepages & (1 << TTE4M))) {
- ret = map_text_pgsz4m(addr, len);
- }
- } else if (use_initdata_pgsz64k &&
- !(disable_initdata_largepages & (1 << TTE64K))) {
- ret = map_initdata_pgsz64k(addr, len);
- }
-
- return (ret);
-}
-
-uint_t
-map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off)
+map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
+ size_t max_lpsize, size_t min_physmem)
{
caddr_t eaddr = addr + size;
uint_t szcvec = 0;
- int i;
caddr_t raddr;
caddr_t readdr;
size_t pgsz;
+ int i;
- if (physmem < shm_lpg_min_physmem || mmu_page_sizes <= 1 ||
- max_shm_lpsize <= MMU_PAGESIZE) {
+ if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
return (0);
}
-
for (i = mmu_page_sizes - 1; i > 0; i--) {
- if (disable_shm_large_pages & (1 << i)) {
+ if (disable_lpgs & (1 << i)) {
continue;
}
pgsz = page_get_pagesize(i);
- if (pgsz > max_shm_lpsize) {
+ if (pgsz > max_lpsize) {
continue;
}
raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
@@ -862,7 +651,7 @@
/*
* And or in the remaining enabled page sizes.
*/
- szcvec |= P2PHASE(~disable_shm_large_pages, (1 << i));
+ szcvec |= P2PHASE(~disable_lpgs, (1 << i));
szcvec &= ~1; /* no need to return 8K pagesize */
break;
}
@@ -870,6 +659,41 @@
}
/*
+ * Return a bit vector of large page size codes that
+ * can be used to map [addr, addr + len) region.
+ */
+/* ARGSUSED */
+uint_t
+map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
+ int memcntl)
+{
+ if (flags & MAP_TEXT) {
+ return (map_szcvec(addr, size, off, disable_auto_text_large_pages,
+ max_utext_lpsize, shm_lpg_min_physmem));
+
+ } else if (flags & MAP_INITDATA) {
+ return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
+ max_uidata_lpsize, privm_lpg_min_physmem));
+
+ } else if (type == MAPPGSZC_SHM) {
+ return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
+ max_shm_lpsize, shm_lpg_min_physmem));
+
+ } else if (type == MAPPGSZC_HEAP) {
+ return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
+ max_uheap_lpsize, privm_lpg_min_physmem));
+
+ } else if (type == MAPPGSZC_STACK) {
+ return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
+ max_ustack_lpsize, privm_lpg_min_physmem));
+
+ } else {
+ return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
+ max_privmap_lpsize, privm_lpg_min_physmem));
+ }
+}
+
+/*
* Anchored in the table below are counters used to keep track
* of free contiguous physical memory. Each element of the table contains
* the array of counters, the size of array which is allocated during
@@ -1240,7 +1064,6 @@
size_t memtotal = physmem * PAGESIZE;
size_t mmusz;
uint_t szc;
- extern int disable_large_pages;
if (memtotal < segkmem_lpminphysmem)
return (PAGESIZE);
--- a/usr/src/uts/sun4/vm/vm_dep.h Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4/vm/vm_dep.h Thu Oct 26 16:44:53 2006 -0700
@@ -423,18 +423,33 @@
extern int vac_shift;
/*
- * Auto large page selection support variables. Some CPU
- * implementations may differ from the defaults and will need
- * to change these.
+ * Maximum and default values for user heap, stack, private and shared
+ * anonymous memory, and user text and initialized data.
+ *
+ * Initial values are defined in architecture specific mach_vm_dep.c file.
+ * Used by map_pgsz*() routines.
*/
-extern int auto_lpg_tlb_threshold;
-extern int auto_lpg_minszc;
-extern int auto_lpg_maxszc;
-extern size_t auto_lpg_heap_default;
-extern size_t auto_lpg_stack_default;
-extern size_t auto_lpg_va_default;
-extern size_t auto_lpg_remap_threshold;
-extern pgcnt_t auto_lpg_min_physmem;
+extern size_t max_uheap_lpsize;
+extern size_t default_uheap_lpsize;
+extern size_t max_ustack_lpsize;
+extern size_t default_ustack_lpsize;
+extern size_t max_privmap_lpsize;
+extern size_t max_uidata_lpsize;
+extern size_t max_utext_lpsize;
+extern size_t max_shm_lpsize;
+
+/*
+ * For adjusting the default lpsize, for DTLB-limited page sizes.
+ */
+extern void adjust_data_maxlpsize(size_t ismpagesize);
+
+/*
+ * Sanity control. Don't use large pages regardless of user
+ * settings if there's less than privm_lpg_min_physmem or shm_lpg_min_physmem
+ * memory installed. The units for these variables are 8K pages.
+ */
+extern pgcnt_t privm_lpg_min_physmem;
+extern pgcnt_t shm_lpg_min_physmem;
/*
* AS_2_BIN macro controls the page coloring policy.
--- a/usr/src/uts/sun4u/cpu/opl_olympus.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/cpu/opl_olympus.c Thu Oct 26 16:44:53 2006 -0700
@@ -494,8 +494,6 @@
extern int at_flags;
extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int cpc_has_overflow_intr;
- extern int disable_text_largepages;
- extern int use_text_pgsz4m;
uint64_t cpu0_log;
extern uint64_t opl_cpu0_err_log;
@@ -590,16 +588,6 @@
* fpRAS.
*/
fpras_implemented = 0;
-
- /*
- * Enable 4M pages to be used for mapping user text by default. Don't
- * use large pages for initialized data segments since we may not know
- * at exec() time what should be the preferred large page size for DTLB
- * programming.
- */
- use_text_pgsz4m = 1;
- disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
- (1 << TTE32M) | (1 << TTE256M);
}
/*
@@ -700,11 +688,14 @@
*
*/
int init_mmu_page_sizes = 0;
-static int mmu_disable_ism_large_pages = ((1 << TTE64K) |
+
+static uint_t mmu_disable_large_pages = 0;
+static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
-static int mmu_disable_auto_large_pages = ((1 << TTE64K) |
+static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
-static int mmu_disable_large_pages = 0;
+static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
+ (1 << TTE512K));
/*
* Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
@@ -721,7 +712,6 @@
mmu_page_sizes = MMU_PAGE_SIZES;
mmu_hashcnt = MAX_HASHCNT;
mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
- auto_lpg_maxszc = TTE4M;
mmu_exported_pagesize_mask = (1 << TTE8K) |
(1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
(1 << TTE32M) | (1 << TTE256M);
@@ -747,19 +737,30 @@
/*
* The function returns the mmu-specific values for the
* hat's disable_large_pages, disable_ism_large_pages, and
- * disable_auto_large_pages variables.
+ * disable_auto_data_large_pages and
+ * disable_auto_text_large_pages variables.
*/
-int
+uint_t
mmu_large_pages_disabled(uint_t flag)
{
- int pages_disable = 0;
+ uint_t pages_disable = 0;
+ extern int use_text_pgsz64K;
+ extern int use_text_pgsz512K;
if (flag == HAT_LOAD) {
pages_disable = mmu_disable_large_pages;
} else if (flag == HAT_LOAD_SHARE) {
pages_disable = mmu_disable_ism_large_pages;
- } else if (flag == HAT_LOAD_AUTOLPG) {
- pages_disable = mmu_disable_auto_large_pages;
+ } else if (flag == HAT_AUTO_DATA) {
+ pages_disable = mmu_disable_auto_data_large_pages;
+ } else if (flag == HAT_AUTO_TEXT) {
+ pages_disable = mmu_disable_auto_text_large_pages;
+ if (use_text_pgsz512K) {
+ pages_disable &= ~(1 << TTE512K);
+ }
+ if (use_text_pgsz64K) {
+ pages_disable &= ~(1 << TTE64K);
+ }
}
return (pages_disable);
}
@@ -779,23 +780,22 @@
case MMU_PAGESIZE4M:
mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
- mmu_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
- auto_lpg_maxszc = TTE4M;
break;
case MMU_PAGESIZE32M:
mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE256M));
- mmu_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
- auto_lpg_maxszc = TTE32M;
+ adjust_data_maxlpsize(ism_pagesize);
break;
case MMU_PAGESIZE256M:
mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M));
- mmu_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
- auto_lpg_maxszc = TTE256M;
+ adjust_data_maxlpsize(ism_pagesize);
break;
default:
cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
--- a/usr/src/uts/sun4u/cpu/spitfire.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/cpu/spitfire.c Thu Oct 26 16:44:53 2006 -0700
@@ -34,6 +34,7 @@
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
+#include <vm/vm_dep.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
@@ -431,9 +432,6 @@
#if defined(SF_ERRATA_57)
extern caddr_t errata57_limit;
#endif
- extern int disable_text_largepages;
- extern int disable_initdata_largepages;
-
cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
@@ -514,14 +512,10 @@
#endif
/*
- * Allow only 8K, 64K and 4M pages for text by default.
- * Allow only 8K and 64K page for initialized data segments by
- * default.
+ * Disable large pages for text by default.
+ * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
*/
- disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
- (1 << TTE256M);
- disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
- (1 << TTE32M) | (1 << TTE256M);
+ max_utext_lpsize = MMU_PAGESIZE;
}
static int
@@ -4490,27 +4484,6 @@
{
}
-static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
- (1 << TTE32M) | (1 << TTE256M));
-static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
-
-/*
- * The function returns the US_II mmu-specific values for the
- * hat's disable_large_pages and disable_ism_large_pages variables.
- */
-int
-mmu_large_pages_disabled(uint_t flag)
-{
- int pages_disable = 0;
-
- if (flag == HAT_LOAD) {
- pages_disable = mmu_disable_large_pages;
- } else if (flag == HAT_LOAD_SHARE) {
- pages_disable = mmu_disable_ism_large_pages;
- }
- return (pages_disable);
-}
-
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
--- a/usr/src/uts/sun4u/cpu/us3_cheetah.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c Thu Oct 26 16:44:53 2006 -0700
@@ -69,6 +69,11 @@
#endif /* CHEETAHPLUS_ERRATUM_25 */
/*
+ * Note that 'Cheetah PRM' refers to:
+ * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
+ */
+
+/*
* Setup trap handlers.
*/
void
@@ -122,10 +127,6 @@
"ecache-associativity", &ecache_associativity, CH_ECACHE_NWAY
};
- extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg;
- extern size_t max_shm_lpsize;
-
-
for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
@@ -143,11 +144,12 @@
/*
* Cheetah's large page support has problems with large numbers of
* large pages, so just disable large pages out-of-the-box.
+ * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
*/
- exec_lpg_disable = 1;
- use_brk_lpg = 0;
- use_stk_lpg = 0;
- use_zmap_lpg = 0;
+ max_uheap_lpsize = MMU_PAGESIZE;
+ max_ustack_lpsize = MMU_PAGESIZE;
+ max_privmap_lpsize = MMU_PAGESIZE;
+ max_utext_lpsize = MMU_PAGESIZE;
max_shm_lpsize = MMU_PAGESIZE;
}
--- a/usr/src/uts/sun4u/cpu/us3_common.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/cpu/us3_common.c Thu Oct 26 16:44:53 2006 -0700
@@ -475,8 +475,6 @@
extern int at_flags;
extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int cpc_has_overflow_intr;
- extern int disable_text_largepages;
- extern int use_text_pgsz4m;
/*
* Setup chip-specific trap handlers.
@@ -574,16 +572,6 @@
fpras_implemented = 1;
/*
- * Enable 4M pages to be used for mapping user text by default. Don't
- * use large pages for initialized data segments since we may not know
- * at exec() time what should be the preferred large page size for DTLB
- * programming.
- */
- use_text_pgsz4m = 1;
- disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
- (1 << TTE32M) | (1 << TTE256M);
-
- /*
* Setup CE lookup table
*/
CE_INITDISPTBL_POPULATE(ce_disp_table);
--- a/usr/src/uts/sun4u/cpu/us3_common_mmu.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/cpu/us3_common_mmu.c Thu Oct 26 16:44:53 2006 -0700
@@ -42,60 +42,58 @@
#include <sys/panic.h>
/*
- * Note that 'Cheetah PRM' refers to:
- * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
- */
-
-/*
* pan_disable_ism_large_pages and pan_disable_large_pages are the Panther-
* specific versions of disable_ism_large_pages and disable_large_pages,
* and feed back into those two hat variables at hat initialization time,
* for Panther-only systems.
*
- * chpjag_disable_ism_large_pages is the Ch/Jaguar-specific version of
- * disable_ism_large_pages. Ditto for chjag_disable_large_pages.
+ * chjag_disable_large_pages is the Ch/Jaguar-specific version of
+ * disable_large_pages. Ditto for pan_disable_large_pages.
+ * Note that the Panther and Ch/Jaguar ITLB do not support 32M/256M pages.
*/
static int panther_only = 0;
-static int pan_disable_ism_large_pages = ((1 << TTE64K) |
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
-static int pan_disable_large_pages = (1 << TTE256M);
-static int pan_disable_auto_large_pages = ((1 << TTE64K) |
+static uint_t pan_disable_large_pages = (1 << TTE256M);
+static uint_t chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
+
+static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
-
-static int chjag_disable_ism_large_pages = ((1 << TTE64K) |
+static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
-static int chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
-static int chjag_disable_auto_large_pages = ((1 << TTE64K) |
+static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
/*
- * The function returns the USIII-IV mmu-specific values for the
+ * The function returns the USIII+(i)-IV+ mmu-specific values for the
* hat's disable_large_pages and disable_ism_large_pages variables.
* Currently the hat's disable_large_pages and disable_ism_large_pages
* already contain the generic sparc 4 page size info, and the return
* values are or'd with those values.
*/
-int
+uint_t
mmu_large_pages_disabled(uint_t flag)
{
- int pages_disable = 0;
+ uint_t pages_disable = 0;
+ extern int use_text_pgsz64K;
+ extern int use_text_pgsz512K;
- if (panther_only) {
- if (flag == HAT_LOAD) {
+ if (flag == HAT_LOAD) {
+ if (panther_only) {
pages_disable = pan_disable_large_pages;
- } else if (flag == HAT_LOAD_SHARE) {
- pages_disable = pan_disable_ism_large_pages;
- } else if (flag == HAT_LOAD_AUTOLPG) {
- pages_disable = pan_disable_auto_large_pages;
+ } else {
+ pages_disable = chjag_disable_large_pages;
}
- } else {
- if (flag == HAT_LOAD) {
- pages_disable = chjag_disable_large_pages;
- } else if (flag == HAT_LOAD_SHARE) {
- pages_disable = chjag_disable_ism_large_pages;
- } else if (flag == HAT_LOAD_AUTOLPG) {
- pages_disable = chjag_disable_auto_large_pages;
+ } else if (flag == HAT_LOAD_SHARE) {
+ pages_disable = mmu_disable_ism_large_pages;
+ } else if (flag == HAT_AUTO_DATA) {
+ pages_disable = mmu_disable_auto_data_large_pages;
+ } else if (flag == HAT_AUTO_TEXT) {
+ pages_disable = mmu_disable_auto_text_large_pages;
+ if (use_text_pgsz512K) {
+ pages_disable &= ~(1 << TTE512K);
+ }
+ if (use_text_pgsz64K) {
+ pages_disable &= ~(1 << TTE64K);
}
}
return (pages_disable);
@@ -141,7 +139,7 @@
* since it would be bad form to panic due
* to a user typo.
*
- * The function re-initializes the pan_disable_ism_large_pages and
+ * The function re-initializes the mmu_disable_ism_large_pages and
* pan_disable_large_pages variables, which are closely related.
* Aka, if 32M is the desired [D]ISM page sizes, then 256M cannot be allowed
* for non-ISM large page usage, or DTLB conflict will occur. Please see the
@@ -151,37 +149,37 @@
mmu_init_large_pages(size_t ism_pagesize)
{
if (cpu_impl_dual_pgsz == 0) { /* disable_dual_pgsz flag */
- pan_disable_ism_large_pages = ((1 << TTE64K) |
+ pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
+ mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
- pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
- auto_lpg_maxszc = TTE4M;
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
+ (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
return;
}
switch (ism_pagesize) {
case MMU_PAGESIZE4M:
- pan_disable_ism_large_pages = ((1 << TTE64K) |
+ pan_disable_large_pages = (1 << TTE256M);
+ mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
- pan_disable_large_pages = (1 << TTE256M);
- pan_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
- auto_lpg_maxszc = TTE4M;
break;
case MMU_PAGESIZE32M:
- pan_disable_ism_large_pages = ((1 << TTE64K) |
+ pan_disable_large_pages = (1 << TTE256M);
+ mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE256M));
- pan_disable_large_pages = (1 << TTE256M);
- pan_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
- auto_lpg_maxszc = TTE32M;
+ adjust_data_maxlpsize(ism_pagesize);
break;
case MMU_PAGESIZE256M:
- pan_disable_ism_large_pages = ((1 << TTE64K) |
+ pan_disable_large_pages = (1 << TTE32M);
+ mmu_disable_ism_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE32M));
- pan_disable_large_pages = (1 << TTE32M);
- pan_disable_auto_large_pages = ((1 << TTE64K) |
+ mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
(1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
- auto_lpg_maxszc = TTE256M;
+ adjust_data_maxlpsize(ism_pagesize);
break;
default:
cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
@@ -211,7 +209,6 @@
(1 << TTE32M) | (1 << TTE256M);
panther_dtlb_restrictions = 1;
panther_only = 1;
- auto_lpg_maxszc = TTE4M;
} else if (npanther > 0) {
panther_dtlb_restrictions = 1;
}
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c Thu Oct 26 16:44:53 2006 -0700
@@ -95,55 +95,42 @@
};
/*
- * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m
- * can be set in platform or CPU specific code but user can change the
- * default values via /etc/system.
+ * use_text_pgsz64K and use_text_pgsz512K allow the user to turn on these
+ * additional text page sizes for USIII-IV+ and OPL by changing the default
+ * values via /etc/system.
*/
-
-int use_text_pgsz64k = 0;
-int use_text_pgsz4m = 0;
-int use_initdata_pgsz64k = 0;
-
-/*
- * disable_text_largepages and disable_initdata_largepages bitmaks are set in
- * platform or CPU specific code to disable page sizes that should not be
- * used. These variables normally shouldn't be changed via /etc/system. A
- * particular page size for text or inititialized data will be used by default
- * if both one of use_* variables is set to 1 AND this page size is not
- * disabled in the corresponding disable_* bitmask variable.
- */
-
-int disable_text_largepages = (1 << TTE4M) | (1 << TTE64K);
-int disable_initdata_largepages = (1 << TTE64K);
+int use_text_pgsz64K = 0;
+int use_text_pgsz512K = 0;
/*
- * Minimum segment size tunables before 64K or 4M large pages
- * should be used to map it.
+ * Maximum and default segment size tunables for user heap, stack, private
+ * and shared anonymous memory, and user text and initialized data.
*/
-size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
-size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
-size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
-
-size_t max_shm_lpsize = ULONG_MAX;
+size_t max_uheap_lpsize = MMU_PAGESIZE4M;
+size_t default_uheap_lpsize = MMU_PAGESIZE;
+size_t max_ustack_lpsize = MMU_PAGESIZE4M;
+size_t default_ustack_lpsize = MMU_PAGESIZE;
+size_t max_privmap_lpsize = MMU_PAGESIZE4M;
+size_t max_uidata_lpsize = MMU_PAGESIZE;
+size_t max_utext_lpsize = MMU_PAGESIZE4M;
+size_t max_shm_lpsize = MMU_PAGESIZE4M;
-/*
- * Platforms with smaller or larger TLBs may wish to change this. Most
- * sun4u platforms can hold 1024 8K entries by default and most processes
- * are observed to be < 6MB on these machines, so we decide to move up
- * here to give ourselves some wiggle room for other, smaller segments.
- */
-int auto_lpg_tlb_threshold = 768;
-int auto_lpg_minszc = TTE4M;
-int auto_lpg_maxszc = TTE4M;
-size_t auto_lpg_heap_default = MMU_PAGESIZE;
-size_t auto_lpg_stack_default = MMU_PAGESIZE;
-size_t auto_lpg_va_default = MMU_PAGESIZE;
-size_t auto_lpg_remap_threshold = 0;
-/*
- * Number of pages in 1 GB. Don't enable automatic large pages if we have
- * fewer than this many pages.
- */
-pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
+void
+adjust_data_maxlpsize(size_t ismpagesize)
+{
+ if (max_uheap_lpsize == MMU_PAGESIZE4M) {
+ max_uheap_lpsize = ismpagesize;
+ }
+ if (max_ustack_lpsize == MMU_PAGESIZE4M) {
+ max_ustack_lpsize = ismpagesize;
+ }
+ if (max_privmap_lpsize == MMU_PAGESIZE4M) {
+ max_privmap_lpsize = ismpagesize;
+ }
+ if (max_shm_lpsize == MMU_PAGESIZE4M) {
+ max_shm_lpsize = ismpagesize;
+ }
+}
/*
* map_addr_proc() is the routine called when the system is to
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c Thu Oct 26 16:33:33 2006 -0700
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c Thu Oct 26 16:44:53 2006 -0700
@@ -91,63 +91,18 @@
};
/*
- * Enable usage of 64k/4M pages for text and 64k pages for initdata for
- * all sun4v platforms. These variables can be overwritten by the platmod
- * or the CPU module. User can also change the setting via /etc/system.
- */
-
-int use_text_pgsz64k = 1;
-int use_text_pgsz4m = 1;
-int use_initdata_pgsz64k = 1;
-
-/*
- * disable_text_largepages and disable_initdata_largepages bitmaks reflect
- * both unconfigured and undesirable page sizes. Current implementation
- * supports 64K and 4M page sizes for text and only 64K for data. Rest of
- * the page sizes are not currently supported, hence disabled below. In
- * future, when support is added for any other page size, it should be
- * reflected below.
- *
- * Note that these bitmask can be set in platform or CPU specific code to
- * disable page sizes that should not be used. These variables normally
- * shouldn't be changed via /etc/system.
- *
- * These bitmasks are also updated within hat_init to reflect unsupported
- * page sizes on a sun4v processor per mmu_exported_pagesize_mask global
- * variable.
+ * Maximum and default segment size tunables for user heap, stack, private
+ * and shared anonymous memory, and user text and initialized data.
*/
-
-int disable_text_largepages =
- (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M) | (1 << TTE2G) |
- (1 << TTE16G);
-int disable_initdata_largepages =
- (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M) |
- (1 << TTE2G) | (1 << TTE16G);
-
-/*
- * Minimum segment size tunables before 64K or 4M large pages
- * should be used to map it.
- */
-size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
-size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
-size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
-
+size_t max_uheap_lpsize = MMU_PAGESIZE64K;
+size_t default_uheap_lpsize = MMU_PAGESIZE64K;
+size_t max_ustack_lpsize = MMU_PAGESIZE64K;
+size_t default_ustack_lpsize = MMU_PAGESIZE64K;
+size_t max_privmap_lpsize = MMU_PAGESIZE64K;
+size_t max_uidata_lpsize = MMU_PAGESIZE64K;
+size_t max_utext_lpsize = MMU_PAGESIZE4M;
size_t max_shm_lpsize = MMU_PAGESIZE4M;
-/* Auto large page tunables. */
-int auto_lpg_tlb_threshold = 32;
-int auto_lpg_minszc = TTE64K;
-int auto_lpg_maxszc = TTE64K;
-size_t auto_lpg_heap_default = MMU_PAGESIZE64K;
-size_t auto_lpg_stack_default = MMU_PAGESIZE64K;
-size_t auto_lpg_va_default = MMU_PAGESIZE64K;
-size_t auto_lpg_remap_threshold = 0; /* always remap */
-/*
- * Number of pages in 1 GB. Don't enable automatic large pages if we have
- * fewer than this many pages.
- */
-pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
-
/*
* map_addr_proc() is the routine called when the system is to
* choose an address for the user. We will pick an address