--- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c Wed Jun 17 15:32:10 2009 -0700
@@ -4833,6 +4833,8 @@
/* from memory.c */
{ "page", "walk all pages, or those from the specified vnode",
page_walk_init, page_walk_step, page_walk_fini },
+ { "allpages", "walk all pages, including free pages",
+ allpages_walk_init, allpages_walk_step, allpages_walk_fini },
{ "memlist", "walk specified memlist",
NULL, memlist_walk_step, NULL },
{ "swapinfo", "walk swapinfo structures",
--- a/usr/src/cmd/mdb/common/modules/genunix/memory.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/genunix/memory.c Wed Jun 17 15:32:10 2009 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -214,9 +214,222 @@
mdb_free(wsp->walk_data, sizeof (page_walk_data_t));
}
+/*
+ * allpages walks all pages in the system, in the order they appear
+ * in the memseg structures
+ */
+
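+/* number of page_t entries read from the target per mdb_vread call */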
+#define PAGE_BUFFER 128
+
+int
+allpages_walk_init(mdb_walk_state_t *wsp)
+{
+ if (wsp->walk_addr != 0) {
+ mdb_warn("allpages only supports global walks.\n");
+ return (WALK_ERR);
+ }
+
+ if (mdb_layered_walk("memseg", wsp) == -1) {
+ mdb_warn("couldn't walk 'memseg'");
+ return (WALK_ERR);
+ }
+
+ wsp->walk_data = mdb_alloc(sizeof (page_t) * PAGE_BUFFER, UM_SLEEP);
+ return (WALK_NEXT);
+}
+
+int
+allpages_walk_step(mdb_walk_state_t *wsp)
+{
+ const struct memseg *msp = wsp->walk_layer;
+ page_t *buf = wsp->walk_data;
+ size_t pg_read, i;
+ size_t pg_num = msp->pages_end - msp->pages_base;
+ const page_t *pg_addr = msp->pages;
+
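+	/* read the memseg's page array in PAGE_BUFFER-sized chunks */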
+ while (pg_num > 0) {
+ pg_read = MIN(pg_num, PAGE_BUFFER);
+
+ if (mdb_vread(buf, pg_read * sizeof (page_t),
+ (uintptr_t)pg_addr) == -1) {
+ mdb_warn("can't read page_t's at %#lx", pg_addr);
+ return (WALK_ERR);
+ }
+ for (i = 0; i < pg_read; i++) {
+ int ret = wsp->walk_callback((uintptr_t)&pg_addr[i],
+ &buf[i], wsp->walk_cbdata);
+
+ if (ret != WALK_NEXT)
+ return (ret);
+ }
+ pg_num -= pg_read;
+ pg_addr += pg_read;
+ }
+
+ return (WALK_NEXT);
+}
+
+void
+allpages_walk_fini(mdb_walk_state_t *wsp)
+{
+ mdb_free(wsp->walk_data, sizeof (page_t) * PAGE_BUFFER);
+}
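+
+/*
+ * Typical usage from within mdb:
+ *   > ::walk allpages | ::print page_t p_vnode
+ */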
+
+/*
+ * Hash table + LRU queue.
+ * This table is used to cache recently read vnodes for the memstat
+ * command, to reduce the number of mdb_vread calls. This greatly
+ * speeds up the memstat command on live systems with large CPU counts.
+ */
+
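+/* hash table sizes; primes are used for a better key distribution */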
+#define VN_SMALL 401
+#define VN_LARGE 10007
+#define VN_HTABLE_KEY(p, hp) ((p) % ((hp)->vn_htable_buckets))
+
+struct vn_htable_list {
+ uint_t vn_flag; /* v_flag from vnode */
+ uintptr_t vn_ptr; /* pointer to vnode */
+ struct vn_htable_list *vn_q_next; /* queue next pointer */
+ struct vn_htable_list *vn_q_prev; /* queue prev pointer */
+ struct vn_htable_list *vn_h_next; /* hash table pointer */
+};
+
+/*
+ * vn_q_first -> points to the head of the queue: the vnode that was
+ * most recently used
+ * vn_q_last -> points to the least recently used vnode, which is
+ * reused once a new vnode is read
+ * vn_htable -> hash table
+ * vn_htable_buf -> contains htable objects
+ * vn_htable_size -> total number of items in the hash table
+ * vn_htable_buckets -> number of buckets in the hash table
+ */
+typedef struct vn_htable {
+ struct vn_htable_list *vn_q_first;
+ struct vn_htable_list *vn_q_last;
+ struct vn_htable_list **vn_htable;
+ struct vn_htable_list *vn_htable_buf;
+ int vn_htable_size;
+ int vn_htable_buckets;
+} vn_htable_t;
+
+
+/* allocate memory, initialize the hash table and LRU queue */
+static void
+vn_htable_init(vn_htable_t *hp, size_t vn_size)
+{
+ int i;
+ int htable_size = MAX(vn_size, VN_LARGE);
+
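+	/*
+	 * UM_GC allocations are garbage-collected by mdb once the dcmd
+	 * completes, so no explicit free is needed.
+	 */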
+ if ((hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
+ * htable_size, UM_NOSLEEP|UM_GC)) == NULL) {
+ htable_size = VN_SMALL;
+ hp->vn_htable_buf = mdb_zalloc(sizeof (struct vn_htable_list)
+ * htable_size, UM_SLEEP|UM_GC);
+ }
+
+ hp->vn_htable = mdb_zalloc(sizeof (struct vn_htable_list *)
+ * htable_size, UM_SLEEP|UM_GC);
+
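+	/* link the buffer entries into a doubly-linked LRU queue */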
+ hp->vn_q_first = &hp->vn_htable_buf[0];
+ hp->vn_q_last = &hp->vn_htable_buf[htable_size - 1];
+ hp->vn_q_first->vn_q_next = &hp->vn_htable_buf[1];
+ hp->vn_q_last->vn_q_prev = &hp->vn_htable_buf[htable_size - 2];
+
+ for (i = 1; i < (htable_size-1); i++) {
+ hp->vn_htable_buf[i].vn_q_next = &hp->vn_htable_buf[i + 1];
+ hp->vn_htable_buf[i].vn_q_prev = &hp->vn_htable_buf[i - 1];
+ }
+
+ hp->vn_htable_size = htable_size;
+ hp->vn_htable_buckets = htable_size;
+}
+
+
+/*
+ * Find the vnode whose address is ptr, and return its v_flag in vp->v_flag.
+ * The function tries to find the needed information in the following
+ * order:
+ *
+ * 1. check if ptr is at the front of the queue
+ * 2. check if ptr is in the hash table (if so, move it to the front of
+ *    the queue)
+ * 3. do an mdb_vread, remove the last item from the queue and the hash
+ *    table, insert the new information into the freed object, and put
+ *    that object at the front of the queue
+ */
+static int
+vn_get(vn_htable_t *hp, struct vnode *vp, uintptr_t ptr)
+{
+ int hkey;
+ struct vn_htable_list *hent, **htmp, *q_next, *q_prev;
+ struct vn_htable_list *q_first = hp->vn_q_first;
+
+ /* 1. vnode ptr is the first in queue, just get v_flag and return */
+ if (q_first->vn_ptr == ptr) {
+ vp->v_flag = q_first->vn_flag;
+
+ return (0);
+ }
+
+ /* 2. search the hash table for this ptr */
+ hkey = VN_HTABLE_KEY(ptr, hp);
+ hent = hp->vn_htable[hkey];
+ while (hent && (hent->vn_ptr != ptr))
+ hent = hent->vn_h_next;
+
+	/* 3. if hent is NULL, it was not in the hash table; do mdb_vread */
+ if (hent == NULL) {
+ struct vnode vn;
+
+ if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
+ mdb_warn("unable to read vnode_t at %#lx", ptr);
+ return (-1);
+ }
+
+		/* insert the newly read data into the last element in the queue */
+ hent = hp->vn_q_last;
+
+ /* remove last hp->vn_q_last object from hash table */
+ if (hent->vn_ptr) {
+ htmp = &hp->vn_htable[VN_HTABLE_KEY(hent->vn_ptr, hp)];
+ while (*htmp != hent)
+ htmp = &(*htmp)->vn_h_next;
+ *htmp = hent->vn_h_next;
+ }
+
+ /* insert data into new free object */
+ hent->vn_ptr = ptr;
+ hent->vn_flag = vn.v_flag;
+
+ /* insert new object into hash table */
+ hent->vn_h_next = hp->vn_htable[hkey];
+ hp->vn_htable[hkey] = hent;
+ }
+
+ /* Remove from queue. hent is not first, vn_q_prev is not NULL */
+ q_next = hent->vn_q_next;
+ q_prev = hent->vn_q_prev;
+ if (q_next == NULL)
+ hp->vn_q_last = q_prev;
+ else
+ q_next->vn_q_prev = q_prev;
+ q_prev->vn_q_next = q_next;
+
+ /* Add to the front of queue */
+ hent->vn_q_prev = NULL;
+ hent->vn_q_next = q_first;
+ q_first->vn_q_prev = hent;
+ hp->vn_q_first = hent;
+
+ /* Set v_flag in vnode pointer from hent */
+ vp->v_flag = hent->vn_flag;
+
+ return (0);
+}
+
/* Summary statistics of pages */
typedef struct memstat {
struct vnode *ms_kvp; /* Cached address of kernel vnode */
+ struct vnode *ms_unused_vp; /* Unused pages vnode pointer */
struct vnode *ms_zvp; /* Cached address of zio vnode */
uint64_t ms_kmem; /* Pages of kernel memory */
uint64_t ms_zfs_data; /* Pages of zfs data */
@@ -225,6 +438,8 @@
uint64_t ms_exec; /* Pages of exec/library memory */
uint64_t ms_cachelist; /* Pages on the cachelist (free) */
uint64_t ms_total; /* Pages on page hash */
+ vn_htable_t *ms_vn_htable; /* Pointer to hash table */
+ struct vnode ms_vn; /* vnode buffer */
} memstat_t;
#define MS_PP_ISKAS(pp, stats) \
@@ -234,36 +449,28 @@
(((stats)->ms_zvp != NULL) && ((pp)->p_vnode == (stats)->ms_zvp))
/*
- * Summarize pages by type; called from page walker.
+ * Summarize pages by type and update the statistics
*/
/* ARGSUSED */
static int
memstat_callback(page_t *page, page_t *pp, memstat_t *stats)
{
- struct vnode vn, *vp;
- uintptr_t ptr;
+ struct vnode *vp = &stats->ms_vn;
- /* read page's vnode pointer */
- if ((ptr = (uintptr_t)(pp->p_vnode)) != NULL) {
- if (mdb_vread(&vn, sizeof (vnode_t), ptr) == -1) {
- mdb_warn("unable to read vnode_t at %#lx",
- ptr);
- return (WALK_ERR);
- }
- vp = &vn;
- } else
- vp = NULL;
-
- if (PP_ISFREE(pp))
- stats->ms_cachelist++;
- else if (vp && IS_SWAPFSVP(vp))
- stats->ms_anon++;
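+	/*
+	 * The checks are ordered so the cheap vnode-pointer and flag tests
+	 * are made before vn_get, which may have to read the vnode from
+	 * the target.
+	 */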
+ if (pp->p_vnode == NULL || pp->p_vnode == stats->ms_unused_vp)
+ return (WALK_NEXT);
+ else if (MS_PP_ISKAS(pp, stats))
+ stats->ms_kmem++;
else if (MS_PP_ISZFS_DATA(pp, stats))
stats->ms_zfs_data++;
- else if (MS_PP_ISKAS(pp, stats))
- stats->ms_kmem++;
- else if (vp && (((vp)->v_flag & VVMEXEC)) != 0)
+ else if (PP_ISFREE(pp))
+ stats->ms_cachelist++;
+ else if (vn_get(stats->ms_vn_htable, vp, (uintptr_t)pp->p_vnode))
+ return (WALK_ERR);
+ else if (IS_SWAPFSVP(vp))
+ stats->ms_anon++;
+ else if ((vp->v_flag & VVMEXEC) != 0)
stats->ms_exec++;
else
stats->ms_vnode++;
@@ -281,19 +488,33 @@
pgcnt_t total_pages, physmem;
ulong_t freemem;
memstat_t stats;
- memstat_t unused_stats;
GElf_Sym sym;
+ vn_htable_t ht;
+ uintptr_t vn_size = 0;
#if defined(__i386) || defined(__amd64)
bln_stats_t bln_stats;
ssize_t bln_size;
#endif
bzero(&stats, sizeof (memstat_t));
- bzero(&unused_stats, sizeof (memstat_t));
- if (argc != 0 || (flags & DCMD_ADDRSPEC))
+	/*
+	 * -s size is an internal option that specifies the size of vn_htable.
+	 * The hash table size is chosen as follows: if the user specified a
+	 * size larger than VN_LARGE, try it, but fall back to VN_SMALL if the
+	 * allocation fails; otherwise try VN_LARGE, again falling back to
+	 * VN_SMALL if the allocation fails. For better hash table efficiency
+	 * it is highly recommended to use a prime number of buckets.
+	 */
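+	/* e.g. "::memstat -s 20011" requests a 20011-bucket hash table */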
+ if ((flags & DCMD_ADDRSPEC) || mdb_getopts(argc, argv,
+ 's', MDB_OPT_UINTPTR, &vn_size, NULL) != argc)
return (DCMD_USAGE);
+ /* Initialize vnode hash list and queue */
+ vn_htable_init(&ht, vn_size);
+ stats.ms_vn_htable = &ht;
+
/* Grab base page size */
if (mdb_readvar(&pagesize, "_pagesize") == -1) {
mdb_warn("unable to read _pagesize");
@@ -332,37 +553,26 @@
stats.ms_zvp = (struct vnode *)(uintptr_t)sym.st_value;
}
- /* Walk page structures, summarizing usage */
- if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
- &stats) == -1) {
- mdb_warn("can't walk pages");
- return (DCMD_ERR);
- }
-
- /* read unused pages vnode */
+ /*
+ * If physmem != total_pages, then the administrator has limited the
+ * number of pages available in the system. Excluded pages are
+ * associated with the unused pages vnode. Read this vnode so the
+ * pages can be excluded in the page accounting.
+ */
if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "unused_pages_vp",
(GElf_Sym *)&sym) == -1) {
mdb_warn("unable to read unused_pages_vp");
return (DCMD_ERR);
}
-
- unused_stats.ms_kvp = (struct vnode *)(uintptr_t)sym.st_value;
+ stats.ms_unused_vp = (struct vnode *)(uintptr_t)sym.st_value;
- /* Find unused pages */
- if (mdb_walk("page", (mdb_walk_cb_t)memstat_callback,
- &unused_stats) == -1) {
- mdb_warn("can't walk pages");
+ /* walk all pages, collect statistics */
+ if (mdb_walk("allpages", (mdb_walk_cb_t)memstat_callback,
+ &stats) == -1) {
+ mdb_warn("can't walk memseg");
return (DCMD_ERR);
}
- /*
- * If physmem != total_pages, then the administrator has limited the
- * number of pages available in the system. In order to account for
- * this, we reduce the amount normally attributed to the page cache.
- */
- stats.ms_vnode -= unused_stats.ms_kmem;
- stats.ms_total -= unused_stats.ms_kmem;
-
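+/*
+ * Convert a page count to a percentage of physmem; when total_pages
+ * equals physmem, the 5 * total_pages term rounds to the nearest percent.
+ */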
#define MS_PCT_TOTAL(x) ((ulong_t)((((5 * total_pages) + ((x) * 1000ull))) / \
((physmem) * 10)))
--- a/usr/src/cmd/mdb/common/modules/genunix/memory.h Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/genunix/memory.h Wed Jun 17 15:32:10 2009 -0700
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2000-2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#ifndef _MEMORY_H
#define _MEMORY_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -37,6 +34,9 @@
int page_walk_step(mdb_walk_state_t *);
void page_walk_fini(mdb_walk_state_t *);
int page(uintptr_t, uint_t, int, const mdb_arg_t *);
+int allpages_walk_init(mdb_walk_state_t *);
+int allpages_walk_step(mdb_walk_state_t *);
+void allpages_walk_fini(mdb_walk_state_t *);
int memstat(uintptr_t, uint_t, int, const mdb_arg_t *);
int swap_walk_init(mdb_walk_state_t *);
--- a/usr/src/uts/common/io/mem.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/common/io/mem.c Wed Jun 17 15:32:10 2009 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -234,18 +234,34 @@
#pragma weak mach_sync_icache_pa
static int
-mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio)
+mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
+ page_t *pp)
{
int error = 0;
+ int devload = 0;
+ int is_memory = pf_is_memory(pfn);
size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
(size_t)uio->uio_iov->iov_len);
+ caddr_t va = NULL;
mutex_enter(&mm_lock);
- hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
- (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ | PROT_WRITE),
- HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
+
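+	/*
+	 * For ordinary memory with kpm enabled, use a kpm mapping and
+	 * avoid the hat_devload/hat_unload cycle on mm_map.
+	 */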
+ if (is_memory && kpm_enable) {
+ if (pp)
+ va = hat_kpm_mapin(pp, NULL);
+ else
+ va = hat_kpm_mapin_pfn(pfn);
+ }
- if (!pf_is_memory(pfn)) {
+ if (va == NULL) {
+ hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
+ (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
+ HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
+ va = mm_map;
+ devload = 1;
+ }
+
+ if (!is_memory) {
if (allowio) {
size_t c = uio->uio_iov->iov_len;
@@ -256,7 +272,7 @@
} else
error = EIO;
} else {
- error = uiomove(&mm_map[pageoff], nbytes, rw, uio);
+ error = uiomove(va + pageoff, nbytes, rw, uio);
/*
* In case this has changed executable code,
@@ -267,7 +283,13 @@
}
}
- hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
+ if (devload)
+ hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
+ else if (pp)
+ hat_kpm_mapout(pp, NULL, va);
+ else
+ hat_kpm_mapout_pfn(pfn);
+
mutex_exit(&mm_lock);
return (error);
}
@@ -330,13 +352,13 @@
v = BTOP((u_offset_t)uio->uio_loffset);
error = mmio(uio, rw, v,
- uio->uio_loffset & PAGEOFFSET, 0);
+ uio->uio_loffset & PAGEOFFSET, 0, NULL);
break;
case M_KMEM:
case M_ALLKMEM:
{
- page_t **ppp;
+ page_t **ppp = NULL;
caddr_t vaddr = (caddr_t)uio->uio_offset;
int try_lock = NEED_LOCK_KVADDR(vaddr);
int locked = 0;
@@ -369,7 +391,8 @@
}
error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
- minor == M_ALLKMEM || mm_kmem_io_access);
+ minor == M_ALLKMEM || mm_kmem_io_access,
+ (locked && ppp) ? *ppp : NULL);
if (locked)
as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
S_WRITE);
--- a/usr/src/uts/common/sys/vnode.h Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/common/sys/vnode.h Wed Jun 17 15:32:10 2009 -0700
@@ -326,6 +326,12 @@
(pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0)
#define VISSWAPFS 0x20000 /* vnode is being used for swapfs */
+
+/*
+ * The mdb memstat command assumes that IS_SWAPFSVP only uses the
+ * vnode's v_flag field. If this changes, cache the additional
+ * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
+ */
#define IS_SWAPFSVP(vp) (((vp)->v_flag & VISSWAPFS) != 0)
#define V_SYSATTR 0x40000 /* vnode is a GFS system attribute */
--- a/usr/src/uts/common/vm/hat.h Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/common/vm/hat.h Wed Jun 17 15:32:10 2009 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -248,6 +248,8 @@
*/
caddr_t hat_kpm_mapin(struct page *, struct kpme *);
void hat_kpm_mapout(struct page *, struct kpme *, caddr_t);
+caddr_t hat_kpm_mapin_pfn(pfn_t);
+void hat_kpm_mapout_pfn(pfn_t);
caddr_t hat_kpm_page2va(struct page *, int);
struct page *hat_kpm_vaddr2page(caddr_t);
int hat_kpm_fault(struct hat *, caddr_t);
--- a/usr/src/uts/i86pc/vm/hat_i86.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/i86pc/vm/hat_i86.c Wed Jun 17 15:32:10 2009 -0700
@@ -138,7 +138,7 @@
/*
* AMD shanghai processors provide better management of 1gb ptes in its tlb.
- * By default, 1g page suppport will be disabled for pre-shanghai AMD
+ * By default, 1g page support will be disabled for pre-shanghai AMD
* processors that don't have optimal tlb support for the 1g page size.
* chk_optimal_1gtlb can be set to 0 to force 1g page support on sub-optimal
* processors.
@@ -1299,7 +1299,7 @@
int rv = 0;
/*
- * Is this a consistant (ie. need mapping list lock) mapping?
+ * Is this a consistent (ie. need mapping list lock) mapping?
*/
is_consist = (pp != NULL && (flags & HAT_LOAD_NOCONSIST) == 0);
@@ -1991,15 +1991,22 @@
/*
* Service a delayed TLB flush if coming out of being idle.
- * It will be called from cpu idle notification with interrupt disabled.
*/
void
tlb_service(void)
{
+ ulong_t flags = getflags();
ulong_t tlb_info;
ulong_t found;
/*
+ * Be sure interrupts are off while doing this so that
+ * higher level interrupts correctly wait for flushes to finish.
+ */
+ if (flags & PS_IE)
+ flags = intr_clear();
+
+ /*
* We only have to do something if coming out of being idle.
*/
tlb_info = CPU->cpu_m.mcpu_tlb_info;
@@ -2017,6 +2024,12 @@
if (tlb_info & TLB_INVAL_ALL)
flush_all_tlb_entries();
}
+
+ /*
+ * Restore interrupt enable control bit.
+ */
+ if (flags & PS_IE)
+ sti();
}
#endif /* !__xpv */
@@ -3165,7 +3178,7 @@
/*
* Called when all mappings to a page should have write permission removed.
- * Mostly stolem from hat_pagesync()
+ * Mostly stolen from hat_pagesync()
*/
static void
hati_page_clrwrt(struct page *pp)
@@ -3298,8 +3311,8 @@
/*
* If flag is specified, returns 0 if attribute is disabled
- * and non zero if enabled. If flag specifes multiple attributs
- * then returns 0 if ALL atriibutes are disabled. This is an advisory
+ * and non zero if enabled. If flag specifies multiple attributes
+ * then returns 0 if ALL attributes are disabled. This is an advisory
* call.
*/
uint_t
@@ -4227,6 +4240,38 @@
}
/*
+ * hat_kpm_mapin_pfn is used to obtain a kpm mapping for physical
+ * memory addresses that are not described by a page_t. It can
+ * also be used for normal pages that are not locked, but beware
+ * this is dangerous - no locking is performed, so the identity of
+ * the page could change. hat_kpm_mapin_pfn is not supported when
+ * vac_colors > 1, because the chosen va depends on the page identity,
+ * which could change.
+ * The caller must only pass pfn's for valid physical addresses; violation
+ * of this rule will cause panic.
+ */
+caddr_t
+hat_kpm_mapin_pfn(pfn_t pfn)
+{
+ caddr_t paddr, vaddr;
+
+ if (kpm_enable == 0)
+ return ((caddr_t)NULL);
+
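+	/* the kpm segment maps all physical memory linearly at kpm_vbase */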
+ paddr = (caddr_t)ptob(pfn);
+ vaddr = (uintptr_t)kpm_vbase + paddr;
+
+ return ((caddr_t)vaddr);
+}
+
+/*ARGSUSED*/
+void
+hat_kpm_mapout_pfn(pfn_t pfn)
+{
+ /* empty */
+}
+
+/*
* Return the kpm virtual address for a specific pfn
*/
caddr_t
--- a/usr/src/uts/sun4u/vm/mach_kpm.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/sun4u/vm/mach_kpm.c Wed Jun 17 15:32:10 2009 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -58,6 +58,8 @@
void sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp);
void sfmmu_kpm_page_cache(page_t *, int, int);
+extern uint_t vac_colors;
+
/*
* Kernel Physical Mapping (kpm) facility
*/
@@ -168,6 +170,46 @@
}
/*
+ * hat_kpm_mapin_pfn is used to obtain a kpm mapping for physical
+ * memory addresses that are not described by a page_t. It can
+ * only be supported if vac_colors=1, because there is no page_t
+ * and corresponding kpm_page_t to track VAC conflicts. Currently,
+ * this may not be used on pfn's backed by page_t's, because the
+ * kpm state may not be consistent in hat_kpm_fault if the page is
+ * mapped using both this routine and hat_kpm_mapin. KPM should be
+ * cleaned up on sun4u/vac_colors=1 to be as minimal as it is on sun4v.
+ * The caller must only pass pfn's for valid physical addresses; violation
+ * of this rule will cause panic.
+ */
+caddr_t
+hat_kpm_mapin_pfn(pfn_t pfn)
+{
+ caddr_t paddr, vaddr;
+ tte_t tte;
+ uint_t szc = kpm_smallpages ? TTE8K : TTE4M;
+ uint_t shift = kpm_smallpages ? MMU_PAGESHIFT : MMU_PAGESHIFT4M;
+
+ if (kpm_enable == 0 || vac_colors > 1 ||
+ page_numtomemseg_nolock(pfn) != NULL)
+ return ((caddr_t)NULL);
+
+ paddr = (caddr_t)ptob(pfn);
+ vaddr = (uintptr_t)kpm_vbase + paddr;
+
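+	/* build a cacheable TTE for the pfn and preload it into the kpm TSB */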
+ KPM_TTE_VCACHED(tte.ll, pfn, szc);
+ sfmmu_kpm_load_tsb(vaddr, &tte, shift);
+
+ return (vaddr);
+}
+
+/*ARGSUSED*/
+void
+hat_kpm_mapout_pfn(pfn_t pfn)
+{
+ /* empty */
+}
+
+/*
* Return the kpm virtual address for the page at pp.
* If checkswap is non zero and the page is backed by a
* swap vnode the physical address is used rather than
@@ -279,17 +321,28 @@
SFMMU_KPM_VTOP(vaddr, paddr);
pfn = (pfn_t)btop(paddr);
- mseg = page_numtomemseg_nolock(pfn);
- if (mseg == NULL)
- return (EFAULT);
+ if ((mseg = page_numtomemseg_nolock(pfn)) != NULL) {
+ pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
+ ASSERT((pfn_t)pp->p_pagenum == pfn);
+ }
- pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
- ASSERT((pfn_t)pp->p_pagenum == pfn);
+ /*
+ * hat_kpm_mapin_pfn may add a kpm translation for memory that falls
+ * outside of memsegs. Check for this case and provide the translation
+ * here.
+ */
+ if (vac_colors == 1 && mseg == NULL) {
+ tte_t tte;
+ uint_t szc = kpm_smallpages ? TTE8K : TTE4M;
+ uint_t shift = kpm_smallpages ? MMU_PAGESHIFT : MMU_PAGESHIFT4M;
- if (!PAGE_LOCKED(pp))
- return (EFAULT);
-
- if (kpm_smallpages == 0)
+ ASSERT(address_in_memlist(phys_install, paddr, 1));
+ KPM_TTE_VCACHED(tte.ll, pfn, szc);
+ sfmmu_kpm_load_tsb(vaddr, &tte, shift);
+ error = 0;
+ } else if (mseg == NULL || !PAGE_LOCKED(pp))
+ error = EFAULT;
+ else if (kpm_smallpages == 0)
error = sfmmu_kpm_fault(vaddr, mseg, pp);
else
error = sfmmu_kpm_fault_small(vaddr, mseg, pp);
@@ -522,7 +575,6 @@
void *base;
size_t size;
struct memseg *msp;
- extern uint_t vac_colors;
for (msp = memsegs; msp; msp = msp->next) {
pbase = msp->pages_base;
--- a/usr/src/uts/sun4v/vm/mach_kpm.c Wed Jun 17 13:10:47 2009 -0700
+++ b/usr/src/uts/sun4v/vm/mach_kpm.c Wed Jun 17 15:32:10 2009 -0700
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Kernel Physical Mapping (segkpm) hat interface routines for sun4v.
*/
@@ -123,6 +121,38 @@
}
/*
+ * hat_kpm_mapin_pfn is used to obtain a kpm mapping for physical
+ * memory addresses that are not described by a page_t. It can
+ * also be used for normal pages that are not locked, but beware
+ * this is dangerous - no locking is performed, so the identity of
+ * the page could change. hat_kpm_mapin_pfn is not supported when
+ * vac_colors > 1, because the chosen va depends on the page identity,
+ * which could change.
+ * The caller must only pass pfn's for valid physical addresses; violation
+ * of this rule will cause panic.
+ */
+caddr_t
+hat_kpm_mapin_pfn(pfn_t pfn)
+{
+ caddr_t paddr, vaddr;
+
+ if (kpm_enable == 0)
+ return ((caddr_t)NULL);
+
+ paddr = (caddr_t)ptob(pfn);
+ vaddr = (uintptr_t)kpm_vbase + paddr;
+
+ return ((caddr_t)vaddr);
+}
+
+/*ARGSUSED*/
+void
+hat_kpm_mapout_pfn(pfn_t pfn)
+{
+ /* empty */
+}
+
+/*
* Return the kpm virtual address for the page at pp.
*/
/*ARGSUSED*/