usr/src/uts/sparc/v9/vm/seg_nf.c
changeset 0 68f95e015346
child 3290 256464cbb73c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/v9/vm/seg_nf.c	Tue Jun 14 00:00:00 2005 -0700
@@ -0,0 +1,499 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * VM - segment for non-faulting loads.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/errno.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/archsystm.h>
+#include <sys/lgrp.h>
+
+#include <vm/page.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/vpage.h>
+
+/*
+ * Private seg op routines.
+ */
+static int	segnf_dup(struct seg *seg, struct seg *newseg);
+static int	segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
+static void	segnf_free(struct seg *seg);
+static faultcode_t segnf_nomap(void);
+static int	segnf_setprot(struct seg *seg, caddr_t addr,
+		    size_t len, uint_t prot);
+static int	segnf_checkprot(struct seg *seg, caddr_t addr,
+		    size_t len, uint_t prot);
+static void	segnf_badop(void);
+static int	segnf_nop(void);
+static int	segnf_getprot(struct seg *seg, caddr_t addr,
+		    size_t len, uint_t *protv);
+static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
+static int	segnf_gettype(struct seg *seg, caddr_t addr);
+static int	segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
+static void	segnf_dump(struct seg *seg);
+static int	segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
+		    struct page ***ppp, enum lock_type type, enum seg_rw rw);
+static int	segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
+		    uint_t szc);
+static int	segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
+static lgrp_mem_policy_info_t	*segnf_getpolicy(struct seg *seg,
+    caddr_t addr);
+
+
+struct seg_ops segnf_ops = {
+	segnf_dup,
+	segnf_unmap,
+	segnf_free,
+	(faultcode_t (*)(struct hat *, struct seg *, caddr_t, size_t,
+	    enum fault_type, enum seg_rw))
+		segnf_nomap,		/* fault */
+	(faultcode_t (*)(struct seg *, caddr_t))
+		segnf_nomap,		/* faulta */
+	segnf_setprot,
+	segnf_checkprot,
+	(int (*)())segnf_badop,		/* kluster */
+	(size_t (*)(struct seg *))NULL,	/* swapout */
+	(int (*)(struct seg *, caddr_t, size_t, int, uint_t))
+		segnf_nop,		/* sync */
+	(size_t (*)(struct seg *, caddr_t, size_t, char *))
+		segnf_nop,		/* incore */
+	(int (*)(struct seg *, caddr_t, size_t, int, int, ulong_t *, size_t))
+		segnf_nop,		/* lockop */
+	segnf_getprot,
+	segnf_getoffset,
+	segnf_gettype,
+	segnf_getvp,
+	(int (*)(struct seg *, caddr_t, size_t, uint_t))
+		segnf_nop,		/* advise */
+	segnf_dump,
+	segnf_pagelock,
+	segnf_setpagesize,
+	segnf_getmemid,
+	segnf_getpolicy,
+};
+
+/*
+ * vnode and page for the page of zeros we use for the nf mappings.
+ */
+static kmutex_t segnf_lock;
+static struct vnode zvp;
+static struct page **zpp;
+
+#define	addr_to_vcolor(addr)                                            \
+	(shm_alignment) ?						\
+	((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0
+
+/*
+ * We try to limit the number of Non-fault segments created.
+ * Non fault segments are created to optimize sparc V9 code which uses
+ * the sparc nonfaulting load ASI (ASI_PRIMARY_NOFAULT).
+ *
+ * There are several reasons why creating too many non-fault segments
+ * could cause problems.
+ *
+ * 	First, excessive allocation of kernel resources for the seg
+ *	structures and the HAT data to map the zero pages.
+ *
+ * 	Secondly, creating nofault segments actually uses up user virtual
+ * 	address space. This makes it unavailable for subsequent mmap(0, ...)
+ *	calls which use as_gap() to find empty va regions.  Creation of too
+ *	many nofault segments could thus interfere with the ability of the
+ *	runtime linker to load a shared object.
+ */
+#define	MAXSEGFORNF	(10000)
+#define	MAXNFSEARCH	(5)
+
+
+/*
+ * Must be called from startup()
+ */
+void
+segnf_init()
+{
+	mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+
+/*
+ * Create a no-fault segment.
+ *
+ * The no-fault segment is not technically necessary, as the code in
+ * nfload() in trap.c will emulate the SPARC instruction and load
+ * a value of zero in the destination register.
+ *
+ * However, this code tries to put a page of zero's at the nofault address
+ * so that subsequent non-faulting loads to the same page will not
+ * trap with a tlb miss.
+ *
+ * In order to help limit the number of segments we merge adjacent nofault
+ * segments into a single segment.  If we get a large number of segments
+ * we'll also try to delete a random other nf segment.
+ */
+/* ARGSUSED */
+int
+segnf_create(struct seg *seg, void *argsp)
+{
+	uint_t prot;
+	pgcnt_t	vacpgs;
+	u_offset_t off = 0;
+	caddr_t	vaddr = NULL;
+	int i, color;
+	struct seg *s1;
+	struct seg *s2;
+	size_t size;
+	struct as *as = seg->s_as;
+
+	ASSERT(as && AS_WRITE_HELD(as, &as->a_lock));
+
+	/*
+	 * Need a page per virtual color or just 1 if no vac.
+	 */
+	mutex_enter(&segnf_lock);
+	if (zpp == NULL) {
+		struct seg kseg;
+
+		vacpgs = 1;
+		if (shm_alignment > PAGESIZE) {
+			vacpgs = shm_alignment >> PAGESHIFT;
+		}
+
+		zpp = kmem_alloc(sizeof (*zpp) * vacpgs, KM_SLEEP);
+
+		kseg.s_as = &kas;
+		for (i = 0; i < vacpgs; i++, off += PAGESIZE,
+		    vaddr += PAGESIZE) {
+			zpp[i] = page_create_va(&zvp, off, PAGESIZE,
+			    PG_WAIT | PG_NORELOC, &kseg, vaddr);
+			page_io_unlock(zpp[i]);
+			page_downgrade(zpp[i]);
+			pagezero(zpp[i], 0, PAGESIZE);
+		}
+	}
+	mutex_exit(&segnf_lock);
+
+	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
+
+	/*
+	 * s_data can't be NULL because of ASSERTS in the common vm code.
+	 */
+	seg->s_ops = &segnf_ops;
+	seg->s_data = seg;
+	seg->s_flags |= S_PURGE;
+
+	mutex_enter(&as->a_contents);
+	as->a_flags |= AS_NEEDSPURGE;
+	mutex_exit(&as->a_contents);
+
+	prot = PROT_READ;
+	color = addr_to_vcolor(seg->s_base);
+	if (as != &kas)
+		prot |= PROT_USER;
+	hat_memload(as->a_hat, seg->s_base, zpp[color],
+	    prot | HAT_NOFAULT, HAT_LOAD);
+
+	/*
+	 * At this point see if we can concatenate a segment to
+	 * a non-fault segment immediately before and/or after it.
+	 */
+	if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
+	    s1->s_ops == &segnf_ops &&
+	    s1->s_base + s1->s_size == seg->s_base) {
+		size = s1->s_size;
+		seg_free(s1);
+		seg->s_base -= size;
+		seg->s_size += size;
+	}
+
+	if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
+	    s2->s_ops == &segnf_ops &&
+	    seg->s_base + seg->s_size == s2->s_base) {
+		size = s2->s_size;
+		seg_free(s2);
+		seg->s_size += size;
+	}
+
+	/*
+	 * if we already have a lot of segments, try to delete some other
+	 * nofault segment to reduce the probability of uncontrolled segment
+	 * creation.
+	 *
+	 * the code looks around quickly (no more than MAXNFSEARCH segments
+	 * each way) for another NF segment and then deletes it.
+	 */
+	if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
+		size = 0;
+		s2 = NULL;
+		s1 = AS_SEGPREV(as, seg);
+		while (size++ < MAXNFSEARCH && s1 != NULL) {
+			if (s1->s_ops == &segnf_ops)
+				s2 = s1;
+			s1 = AS_SEGPREV(s1->s_as, seg);
+		}
+		if (s2 == NULL) {
+			s1 = AS_SEGNEXT(as, seg);
+			while (size-- > 0 && s1 != NULL) {
+				if (s1->s_ops == &segnf_ops)
+					s2 = s1;
+				s1 = AS_SEGNEXT(as, seg);
+			}
+		}
+		if (s2 != NULL)
+			seg_unmap(s2);
+	}
+
+	return (0);
+}
+
+/*
+ * Never really need "No fault" segments, so they aren't dup'd.
+ */
+/* ARGSUSED */
+static int
+segnf_dup(struct seg *seg, struct seg *newseg)
+{
+	panic("segnf_dup");
+	return (0);
+}
+
+/*
+ * Split a segment at addr for length len.
+ */
+static int
+segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
+{
+	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	/*
+	 * Check for bad sizes.
+	 */
+	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
+	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
+		cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
+	}
+
+	/*
+	 * Unload any hardware translations in the range to be taken out.
+	 */
+	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
+
+	if (addr == seg->s_base && len == seg->s_size) {
+		/*
+		 * Freeing entire segment.
+		 */
+		seg_free(seg);
+	} else if (addr == seg->s_base) {
+		/*
+		 * Freeing the beginning of the segment.
+		 */
+		seg->s_base += len;
+		seg->s_size -= len;
+	} else if (addr + len == seg->s_base + seg->s_size) {
+		/*
+		 * Freeing the end of the segment.
+		 */
+		seg->s_size -= len;
+	} else {
+		/*
+		 * The section to go is in the middle of the segment, so we
+		 * have to cut it into two segments.  We shrink the existing
+		 * "seg" at the low end, and create "nseg" for the high end.
+		 */
+		caddr_t nbase = addr + len;
+		size_t nsize = (seg->s_base + seg->s_size) - nbase;
+		struct seg *nseg;
+
+		/*
+		 * Trim down "seg" before trying to stick "nseg" into the as.
+		 */
+		seg->s_size = addr - seg->s_base;
+		nseg = seg_alloc(seg->s_as, nbase, nsize);
+		if (nseg == NULL)
+			cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
+
+		/*
+		 * s_data can't be NULL because of ASSERTs in common VM code.
+		 */
+		nseg->s_ops = seg->s_ops;
+		nseg->s_data = nseg;
+		nseg->s_flags |= S_PURGE;
+		mutex_enter(&seg->s_as->a_contents);
+		seg->s_as->a_flags |= AS_NEEDSPURGE;
+		mutex_exit(&seg->s_as->a_contents);
+	}
+
+	return (0);
+}
+
+/*
+ * Free a segment.
+ */
+static void
+segnf_free(struct seg *seg)
+{
+	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
+}
+
+/*
+ * No faults allowed on segnf.
+ */
+static faultcode_t
+segnf_nomap(void)
+{
+	return (FC_NOMAP);
+}
+
+/* ARGSUSED */
+static int
+segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+	return (EACCES);
+}
+
+/* ARGSUSED */
+static int
+segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+	uint_t sprot;
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
+	return ((prot & sprot) == prot ? 0 : EACCES);
+}
+
+static void
+segnf_badop(void)
+{
+	panic("segnf_badop");
+	/*NOTREACHED*/
+}
+
+static int
+segnf_nop(void)
+{
+	return (0);
+}
+
+static int
+segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
+{
+	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
+	size_t p;
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	for (p = 0; p < pgno; ++p)
+		protv[p] = PROT_READ;
+	return (0);
+}
+
+/* ARGSUSED */
+static u_offset_t
+segnf_getoffset(struct seg *seg, caddr_t addr)
+{
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	return ((u_offset_t)0);
+}
+
+/* ARGSUSED */
+static int
+segnf_gettype(struct seg *seg, caddr_t addr)
+{
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	return (MAP_SHARED);
+}
+
+/* ARGSUSED */
+static int
+segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
+{
+	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+	*vpp = &zvp;
+	return (0);
+}
+
+/*
+ * segnf pages are not dumped, so we just return
+ */
+/* ARGSUSED */
+static void
+segnf_dump(struct seg *seg)
+{}
+
+/*ARGSUSED*/
+static int
+segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
+    struct page ***ppp, enum lock_type type, enum seg_rw rw)
+{
+	return (ENOTSUP);
+}
+
+/*ARGSUSED*/
+static int
+segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
+    uint_t szc)
+{
+	return (ENOTSUP);
+}
+
+/*ARGSUSED*/
+static int
+segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
+{
+	return (ENODEV);
+}
+
+/*ARGSUSED*/
+static lgrp_mem_policy_info_t *
+segnf_getpolicy(struct seg *seg, caddr_t addr)
+{
+	return (NULL);
+}