usr/src/uts/common/vm/seg_kmem.c
changeset 3290 256464cbb73c
parent 1338 c2e71173ece3
child 3351 853fb8df244a
equal deleted inserted replaced
3289:95e8ec05aa83 3290:256464cbb73c
     1 /*
     1 /*
     2  * CDDL HEADER START
     2  * CDDL HEADER START
     3  *
     3  *
     4  * The contents of this file are subject to the terms of the
     4  * The contents of this file are subject to the terms of the
     5  * Common Development and Distribution License, Version 1.0 only
     5  * Common Development and Distribution License (the "License").
     6  * (the "License").  You may not use this file except in compliance
     6  * You may not use this file except in compliance with the License.
     7  * with the License.
       
     8  *
     7  *
     9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    10  * or http://www.opensolaris.org/os/licensing.
     9  * or http://www.opensolaris.org/os/licensing.
    11  * See the License for the specific language governing permissions
    10  * See the License for the specific language governing permissions
    12  * and limitations under the License.
    11  * and limitations under the License.
   101 
   100 
   102 char *kernelheap;		/* start of primary kernel heap */
   101 char *kernelheap;		/* start of primary kernel heap */
   103 char *ekernelheap;		/* end of primary kernel heap */
   102 char *ekernelheap;		/* end of primary kernel heap */
   104 struct seg kvseg;		/* primary kernel heap segment */
   103 struct seg kvseg;		/* primary kernel heap segment */
   105 struct seg kvseg_core;		/* "core" kernel heap segment */
   104 struct seg kvseg_core;		/* "core" kernel heap segment */
       
   105 struct seg kzioseg;		/* Segment for zio mappings */
   106 vmem_t *heap_arena;		/* primary kernel heap arena */
   106 vmem_t *heap_arena;		/* primary kernel heap arena */
   107 vmem_t *heap_core_arena;	/* core kernel heap arena */
   107 vmem_t *heap_core_arena;	/* core kernel heap arena */
   108 char *heap_core_base;		/* start of core kernel heap arena */
   108 char *heap_core_base;		/* start of core kernel heap arena */
   109 char *heap_lp_base;		/* start of kernel large page heap arena */
   109 char *heap_lp_base;		/* start of kernel large page heap arena */
   110 char *heap_lp_end;		/* end of kernel large page heap arena */
   110 char *heap_lp_end;		/* end of kernel large page heap arena */
   112 struct seg kvseg32;		/* 32-bit kernel heap segment */
   112 struct seg kvseg32;		/* 32-bit kernel heap segment */
   113 vmem_t *heap32_arena;		/* 32-bit kernel heap arena */
   113 vmem_t *heap32_arena;		/* 32-bit kernel heap arena */
   114 vmem_t *heaptext_arena;		/* heaptext arena */
   114 vmem_t *heaptext_arena;		/* heaptext arena */
   115 struct as kas;			/* kernel address space */
   115 struct as kas;			/* kernel address space */
   116 struct vnode kvp;		/* vnode for all segkmem pages */
   116 struct vnode kvp;		/* vnode for all segkmem pages */
       
   117 struct vnode zvp;		/* vnode for zfs pages */
   117 int segkmem_reloc;		/* enable/disable relocatable segkmem pages */
   118 int segkmem_reloc;		/* enable/disable relocatable segkmem pages */
   118 vmem_t *static_arena;		/* arena for caches to import static memory */
   119 vmem_t *static_arena;		/* arena for caches to import static memory */
   119 vmem_t *static_alloc_arena;	/* arena for allocating static memory */
   120 vmem_t *static_alloc_arena;	/* arena for allocating static memory */
       
    121 vmem_t *zio_arena = NULL;	/* arena spanning the zio address range */
        
    122 vmem_t *zio_alloc_arena = NULL;	/* allocates zio memory out of zio_arena */
   120 
   123 
   121 /*
   124 /*
   122  * seg_kmem driver can map part of the kernel heap with large pages.
   125  * seg_kmem driver can map part of the kernel heap with large pages.
   123  * Currently this functionality is implemented for sparc platforms only.
   126  * Currently this functionality is implemented for sparc platforms only.
   124  *
   127  *
   425 	enum fault_type type, enum seg_rw rw)
   428 	enum fault_type type, enum seg_rw rw)
   426 {
   429 {
   427 	pgcnt_t npages;
   430 	pgcnt_t npages;
   428 	spgcnt_t pg;
   431 	spgcnt_t pg;
   429 	page_t *pp;
   432 	page_t *pp;
       
   433 	struct vnode *vp = seg->s_data;
   430 
   434 
   431 	ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
   435 	ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
   432 
   436 
   433 	if (seg->s_as != &kas || size > seg->s_size ||
   437 	if (seg->s_as != &kas || size > seg->s_size ||
   434 	    addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
   438 	    addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
   449 	npages = btopr(size);
   453 	npages = btopr(size);
   450 
   454 
   451 	switch (type) {
   455 	switch (type) {
   452 	case F_SOFTLOCK:	/* lock down already-loaded translations */
   456 	case F_SOFTLOCK:	/* lock down already-loaded translations */
   453 		for (pg = 0; pg < npages; pg++) {
   457 		for (pg = 0; pg < npages; pg++) {
   454 			pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
   458 			pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
   455 			    SE_SHARED);
   459 			    SE_SHARED);
   456 			if (pp == NULL) {
   460 			if (pp == NULL) {
   457 				/*
   461 				/*
   458 				 * Hmm, no page. Does a kernel mapping
   462 				 * Hmm, no page. Does a kernel mapping
   459 				 * exist for it?
   463 				 * exist for it?
   460 				 */
   464 				 */
   461 				if (!hat_probe(kas.a_hat, addr)) {
   465 				if (!hat_probe(kas.a_hat, addr)) {
   462 					addr -= PAGESIZE;
   466 					addr -= PAGESIZE;
   463 					while (--pg >= 0) {
   467 					while (--pg >= 0) {
   464 						pp = page_find(&kvp,
   468 						pp = page_find(vp,
   465 						(u_offset_t)(uintptr_t)addr);
   469 						(u_offset_t)(uintptr_t)addr);
   466 						if (pp)
   470 						if (pp)
   467 							page_unlock(pp);
   471 							page_unlock(pp);
   468 						addr -= PAGESIZE;
   472 						addr -= PAGESIZE;
   469 					}
   473 					}
   475 		if (rw == S_OTHER)
   479 		if (rw == S_OTHER)
   476 			hat_reserve(seg->s_as, addr, size);
   480 			hat_reserve(seg->s_as, addr, size);
   477 		return (0);
   481 		return (0);
   478 	case F_SOFTUNLOCK:
   482 	case F_SOFTUNLOCK:
   479 		while (npages--) {
   483 		while (npages--) {
   480 			pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
   484 			pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
   481 			if (pp)
   485 			if (pp)
   482 				page_unlock(pp);
   486 				page_unlock(pp);
   483 			addr += PAGESIZE;
   487 			addr += PAGESIZE;
   484 		}
   488 		}
   485 		return (0);
   489 		return (0);
   643 	} else if (seg == &kvseg32) {
   647 	} else if (seg == &kvseg32) {
   644 		vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
   648 		vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
   645 		    segkmem_dump_range, seg->s_as);
   649 		    segkmem_dump_range, seg->s_as);
   646 		vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
   650 		vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
   647 		    segkmem_dump_range, seg->s_as);
   651 		    segkmem_dump_range, seg->s_as);
       
   652 	} else if (seg == &kzioseg) {
       
   653 		/*
       
   654 		 * We don't want to dump pages attached to kzioseg since they
       
   655 		 * contain file data from ZFS.  If this page's segment is
       
   656 		 * kzioseg return instead of writing it to the dump device.
       
   657 		 */
       
   658 		return;
   648 	} else {
   659 	} else {
   649 		segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
   660 		segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
   650 	}
   661 	}
   651 }
   662 }
   652 
   663 
   664 {
   675 {
   665 	page_t **pplist, *pp;
   676 	page_t **pplist, *pp;
   666 	pgcnt_t npages;
   677 	pgcnt_t npages;
   667 	spgcnt_t pg;
   678 	spgcnt_t pg;
   668 	size_t nb;
   679 	size_t nb;
       
   680 	struct vnode *vp = seg->s_data;
   669 
   681 
   670 	ASSERT(ppp != NULL);
   682 	ASSERT(ppp != NULL);
   671 
   683 
   672 	if (segkp_bitmap && seg == &kvseg) {
   684 	if (segkp_bitmap && seg == &kvseg) {
   673 		/*
   685 		/*
   704 		*ppp = NULL;
   716 		*ppp = NULL;
   705 		return (ENOTSUP);	/* take the slow path */
   717 		return (ENOTSUP);	/* take the slow path */
   706 	}
   718 	}
   707 
   719 
   708 	for (pg = 0; pg < npages; pg++) {
   720 	for (pg = 0; pg < npages; pg++) {
   709 		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
   721 		pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
   710 		if (pp == NULL) {
   722 		if (pp == NULL) {
   711 			while (--pg >= 0)
   723 			while (--pg >= 0)
   712 				page_unlock(pplist[pg]);
   724 				page_unlock(pplist[pg]);
   713 			kmem_free(pplist, nb);
   725 			kmem_free(pplist, nb);
   714 			*ppp = NULL;
   726 			*ppp = NULL;
   789 	segkmem_getpolicy,		/* getpolicy */
   801 	segkmem_getpolicy,		/* getpolicy */
   790 	segkmem_capable,		/* capable */
   802 	segkmem_capable,		/* capable */
   791 };
   803 };
   792 
   804 
    793 int
    805 int
    794 segkmem_create(struct seg *seg)
    806 segkmem_zio_create(struct seg *seg)
    795 {
    807 {
    796 	ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
    808 	ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
    797 	seg->s_ops = &segkmem_ops;
    809 	seg->s_ops = &segkmem_ops;
    798 	seg->s_data = NULL;
    810 	seg->s_data = &zvp;	/* vnode segkmem_fault() looks pages up on */
        
    811 	kas.a_size += seg->s_size;
        
    812 	return (0);
        
    813 }
        
    814 
        
    815 int
        
    816 segkmem_create(struct seg *seg)
        
    817 {
        
    818 	ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
        
    819 	seg->s_ops = &segkmem_ops;
        
    820 	seg->s_data = &kvp;	/* vnode segkmem_fault() looks pages up on */
    799 	kas.a_size += seg->s_size;
    821 	kas.a_size += seg->s_size;
    800 	return (0);
    822 	return (0);
    801 }
    823 }
   802 
   824 
   803 /*ARGSUSED*/
   825 /*ARGSUSED*/
   804 page_t *
   826 page_t *
   805 segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
   827 segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
   806 {
   828 {
   807 	struct seg kseg;
   829 	struct seg kseg;
   808 	int pgflags;
   830 	int pgflags;
       
   831 	struct vnode *vp = arg;
       
   832 
       
   833 	if (vp == NULL)
       
   834 		vp = &kvp;
   809 
   835 
   810 	kseg.s_as = &kas;
   836 	kseg.s_as = &kas;
   811 	pgflags = PG_EXCL;
   837 	pgflags = PG_EXCL;
   812 
   838 
   813 	if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
   839 	if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
   817 	if (vmflag & VM_PANIC)
   843 	if (vmflag & VM_PANIC)
   818 		pgflags |= PG_PANIC;
   844 		pgflags |= PG_PANIC;
   819 	if (vmflag & VM_PUSHPAGE)
   845 	if (vmflag & VM_PUSHPAGE)
   820 		pgflags |= PG_PUSHPAGE;
   846 		pgflags |= PG_PUSHPAGE;
   821 
   847 
   822 	return (page_create_va(&kvp, (u_offset_t)(uintptr_t)addr, size,
   848 	return (page_create_va(vp, (u_offset_t)(uintptr_t)addr, size,
   823 	    pgflags, &kseg, addr));
   849 	    pgflags, &kseg, addr));
   824 }
   850 }
   825 
   851 
   826 /*
   852 /*
   827  * Allocate pages to back the virtual address range [addr, addr + size).
   853  * Allocate pages to back the virtual address range [addr, addr + size).
   895 	}
   921 	}
   896 
   922 
   897 	return (addr);
   923 	return (addr);
   898 }
   924 }
   899 
   925 
   900 void *
   926 static void *
   901 segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
   927 segkmem_alloc_vn(vmem_t *vmp, size_t size, int vmflag, struct vnode *vp)
   902 {
   928 {
   903 	void *addr;
   929 	void *addr;
   904 	segkmem_gc_list_t *gcp, **prev_gcpp;
   930 	segkmem_gc_list_t *gcp, **prev_gcpp;
       
   931 
       
   932 	ASSERT(vp != NULL);
   905 
   933 
   906 	if (kvseg.s_base == NULL) {
   934 	if (kvseg.s_base == NULL) {
   907 #ifndef __sparc
   935 #ifndef __sparc
   908 		if (bootops->bsys_alloc == NULL)
   936 		if (bootops->bsys_alloc == NULL)
   909 			halt("Memory allocation between bop_alloc() and "
   937 			halt("Memory allocation between bop_alloc() and "
   926 		if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
   954 		if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
   927 			panic("segkmem_alloc: boot_alloc failed");
   955 			panic("segkmem_alloc: boot_alloc failed");
   928 		return (addr);
   956 		return (addr);
   929 	}
   957 	}
   930 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   958 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
   931 	    segkmem_page_create, NULL));
   959 	    segkmem_page_create, vp));
       
   960 }
       
   961 
       
    962 void *
        
    963 segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
        
    964 {
        	/* Default entry point: allocate with kvp as the page vnode. */
    965 	return (segkmem_alloc_vn(vmp, size, vmflag, &kvp));
        
    966 }
       
   967 
       
    968 void *
        
    969 segkmem_zio_alloc(vmem_t *vmp, size_t size, int vmflag)
        
    970 {
        	/* Same as segkmem_alloc(), but with zvp as the page vnode. */
    971 	return (segkmem_alloc_vn(vmp, size, vmflag, &zvp));
    932 }
    972 }
   933 
   973 
   934 /*
   974 /*
   935  * Any changes to this routine must also be carried over to
   975  * Any changes to this routine must also be carried over to
   936  * devmap_free_pages() in the seg_dev driver. This is because
   976  * devmap_free_pages() in the seg_dev driver. This is because
   937  * we currently don't have a special kernel segment for non-paged
   977  * we currently don't have a special kernel segment for non-paged
   938  * kernel memory that is exported by drivers to user space.
   978  * kernel memory that is exported by drivers to user space.
   939  */
   979  */
   940 void
   980 static void
   941 segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
   981 segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp)
   942 {
   982 {
   943 	page_t *pp;
   983 	page_t *pp;
   944 	caddr_t addr = inaddr;
   984 	caddr_t addr = inaddr;
   945 	caddr_t eaddr;
   985 	caddr_t eaddr;
   946 	pgcnt_t npages = btopr(size);
   986 	pgcnt_t npages = btopr(size);
   947 
   987 
   948 	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
   988 	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
       
   989 	ASSERT(vp != NULL);
   949 
   990 
   950 	if (kvseg.s_base == NULL) {
   991 	if (kvseg.s_base == NULL) {
   951 		segkmem_gc_list_t *gc = inaddr;
   992 		segkmem_gc_list_t *gc = inaddr;
   952 		gc->gc_arena = vmp;
   993 		gc->gc_arena = vmp;
   953 		gc->gc_size = size;
   994 		gc->gc_size = size;
   958 
   999 
   959 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
  1000 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
   960 
  1001 
   961 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
  1002 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
   962 #if defined(__x86)
  1003 #if defined(__x86)
   963 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
  1004 		pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
   964 		if (pp == NULL)
  1005 		if (pp == NULL)
   965 			panic("segkmem_free: page not found");
  1006 			panic("segkmem_free: page not found");
   966 		if (!page_tryupgrade(pp)) {
  1007 		if (!page_tryupgrade(pp)) {
   967 			/*
  1008 			/*
   968 			 * Some other thread has a sharelock. Wait for
  1009 			 * Some other thread has a sharelock. Wait for
   969 			 * it to drop the lock so we can free this page.
  1010 			 * it to drop the lock so we can free this page.
   970 			 */
  1011 			 */
   971 			page_unlock(pp);
  1012 			page_unlock(pp);
   972 			pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
  1013 			pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
   973 			    SE_EXCL);
  1014 			    SE_EXCL);
   974 		}
  1015 		}
   975 #else
  1016 #else
   976 		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
  1017 		pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
   977 #endif
  1018 #endif
   978 		if (pp == NULL)
  1019 		if (pp == NULL)
   979 			panic("segkmem_free: page not found");
  1020 			panic("segkmem_free: page not found");
   980 		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
  1021 		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
   981 		pp->p_lckcnt = 0;
  1022 		pp->p_lckcnt = 0;
   983 	}
  1024 	}
   984 	page_unresv(npages);
  1025 	page_unresv(npages);
   985 
  1026 
   986 	if (vmp != NULL)
  1027 	if (vmp != NULL)
   987 		vmem_free(vmp, inaddr, size);
  1028 		vmem_free(vmp, inaddr, size);
       
  1029 
       
  1030 }
       
  1031 
       
   1032 void
        
   1033 segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
        
   1034 {
        	/* Default entry point: backing pages are looked up on kvp. */
   1035 	segkmem_free_vn(vmp, inaddr, size, &kvp);
        
   1036 }
       
  1037 
       
   1038 void
        
   1039 segkmem_zio_free(vmem_t *vmp, void *inaddr, size_t size)
        
   1040 {
        	/* Same as segkmem_free(), but backing pages are looked up on zvp. */
   1041 	segkmem_free_vn(vmp, inaddr, size, &zvp);
    988 }
   1042 }
   989 
  1043 
   990 void
  1044 void
   991 segkmem_gc(void)
  1045 segkmem_gc(void)
   992 {
  1046 {
  1439 
  1493 
  1440 #endif
  1494 #endif
  1441 	return (use_large_pages);
  1495 	return (use_large_pages);
  1442 }
  1496 }
  1443 
  1497 
       
   1498 void
        
   1499 segkmem_zio_init(void *zio_mem_base, size_t zio_mem_size)
        
   1500 {
        	/* Create the zio arenas; base and size must both be non-zero. */
   1501 	ASSERT(zio_mem_base != NULL);
        
   1502 	ASSERT(zio_mem_size != 0);
        
   1503 
        	/* Arena covering [zio_mem_base, zio_mem_base + zio_mem_size). */
   1504 	zio_arena = vmem_create("zio", zio_mem_base, zio_mem_size, PAGESIZE,
        
   1505 	    NULL, NULL, NULL, 0, VM_SLEEP);
        
   1506 
        	/* Arena importing from zio_arena via segkmem_zio_alloc/free. */
   1507 	zio_alloc_arena = vmem_create("zio_buf", NULL, 0, PAGESIZE,
        
   1508 	    segkmem_zio_alloc, segkmem_zio_free, zio_arena, 0, VM_SLEEP);
        
   1509 
        
   1510 	ASSERT(zio_arena != NULL);
        
   1511 	ASSERT(zio_alloc_arena != NULL);
        
   1512 }
       
  1513 
  1444 #ifdef __sparc
  1514 #ifdef __sparc
  1445 
  1515 
  1446 
  1516 
  1447 static void *
  1517 static void *
  1448 segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)
  1518 segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)