usr/src/uts/i86pc/os/startup.c
changeset 0 68f95e015346
child 423 6cbc492798ce
equal deleted inserted replaced
-1:000000000000 0:68f95e015346
       
     1 /*
       
     2  * CDDL HEADER START
       
     3  *
       
     4  * The contents of this file are subject to the terms of the
       
     5  * Common Development and Distribution License, Version 1.0 only
       
     6  * (the "License").  You may not use this file except in compliance
       
     7  * with the License.
       
     8  *
       
     9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
       
    10  * or http://www.opensolaris.org/os/licensing.
       
    11  * See the License for the specific language governing permissions
       
    12  * and limitations under the License.
       
    13  *
       
    14  * When distributing Covered Code, include this CDDL HEADER in each
       
    15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
       
    16  * If applicable, add the following below this CDDL HEADER, with the
       
    17  * fields enclosed by brackets "[]" replaced with your own identifying
       
    18  * information: Portions Copyright [yyyy] [name of copyright owner]
       
    19  *
       
    20  * CDDL HEADER END
       
    21  */
       
    22 /*
       
    23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
       
    24  * Use is subject to license terms.
       
    25  */
       
    26 
       
    27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
       
    28 
       
    29 #include <sys/types.h>
       
    30 #include <sys/t_lock.h>
       
    31 #include <sys/param.h>
       
    32 #include <sys/sysmacros.h>
       
    33 #include <sys/signal.h>
       
    34 #include <sys/systm.h>
       
    35 #include <sys/user.h>
       
    36 #include <sys/mman.h>
       
    37 #include <sys/vm.h>
       
    38 #include <sys/conf.h>
       
    39 #include <sys/avintr.h>
       
    40 #include <sys/autoconf.h>
       
    41 #include <sys/disp.h>
       
    42 #include <sys/class.h>
       
    43 #include <sys/bitmap.h>
       
    44 
       
    45 #include <sys/privregs.h>
       
    46 
       
    47 #include <sys/proc.h>
       
    48 #include <sys/buf.h>
       
    49 #include <sys/kmem.h>
       
    50 #include <sys/kstat.h>
       
    51 
       
    52 #include <sys/reboot.h>
       
    53 #include <sys/uadmin.h>
       
    54 
       
    55 #include <sys/cred.h>
       
    56 #include <sys/vnode.h>
       
    57 #include <sys/file.h>
       
    58 
       
    59 #include <sys/procfs.h>
       
    60 #include <sys/acct.h>
       
    61 
       
    62 #include <sys/vfs.h>
       
    63 #include <sys/dnlc.h>
       
    64 #include <sys/var.h>
       
    65 #include <sys/cmn_err.h>
       
    66 #include <sys/utsname.h>
       
    67 #include <sys/debug.h>
       
    68 #include <sys/kdi.h>
       
    69 
       
    70 #include <sys/dumphdr.h>
       
    71 #include <sys/bootconf.h>
       
    72 #include <sys/varargs.h>
       
    73 #include <sys/promif.h>
       
    74 #include <sys/prom_emul.h>	/* for create_prom_prop */
       
    75 #include <sys/modctl.h>		/* for "procfs" hack */
       
    76 
       
    77 #include <sys/consdev.h>
       
    78 #include <sys/frame.h>
       
    79 
       
    80 #include <sys/sunddi.h>
       
    81 #include <sys/sunndi.h>
       
    82 #include <sys/ndi_impldefs.h>
       
    83 #include <sys/ddidmareq.h>
       
    84 #include <sys/psw.h>
       
    85 #include <sys/regset.h>
       
    86 #include <sys/clock.h>
       
    87 #include <sys/pte.h>
       
    88 #include <sys/mmu.h>
       
    89 #include <sys/tss.h>
       
    90 #include <sys/stack.h>
       
    91 #include <sys/trap.h>
       
    92 #include <sys/pic.h>
       
    93 #include <sys/fp.h>
       
    94 #include <vm/anon.h>
       
    95 #include <vm/as.h>
       
    96 #include <vm/page.h>
       
    97 #include <vm/seg.h>
       
    98 #include <vm/seg_dev.h>
       
    99 #include <vm/seg_kmem.h>
       
   100 #include <vm/seg_kpm.h>
       
   101 #include <vm/seg_map.h>
       
   102 #include <vm/seg_vn.h>
       
   103 #include <vm/seg_kp.h>
       
   104 #include <sys/memnode.h>
       
   105 #include <vm/vm_dep.h>
       
   106 #include <sys/swap.h>
       
   107 #include <sys/thread.h>
       
   108 #include <sys/sysconf.h>
       
   109 #include <sys/vm_machparam.h>
       
   110 #include <sys/archsystm.h>
       
   111 #include <sys/machsystm.h>
       
   112 #include <vm/hat.h>
       
   113 #include <vm/hat_i86.h>
       
   114 #include <sys/pmem.h>
       
   115 #include <sys/instance.h>
       
   116 #include <sys/smp_impldefs.h>
       
   117 #include <sys/x86_archext.h>
       
   118 #include <sys/segments.h>
       
   119 #include <sys/clconf.h>
       
   120 #include <sys/kobj.h>
       
   121 #include <sys/kobj_lex.h>
       
   122 #include <sys/prom_emul.h>
       
   123 #include <sys/cpc_impl.h>
       
   124 #include <sys/chip.h>
       
   125 #include <sys/x86_archext.h>
       
   126 
       
   127 extern void debug_enter(char *);
       
   128 extern void progressbar_init(void);
       
   129 extern void progressbar_start(void);
       
   130 
       
   131 /*
       
   132  * XXX make declaration below "static" when drivers no longer use this
       
   133  * interface.
       
   134  */
       
   135 extern caddr_t p0_va;	/* Virtual address for accessing physical page 0 */
       
   136 
       
   137 /*
       
   138  * segkp
       
   139  */
       
   140 extern int segkp_fromheap;
       
   141 
       
   142 static void kvm_init(void);
       
   143 static void startup_init(void);
       
   144 static void startup_memlist(void);
       
   145 static void startup_modules(void);
       
   146 static void startup_bop_gone(void);
       
   147 static void startup_vm(void);
       
   148 static void startup_end(void);
       
   149 
       
   150 /*
       
   151  * Declare these as initialized data so we can patch them.
       
   152  */
       
   153 pgcnt_t physmem = 0;	/* memory size in pages, patch if you want less */
       
   154 pgcnt_t obp_pages;	/* Memory used by PROM for its text and data */
       
   155 
       
   156 char *kobj_file_buf;
       
   157 int kobj_file_bufsize;	/* set in /etc/system */
       
   158 
       
   159 /* Global variables for MP support. Used in mp_startup */
       
   160 caddr_t	rm_platter_va;
       
   161 uint32_t rm_platter_pa;
       
   162 
       
   163 /*
       
   164  * Some CPUs have holes in the middle of the 64-bit virtual address range.
       
   165  */
       
   166 uintptr_t hole_start, hole_end;
       
   167 
       
   168 /*
       
   169  * kpm mapping window
       
   170  */
       
   171 caddr_t kpm_vbase;
       
   172 size_t  kpm_size;
       
   173 static int kpm_desired = 0;		/* Do we want to try to use segkpm? */
       
   174 
       
   175 /*
       
   176  * VA range that must be preserved for boot until we release all of its
       
   177  * mappings.
       
   178  */
       
   179 #if defined(__amd64)
       
   180 static void *kmem_setaside;
       
   181 #endif
       
   182 
       
   183 /*
       
   184  * Configuration parameters set at boot time.
       
   185  */
       
   186 
       
   187 caddr_t econtig;		/* end of first block of contiguous kernel */
       
   188 
       
   189 struct bootops		*bootops = 0;	/* passed in from boot */
       
   190 struct bootops		**bootopsp;
       
   191 struct boot_syscalls	*sysp;		/* passed in from boot */
       
   192 
       
   193 char bootblock_fstype[16];
       
   194 
       
   195 char kern_bootargs[OBP_MAXPATHLEN];
       
   196 
       
   197 /*
       
   198  * new memory fragmentations are possible in startup() due to BOP_ALLOCs. this
       
   199  * depends on number of BOP_ALLOC calls made and requested size, memory size
       
   200  * combination and whether boot.bin memory needs to be freed.
       
   201  */
       
   202 #define	POSS_NEW_FRAGMENTS	12
       
   203 
       
   204 /*
       
   205  * VM data structures
       
   206  */
       
   207 long page_hashsz;		/* Size of page hash table (power of two) */
       
   208 struct page *pp_base;		/* Base of initial system page struct array */
       
   209 struct page **page_hash;	/* Page hash table */
       
   210 struct seg ktextseg;		/* Segment used for kernel executable image */
       
   211 struct seg kvalloc;		/* Segment used for "valloc" mapping */
       
   212 struct seg kpseg;		/* Segment used for pageable kernel virt mem */
       
   213 struct seg kmapseg;		/* Segment used for generic kernel mappings */
       
   214 struct seg kdebugseg;		/* Segment used for the kernel debugger */
       
   215 
       
   216 struct seg *segkmap = &kmapseg;	/* Kernel generic mapping segment */
       
   217 struct seg *segkp = &kpseg;	/* Pageable kernel virtual memory segment */
       
   218 
       
   219 #if defined(__amd64)
       
   220 struct seg kvseg_core;		/* Segment used for the core heap */
       
   221 struct seg kpmseg;		/* Segment used for physical mapping */
       
   222 struct seg *segkpm = &kpmseg;	/* 64bit kernel physical mapping segment */
       
   223 #else
       
   224 struct seg *segkpm = NULL;	/* Unused on IA32 */
       
   225 #endif
       
   226 
       
   227 caddr_t segkp_base;		/* Base address of segkp */
       
   228 #if defined(__amd64)
       
   229 pgcnt_t segkpsize = btop(SEGKPDEFSIZE);	/* size of segkp segment in pages */
       
   230 #else
       
   231 pgcnt_t segkpsize = 0;
       
   232 #endif
       
   233 
       
   234 struct memseg *memseg_base;
       
   235 struct vnode unused_pages_vp;
       
   236 
       
   237 #define	FOURGB	0x100000000LL
       
   238 
       
   239 struct memlist *memlist;
       
   240 
       
   241 caddr_t s_text;		/* start of kernel text segment */
       
   242 caddr_t e_text;		/* end of kernel text segment */
       
   243 caddr_t s_data;		/* start of kernel data segment */
       
   244 caddr_t e_data;		/* end of kernel data segment */
       
   245 caddr_t modtext;	/* start of loadable module text reserved */
       
   246 caddr_t e_modtext;	/* end of loadable module text reserved */
       
   247 caddr_t moddata;	/* start of loadable module data reserved */
       
   248 caddr_t e_moddata;	/* end of loadable module data reserved */
       
   249 
       
   250 struct memlist *phys_install;	/* Total installed physical memory */
       
   251 struct memlist *phys_avail;	/* Total available physical memory */
       
   252 
       
   253 static void memlist_add(uint64_t, uint64_t, struct memlist *,
       
   254 	struct memlist **);
       
   255 
       
   256 /*
       
   257  * kphysm_init returns the number of pages that were processed
       
   258  */
       
   259 static pgcnt_t kphysm_init(page_t *, struct memseg *, pgcnt_t, pgcnt_t);
       
   260 
       
   261 #define	IO_PROP_SIZE	64	/* device property size */
       
   262 
       
   263 /*
       
   264  * a couple useful roundup macros
       
   265  */
       
   266 #define	ROUND_UP_PAGE(x)	\
       
   267 	((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)MMU_PAGESIZE))
       
   268 #define	ROUND_UP_LPAGE(x)	\
       
   269 	((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[1]))
       
   270 #define	ROUND_UP_4MEG(x)	\
       
   271 	((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)FOURMB_PAGESIZE))
       
   272 #define	ROUND_UP_TOPLEVEL(x)	\
       
   273 	((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[mmu.max_level]))
       
   274 
       
   275 /*
       
   276  *	32-bit Kernel's Virtual memory layout.
       
   277  *		+-----------------------+
       
   278  *		|	psm 1-1 map	|
       
   279  *		|	exec args area	|
       
   280  * 0xFFC00000  -|-----------------------|- ARGSBASE
       
   281  *		|	debugger	|
       
   282  * 0xFF800000  -|-----------------------|- SEGDEBUGBASE
       
   283  *		|      Kernel Data	|
       
   284  * 0xFEC00000  -|-----------------------|
       
   285  *              |      Kernel Text	|
       
   286  * 0xFE800000  -|-----------------------|- KERNEL_TEXT
       
   287  * 		|     LUFS sinkhole	|
       
   288  * 0xFE000000  -|-----------------------|- lufs_addr
       
   289  * ---         -|-----------------------|- valloc_base + valloc_sz
       
   290  * 		|   early pp structures	|
       
   291  * 		|   memsegs, memlists, 	|
       
   292  * 		|   page hash, etc.	|
       
   293  * ---	       -|-----------------------|- valloc_base (floating)
       
   294  * 		|     ptable_va    	|
       
   295  * 0xFDFFE000  -|-----------------------|- ekernelheap, ptable_va
       
   296  *		|			|  (segkp is an arena under the heap)
       
   297  *		|			|
       
   298  *		|	kvseg		|
       
   299  *		|			|
       
   300  *		|			|
       
   301  * ---         -|-----------------------|- kernelheap (floating)
       
   302  * 		|        Segkmap	|
       
   303  * 0xC3002000  -|-----------------------|- segkmap_start (floating)
       
   304  *		|	Red Zone	|
       
   305  * 0xC3000000  -|-----------------------|- kernelbase / userlimit (floating)
       
   306  *		|			|			||
       
   307  *		|     Shared objects	|			\/
       
   308  *		|			|
       
   309  *		:			:
       
   310  *		|	user data	|
       
   311  *		|-----------------------|
       
   312  *		|	user text	|
       
   313  * 0x08048000  -|-----------------------|
       
   314  *		|	user stack	|
       
   315  *		:			:
       
   316  *		|	invalid		|
       
   317  * 0x00000000	+-----------------------+
       
   318  *
       
   319  *
       
   320  *		64-bit Kernel's Virtual memory layout. (assuming 64 bit app)
       
   321  *			+-----------------------+
       
   322  *			|	psm 1-1 map	|
       
   323  *			|	exec args area	|
       
   324  * 0xFFFFFFFF.FFC00000  |-----------------------|- ARGSBASE
       
   325  *			|	debugger (?)	|
       
   326  * 0xFFFFFFFF.FF800000  |-----------------------|- SEGDEBUGBASE
       
   327  *			|      unused    	|
       
   328  *			+-----------------------+
       
   329  *			|      Kernel Data	|
       
   330  * 0xFFFFFFFF.FBC00000  |-----------------------|
       
   331  *			|      Kernel Text	|
       
   332  * 0xFFFFFFFF.FB800000  |-----------------------|- KERNEL_TEXT
       
   333  * 			|     LUFS sinkhole	|
       
   334  * 0xFFFFFFFF.FB000000 -|-----------------------|- lufs_addr
       
   335  * ---                  |-----------------------|- valloc_base + valloc_sz
       
   336  * 			|   early pp structures	|
       
   337  * 			|   memsegs, memlists, 	|
       
   338  * 			|   page hash, etc.	|
       
   339  * ---                  |-----------------------|- valloc_base
       
   340  * 			|     ptable_va    	|
       
   341  * ---                  |-----------------------|- ptable_va
       
   342  * 			|      Core heap	| (used for loadable modules)
       
   343  * 0xFFFFFFFF.C0000000  |-----------------------|- core_base / ekernelheap
       
   344  *			|	 Kernel		|
       
   345  *			|	  heap		|
       
   346  * 0xFFFFFXXX.XXX00000  |-----------------------|- kernelheap (floating)
       
   347  *			|	 segkmap	|
       
   348  * 0xFFFFFXXX.XXX00000  |-----------------------|- segkmap_start (floating)
       
   349  *			|    device mappings	|
       
   350  * 0xFFFFFXXX.XXX00000  |-----------------------|- toxic_addr (floating)
       
   351  *			|	  segkp		|
       
   352  * ---                  |-----------------------|- segkp_base
       
   353  *			|	 segkpm		|
       
   354  * 0xFFFFFE00.00000000  |-----------------------|
       
   355  *			|	Red Zone	|
       
   356  * 0xFFFFFD80.00000000  |-----------------------|- KERNELBASE
       
   357  *			|     User stack	|- User space memory
       
   358  * 			|			|
       
   359  * 			| shared objects, etc	|	(grows downwards)
       
   360  *			:			:
       
   361  * 			|			|
       
   362  * 0xFFFF8000.00000000  |-----------------------|
       
   363  * 			|			|
       
   364  * 			| VA Hole / unused	|
       
   365  * 			|			|
       
   366  * 0x00008000.00000000  |-----------------------|
       
   367  *			|			|
       
   368  *			|			|
       
   369  *			:			:
       
   370  *			|	user heap	|	(grows upwards)
       
   371  *			|			|
       
   372  *			|	user data	|
       
   373  *			|-----------------------|
       
   374  *			|	user text	|
       
   375  * 0x00000000.04000000  |-----------------------|
       
   376  *			|	invalid		|
       
   377  * 0x00000000.00000000	+-----------------------+
       
   378  *
       
   379  * A 32 bit app on the 64 bit kernel sees the same layout as on the 32 bit
       
   380  * kernel, except that userlimit is raised to 0xfe000000
       
   381  *
       
   382  * Floating values:
       
   383  *
       
   384  * valloc_base: start of the kernel's memory management/tracking data
       
   385  * structures.  This region contains page_t structures for the lowest 4GB
       
   386  * of physical memory, memsegs, memlists, and the page hash.
       
   387  *
       
   388  * core_base: start of the kernel's "core" heap area on 64-bit systems.
       
   389  * This area is intended to be used for global data as well as for module
       
   390  * text/data that does not fit into the nucleus pages.  The core heap is
       
   391  * restricted to a 2GB range, allowing every address within it to be
       
   392  * accessed using rip-relative addressing
       
   393  *
       
   394  * ekernelheap: end of kernelheap and start of segmap.
       
   395  *
       
   396  * kernelheap: start of kernel heap.  On 32-bit systems, this starts right
       
   397  * above a red zone that separates the user's address space from the
       
   398  * kernel's.  On 64-bit systems, it sits above segkp and segkpm.
       
   399  *
       
   400  * segkmap_start: start of segmap. The length of segmap can be modified
       
   401  * by changing segmapsize in /etc/system (preferred) or eeprom (deprecated).
       
   402  * The default length is 16MB on 32-bit systems and 64MB on 64-bit systems.
       
   403  *
       
   404  * kernelbase: On a 32-bit kernel the default value of 0xd4000000 will be
       
   405  * decreased by 2X the size required for page_t.  This allows the kernel
       
   406  * heap to grow in size with physical memory.  With sizeof(page_t) == 80
       
   407  * bytes, the following shows the values of kernelbase and kernel heap
       
   408  * sizes for different memory configurations (assuming default segmap and
       
   409  * segkp sizes).
       
   410  *
       
   411  *	mem	size for	kernelbase	kernel heap
       
   412  *	size	page_t's			size
       
   413  *	----	---------	----------	-----------
       
   414  *	1gb	0x01400000	0xd1800000	684MB
       
   415  *	2gb	0x02800000	0xcf000000	704MB
       
   416  *	4gb	0x05000000	0xca000000	744MB
       
   417  *	6gb	0x07800000	0xc5000000	784MB
       
   418  *	8gb	0x0a000000	0xc0000000	824MB
       
   419  *	16gb	0x14000000	0xac000000	984MB
       
   420  *	32gb	0x28000000	0x84000000	1304MB
       
   421  *	64gb	0x50000000	0x34000000	1944MB (*)
       
   422  *
       
   423  * kernelbase is less than the abi minimum of 0xc0000000 for memory
       
   424  * configurations above 8gb.
       
   425  *
       
   426  * (*) support for memory configurations above 32gb will require manual tuning
       
   427  * of kernelbase to balance out the need of user applications.
       
   428  */
       
   429 
       
   430 void init_intr_threads(struct cpu *);
       
   431 
       
   432 /*
       
   433  * Dummy spl priority masks
       
   434  */
       
   435 static unsigned char	dummy_cpu_pri[MAXIPL + 1] = {
       
   436 	0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
       
   437 	0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf
       
   438 };
       
   439 
       
   440 /* real-time-clock initialization parameters */
       
   441 long gmt_lag;		/* offset in seconds of gmt to local time */
       
   442 extern long process_rtc_config_file(void);
       
   443 
       
   444 char		*final_kernelheap;
       
   445 char		*boot_kernelheap;
       
   446 uintptr_t	kernelbase;
       
   447 uintptr_t	eprom_kernelbase;
       
   448 size_t		segmapsize;
       
   449 static uintptr_t segmap_reserved;
       
   450 uintptr_t	segkmap_start;
       
   451 int		segmapfreelists;
       
   452 pgcnt_t		boot_npages;
       
   453 pgcnt_t		npages;
       
   454 size_t		core_size;		/* size of "core" heap */
       
   455 uintptr_t	core_base;		/* base address of "core" heap */
       
   456 
       
   457 /*
       
   458  * List of bootstrap pages. We mark these as allocated in startup.
       
   459  * release_bootstrap() will free them when we're completely done with
       
   460  * the bootstrap.
       
   461  */
       
   462 static page_t *bootpages, *rd_pages;
       
   463 
       
   464 struct system_hardware system_hardware;
       
   465 
       
   466 /*
       
   467  * Enable some debugging messages concerning memory usage...
       
   468  *
       
   469  * XX64 There should only be one print routine once memlist usage between
       
   470  * vmx and the kernel is cleaned up and there is a single memlist structure
       
   471  * shared between kernel and boot.
       
   472  */
       
   473 static void
       
   474 print_boot_memlist(char *title, struct memlist *mp)
       
   475 {
       
   476 	prom_printf("MEMLIST: %s:\n", title);
       
   477 	while (mp != NULL)  {
       
   478 		prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n",
       
   479 		    mp->address, mp->size);
       
   480 		mp = mp->next;
       
   481 	}
       
   482 }
       
   483 
       
   484 static void
       
   485 print_kernel_memlist(char *title, struct memlist *mp)
       
   486 {
       
   487 	prom_printf("MEMLIST: %s:\n", title);
       
   488 	while (mp != NULL)  {
       
   489 		prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n",
       
   490 		    mp->address, mp->size);
       
   491 		mp = mp->next;
       
   492 	}
       
   493 }
       
   494 
       
   495 /*
       
   496  * XX64 need a comment here.. are these just default values, surely
       
   497  * we read the "cpuid" type information to figure this out.
       
   498  */
       
   499 int	l2cache_sz = 0x80000;
       
   500 int	l2cache_linesz = 0x40;
       
   501 int	l2cache_assoc = 1;
       
   502 
       
   503 /*
       
   504  * on 64 bit we use a predifined VA range for mapping devices in the kernel
       
   505  * on 32 bit the mappings are intermixed in the heap, so we use a bit map
       
   506  */
       
   507 #ifdef __amd64
       
   508 
       
   509 vmem_t		*device_arena;
       
   510 uintptr_t	toxic_addr = (uintptr_t)NULL;
       
   511 size_t		toxic_size = 1 * 1024 * 1024 * 1024; /* Sparc uses 1 gig too */
       
   512 
       
   513 #else	/* __i386 */
       
   514 
       
   515 ulong_t		*toxic_bit_map;	/* one bit for each 4k of VA in heap_arena */
       
   516 size_t		toxic_bit_map_len = 0;	/* in bits */
       
   517 
       
   518 #endif	/* __i386 */
       
   519 
       
   520 /*
       
   521  * Simple boot time debug facilities
       
   522  */
       
   523 static char *prm_dbg_str[] = {
       
   524 	"%s:%d: '%s' is 0x%x\n",
       
   525 	"%s:%d: '%s' is 0x%llx\n"
       
   526 };
       
   527 
       
   528 int prom_debug;
       
   529 
       
   530 #define	PRM_DEBUG(q)	if (prom_debug) 	\
       
   531 	prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q);
       
   532 #define	PRM_POINT(q)	if (prom_debug) 	\
       
   533 	prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q);
       
   534 
       
   535 /*
       
   536  * This structure is used to keep track of the intial allocations
       
   537  * done in startup_memlist(). The value of NUM_ALLOCATIONS needs to
       
   538  * be >= the number of ADD_TO_ALLOCATIONS() executed in the code.
       
   539  */
       
   540 #define	NUM_ALLOCATIONS 7
       
   541 int num_allocations = 0;
       
   542 struct {
       
   543 	void **al_ptr;
       
   544 	size_t al_size;
       
   545 } allocations[NUM_ALLOCATIONS];
       
   546 size_t valloc_sz = 0;
       
   547 uintptr_t valloc_base;
       
   548 extern uintptr_t ptable_va;
       
   549 extern size_t ptable_sz;
       
   550 
       
   551 #define	ADD_TO_ALLOCATIONS(ptr, size) {					\
       
   552 		size = ROUND_UP_PAGE(size);		 		\
       
   553 		if (num_allocations == NUM_ALLOCATIONS)			\
       
   554 			panic("too many ADD_TO_ALLOCATIONS()");		\
       
   555 		allocations[num_allocations].al_ptr = (void**)&ptr;	\
       
   556 		allocations[num_allocations].al_size = size;		\
       
   557 		valloc_sz += size;					\
       
   558 		++num_allocations;				 	\
       
   559 	}
       
   560 
       
   561 static void
       
   562 perform_allocations(void)
       
   563 {
       
   564 	caddr_t mem;
       
   565 	int i;
       
   566 
       
   567 	mem = BOP_ALLOC(bootops, (caddr_t)valloc_base, valloc_sz, BO_NO_ALIGN);
       
   568 	if (mem != (caddr_t)valloc_base)
       
   569 		panic("BOP_ALLOC() failed");
       
   570 	bzero(mem, valloc_sz);
       
   571 	for (i = 0; i < num_allocations; ++i) {
       
   572 		*allocations[i].al_ptr = (void *)mem;
       
   573 		mem += allocations[i].al_size;
       
   574 	}
       
   575 }
       
   576 
       
   577 /*
       
   578  * Our world looks like this at startup time.
       
   579  *
       
   580  * In a 32-bit OS, boot loads the kernel text at 0xfe800000 and kernel data
       
   581  * at 0xfec00000.  On a 64-bit OS, kernel text and data are loaded at
       
   582  * 0xffffffff.fe800000 and 0xffffffff.fec00000 respectively.  Those
       
   583  * addresses are fixed in the binary at link time.
       
   584  *
       
   585  * On the text page:
       
   586  * unix/genunix/krtld/module text loads.
       
   587  *
       
   588  * On the data page:
       
   589  * unix/genunix/krtld/module data loads and space for page_t's.
       
   590  */
       
   591 /*
       
   592  * Machine-dependent startup code
       
   593  */
       
   594 void
       
   595 startup(void)
       
   596 {
       
   597 	extern void startup_bios_disk();
       
   598 	/*
       
   599 	 * Make sure that nobody tries to use sekpm until we have
       
   600 	 * initialized it properly.
       
   601 	 */
       
   602 #if defined(__amd64)
       
   603 	kpm_desired = kpm_enable;
       
   604 #endif
       
   605 	kpm_enable = 0;
       
   606 
       
   607 	progressbar_init();
       
   608 	startup_init();
       
   609 	startup_memlist();
       
   610 	startup_modules();
       
   611 	startup_bios_disk();
       
   612 	startup_bop_gone();
       
   613 	startup_vm();
       
   614 	startup_end();
       
   615 	progressbar_start();
       
   616 }
       
   617 
       
   618 static void
       
   619 startup_init()
       
   620 {
       
   621 	PRM_POINT("startup_init() starting...");
       
   622 
       
   623 	/*
       
   624 	 * Complete the extraction of cpuid data
       
   625 	 */
       
   626 	cpuid_pass2(CPU);
       
   627 
       
   628 	(void) check_boot_version(BOP_GETVERSION(bootops));
       
   629 
       
   630 	/*
       
   631 	 * Check for prom_debug in boot environment
       
   632 	 */
       
   633 	if (BOP_GETPROPLEN(bootops, "prom_debug") >= 0) {
       
   634 		++prom_debug;
       
   635 		PRM_POINT("prom_debug found in boot enviroment");
       
   636 	}
       
   637 
       
   638 	/*
       
   639 	 * Collect node, cpu and memory configuration information.
       
   640 	 */
       
   641 	get_system_configuration();
       
   642 
       
   643 	/*
       
   644 	 * Halt if this is an unsupported processor.
       
   645 	 */
       
   646 	if (x86_type == X86_TYPE_486 || x86_type == X86_TYPE_CYRIX_486) {
       
   647 		printf("\n486 processor (\"%s\") detected.\n",
       
   648 		    CPU->cpu_brandstr);
       
   649 		halt("This processor is not supported by this release "
       
   650 		    "of Solaris.");
       
   651 	}
       
   652 
       
   653 	/*
       
   654 	 * Set up dummy values till psm spl code installed
       
   655 	 */
       
   656 	CPU->cpu_pri_data = dummy_cpu_pri;
       
   657 
       
   658 	PRM_POINT("startup_init() done");
       
   659 }
       
   660 
       
   661 /*
       
   662  * Callback for copy_memlist_filter() to filter nucleus, kadb/kmdb, (ie.
       
   663  * everything mapped above KERNEL_TEXT) pages from phys_avail. Note it
       
   664  * also filters out physical page zero.  There is some reliance on the
       
   665  * boot loader allocating only a few contiguous physical memory chunks.
       
   666  */
       
   667 static void
       
   668 avail_filter(uint64_t *addr, uint64_t *size)
       
   669 {
       
   670 	uintptr_t va;
       
   671 	uintptr_t next_va;
       
   672 	pfn_t pfn;
       
   673 	uint64_t pfn_addr;
       
   674 	uint64_t pfn_eaddr;
       
   675 	uint_t prot;
       
   676 	size_t len;
       
   677 	uint_t change;
       
   678 
       
   679 	if (prom_debug)
       
   680 		prom_printf("\tFilter: in: a=%" PRIx64 ", s=%" PRIx64 "\n",
       
   681 		    *addr, *size);
       
   682 
       
   683 	/*
       
   684 	 * page zero is required for BIOS.. never make it available
       
   685 	 */
       
   686 	if (*addr == 0) {
       
   687 		*addr += MMU_PAGESIZE;
       
   688 		*size -= MMU_PAGESIZE;
       
   689 	}
       
   690 
       
   691 	/*
       
   692 	 * First we trim from the front of the range. Since hat_boot_probe()
       
   693 	 * walks ranges in virtual order, but addr/size are physical, we need
       
   694 	 * to the list until no changes are seen.  This deals with the case
       
   695 	 * where page "p" is mapped at v, page "p + PAGESIZE" is mapped at w
       
   696 	 * but w < v.
       
   697 	 */
       
   698 	do {
       
   699 		change = 0;
       
   700 		for (va = KERNEL_TEXT;
       
   701 		    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
       
   702 		    va = next_va) {
       
   703 
       
   704 			next_va = va + len;
       
   705 			pfn_addr = ptob((uint64_t)pfn);
       
   706 			pfn_eaddr = pfn_addr + len;
       
   707 
       
   708 			if (pfn_addr <= *addr && pfn_eaddr > *addr) {
       
   709 				change = 1;
       
   710 				while (*size > 0 && len > 0) {
       
   711 					*addr += MMU_PAGESIZE;
       
   712 					*size -= MMU_PAGESIZE;
       
   713 					len -= MMU_PAGESIZE;
       
   714 				}
       
   715 			}
       
   716 		}
       
   717 		if (change && prom_debug)
       
   718 			prom_printf("\t\ttrim: a=%" PRIx64 ", s=%" PRIx64 "\n",
       
   719 			    *addr, *size);
       
   720 	} while (change);
       
   721 
       
   722 	/*
       
   723 	 * Trim pages from the end of the range.
       
   724 	 */
       
   725 	for (va = KERNEL_TEXT;
       
   726 	    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
       
   727 	    va = next_va) {
       
   728 
       
   729 		next_va = va + len;
       
   730 		pfn_addr = ptob((uint64_t)pfn);
       
   731 
       
   732 		if (pfn_addr >= *addr && pfn_addr < *addr + *size)
       
   733 			*size = pfn_addr - *addr;
       
   734 	}
       
   735 
       
   736 	if (prom_debug)
       
   737 		prom_printf("\tFilter out: a=%" PRIx64 ", s=%" PRIx64 "\n",
       
   738 		    *addr, *size);
       
   739 }
       
   740 
       
   741 static void
       
   742 kpm_init()
       
   743 {
       
   744 	struct segkpm_crargs b;
       
   745 	uintptr_t start, end;
       
   746 	struct memlist	*pmem;
       
   747 
       
   748 	/*
       
   749 	 * These variables were all designed for sfmmu in which segkpm is
       
   750 	 * mapped using a single pagesize - either 8KB or 4MB.  On x86, we
       
   751 	 * might use 2+ page sizes on a single machine, so none of these
       
   752 	 * variables have a single correct value.  They are set up as if we
       
   753 	 * always use a 4KB pagesize, which should do no harm.  In the long
       
   754 	 * run, we should get rid of KPM's assumption that only a single
       
   755 	 * pagesize is used.
       
   756 	 */
       
   757 	kpm_pgshft = MMU_PAGESHIFT;
       
   758 	kpm_pgsz =  MMU_PAGESIZE;
       
   759 	kpm_pgoff = MMU_PAGEOFFSET;
       
   760 	kpmp2pshft = 0;
       
   761 	kpmpnpgs = 1;
       
   762 	ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0);
       
   763 
       
   764 	PRM_POINT("about to create segkpm");
       
   765 	rw_enter(&kas.a_lock, RW_WRITER);
       
   766 
       
   767 	if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
       
   768 		panic("cannot attach segkpm");
       
   769 
       
   770 	b.prot = PROT_READ | PROT_WRITE;
       
   771 	b.nvcolors = 1;
       
   772 
       
   773 	if (segkpm_create(segkpm, (caddr_t)&b) != 0)
       
   774 		panic("segkpm_create segkpm");
       
   775 
       
   776 	rw_exit(&kas.a_lock);
       
   777 
       
   778 	/*
       
   779 	 * Map each of the memsegs into the kpm segment, coalesing adjacent
       
   780 	 * memsegs to allow mapping with the largest possible pages.
       
   781 	 */
       
   782 	pmem = phys_install;
       
   783 	start = pmem->address;
       
   784 	end = start + pmem->size;
       
   785 	for (;;) {
       
   786 		if (pmem == NULL || pmem->address > end) {
       
   787 			hat_devload(kas.a_hat, kpm_vbase + start,
       
   788 			    end - start, mmu_btop(start),
       
   789 			    PROT_READ | PROT_WRITE,
       
   790 			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
       
   791 			if (pmem == NULL)
       
   792 				break;
       
   793 			start = pmem->address;
       
   794 		}
       
   795 		end = pmem->address + pmem->size;
       
   796 		pmem = pmem->next;
       
   797 	}
       
   798 }
       
   799 
       
   800 /*
       
   801  * The purpose of startup memlist is to get the system to the
       
   802  * point where it can use kmem_alloc()'s that operate correctly
       
   803  * relying on BOP_ALLOC(). This includes allocating page_ts,
       
   804  * page hash table, vmem initialized, etc.
       
   805  *
       
   806  * Boot's versions of physinstalled and physavail are insufficient for
       
   807  * the kernel's purposes. Specifically we don't know which pages that
       
   808  * are not in physavail can be reclaimed after boot is gone.
       
   809  *
       
   810  * This code solves the problem by dividing the address space
       
   811  * into 3 regions as it takes over the MMU from the booter.
       
   812  *
       
   813  * 1) Any (non-nucleus) pages that are mapped at addresses above KERNEL_TEXT
       
   814  * can not be used by the kernel.
       
   815  *
       
   816  * 2) Any free page that happens to be mapped below kernelbase
       
   817  * is protected until the boot loader is released, but will then be reclaimed.
       
   818  *
       
   819  * 3) Boot shouldn't use any address in the remaining area between kernelbase
       
   820  * and KERNEL_TEXT.
       
   821  *
       
   822  * In the case of multiple mappings to the same page, region 1 has precedence
       
   823  * over region 2.
       
   824  */
       
   825 static void
       
   826 startup_memlist(void)
       
   827 {
       
   828 	size_t memlist_sz;
       
   829 	size_t memseg_sz;
       
   830 	size_t pagehash_sz;
       
   831 	size_t pp_sz;
       
   832 	uintptr_t va;
       
   833 	size_t len;
       
   834 	uint_t prot;
       
   835 	pfn_t pfn;
       
   836 	int memblocks;
       
   837 	caddr_t pagecolor_mem;
       
   838 	size_t pagecolor_memsz;
       
   839 	caddr_t page_ctrs_mem;
       
   840 	size_t page_ctrs_size;
       
   841 	struct memlist *current;
       
   842 	extern void startup_build_mem_nodes(struct memlist *);
       
   843 
       
   844 	/* XX64 fix these - they should be in include files */
       
   845 	extern ulong_t cr4_value;
       
   846 	extern size_t page_coloring_init(uint_t, int, int);
       
   847 	extern void page_coloring_setup(caddr_t);
       
   848 
       
   849 	PRM_POINT("startup_memlist() starting...");
       
   850 
       
   851 	/*
       
   852 	 * Take the most current snapshot we can by calling mem-update.
       
   853 	 * For this to work properly, we first have to ask boot for its
       
   854 	 * end address.
       
   855 	 */
       
   856 	if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
       
   857 		(void) BOP_GETPROP(bootops, "memory-update", NULL);
       
   858 
       
   859 	/*
       
   860 	 * find if the kernel is mapped on a large page
       
   861 	 */
       
   862 	va = KERNEL_TEXT;
       
   863 	if (hat_boot_probe(&va, &len, &pfn, &prot) == 0)
       
   864 		panic("Couldn't find kernel text boot mapping");
       
   865 
       
   866 	/*
       
   867 	 * Use leftover large page nucleus text/data space for loadable modules.
       
   868 	 * Use at most MODTEXT/MODDATA.
       
   869 	 */
       
   870 	if (len > MMU_PAGESIZE) {
       
   871 
       
   872 		moddata = (caddr_t)ROUND_UP_PAGE(e_data);
       
   873 		e_moddata = (caddr_t)ROUND_UP_4MEG(e_data);
       
   874 		if (e_moddata - moddata > MODDATA)
       
   875 			e_moddata = moddata + MODDATA;
       
   876 
       
   877 		modtext = (caddr_t)ROUND_UP_PAGE(e_text);
       
   878 		e_modtext = (caddr_t)ROUND_UP_4MEG(e_text);
       
   879 		if (e_modtext - modtext > MODTEXT)
       
   880 			e_modtext = modtext + MODTEXT;
       
   881 
       
   882 
       
   883 	} else {
       
   884 
       
   885 		PRM_POINT("Kernel NOT loaded on Large Page!");
       
   886 		e_moddata = moddata = (caddr_t)ROUND_UP_PAGE(e_data);
       
   887 		e_modtext = modtext = (caddr_t)ROUND_UP_PAGE(e_text);
       
   888 
       
   889 	}
       
   890 	econtig = e_moddata;
       
   891 
       
   892 	PRM_DEBUG(modtext);
       
   893 	PRM_DEBUG(e_modtext);
       
   894 	PRM_DEBUG(moddata);
       
   895 	PRM_DEBUG(e_moddata);
       
   896 	PRM_DEBUG(econtig);
       
   897 
       
   898 	/*
       
   899 	 * For MP machines cr4_value must be set or the non-boot
       
   900 	 * CPUs will not be able to start.
       
   901 	 */
       
   902 	if (x86_feature & X86_LARGEPAGE)
       
   903 		cr4_value = getcr4();
       
   904 	PRM_DEBUG(cr4_value);
       
   905 
       
   906 	/*
       
   907 	 * Examine the boot loaders physical memory map to find out:
       
   908 	 * - total memory in system - physinstalled
       
   909 	 * - the max physical address - physmax
       
   910 	 * - the number of segments the intsalled memory comes in
       
   911 	 */
       
   912 	if (prom_debug)
       
   913 		print_boot_memlist("boot physinstalled",
       
   914 		    bootops->boot_mem->physinstalled);
       
   915 	installed_top_size(bootops->boot_mem->physinstalled, &physmax,
       
   916 	    &physinstalled, &memblocks);
       
   917 	PRM_DEBUG(physmax);
       
   918 	PRM_DEBUG(physinstalled);
       
   919 	PRM_DEBUG(memblocks);
       
   920 
       
   921 	if (prom_debug)
       
   922 		print_boot_memlist("boot physavail",
       
   923 		    bootops->boot_mem->physavail);
       
   924 
       
   925 	/*
       
   926 	 * Initialize hat's mmu parameters.
       
   927 	 * Check for enforce-prot-exec in boot environment. It's used to
       
   928 	 * enable/disable support for the page table entry NX bit.
       
   929 	 * The default is to enforce PROT_EXEC on processors that support NX.
       
   930 	 * Boot seems to round up the "len", but 8 seems to be big enough.
       
   931 	 */
       
   932 	mmu_init();
       
   933 
       
   934 #ifdef	__i386
       
   935 	/*
       
   936 	 * physmax is lowered if there is more memory than can be
       
   937 	 * physically addressed in 32 bit (PAE/non-PAE) modes.
       
   938 	 */
       
   939 	if (mmu.pae_hat) {
       
   940 		if (PFN_ABOVE64G(physmax)) {
       
   941 			physinstalled -= (physmax - (PFN_64G - 1));
       
   942 			physmax = PFN_64G - 1;
       
   943 		}
       
   944 	} else {
       
   945 		if (PFN_ABOVE4G(physmax)) {
       
   946 			physinstalled -= (physmax - (PFN_4G - 1));
       
   947 			physmax = PFN_4G - 1;
       
   948 		}
       
   949 	}
       
   950 #endif
       
   951 
       
   952 	startup_build_mem_nodes(bootops->boot_mem->physinstalled);
       
   953 
       
   954 	if (BOP_GETPROPLEN(bootops, "enforce-prot-exec") >= 0) {
       
   955 		int len = BOP_GETPROPLEN(bootops, "enforce-prot-exec");
       
   956 		char value[8];
       
   957 
       
   958 		if (len < 8)
       
   959 			(void) BOP_GETPROP(bootops, "enforce-prot-exec", value);
       
   960 		else
       
   961 			(void) strcpy(value, "");
       
   962 		if (strcmp(value, "off") == 0)
       
   963 			mmu.pt_nx = 0;
       
   964 	}
       
   965 	PRM_DEBUG(mmu.pt_nx);
       
   966 
       
   967 	/*
       
   968 	 * We will need page_t's for every page in the system, except for
       
   969 	 * memory mapped at or above above the start of the kernel text segment.
       
   970 	 *
       
   971 	 * pages above e_modtext are attributed to kernel debugger (obp_pages)
       
   972 	 */
       
   973 	npages = physinstalled - 1; /* avail_filter() skips page 0, so "- 1" */
       
   974 	obp_pages = 0;
       
   975 	va = KERNEL_TEXT;
       
   976 	while (hat_boot_probe(&va, &len, &pfn, &prot) != 0) {
       
   977 		npages -= len >> MMU_PAGESHIFT;
       
   978 		if (va >= (uintptr_t)e_moddata)
       
   979 			obp_pages += len >> MMU_PAGESHIFT;
       
   980 		va += len;
       
   981 	}
       
   982 	PRM_DEBUG(npages);
       
   983 	PRM_DEBUG(obp_pages);
       
   984 
       
   985 	/*
       
   986 	 * If physmem is patched to be non-zero, use it instead of
       
   987 	 * the computed value unless it is larger than the real
       
   988 	 * amount of memory on hand.
       
   989 	 */
       
   990 	if (physmem == 0 || physmem > npages)
       
   991 		physmem = npages;
       
   992 	else
       
   993 		npages = physmem;
       
   994 	PRM_DEBUG(physmem);
       
   995 
       
   996 	/*
       
   997 	 * We now compute the sizes of all the  initial allocations for
       
   998 	 * structures the kernel needs in order do kmem_alloc(). These
       
   999 	 * include:
       
  1000 	 *	memsegs
       
  1001 	 *	memlists
       
  1002 	 *	page hash table
       
  1003 	 *	page_t's
       
  1004 	 *	page coloring data structs
       
  1005 	 */
       
  1006 	memseg_sz = sizeof (struct memseg) * (memblocks + POSS_NEW_FRAGMENTS);
       
  1007 	ADD_TO_ALLOCATIONS(memseg_base, memseg_sz);
       
  1008 	PRM_DEBUG(memseg_sz);
       
  1009 
       
  1010 	/*
       
  1011 	 * Reserve space for phys_avail/phys_install memlists.
       
  1012 	 * There's no real good way to know exactly how much room we'll need,
       
  1013 	 * but this should be a good upper bound.
       
  1014 	 */
       
  1015 	memlist_sz = ROUND_UP_PAGE(2 * sizeof (struct memlist) *
       
  1016 	    (memblocks + POSS_NEW_FRAGMENTS));
       
  1017 	ADD_TO_ALLOCATIONS(memlist, memlist_sz);
       
  1018 	PRM_DEBUG(memlist_sz);
       
  1019 
       
  1020 	/*
       
  1021 	 * The page structure hash table size is a power of 2
       
  1022 	 * such that the average hash chain length is PAGE_HASHAVELEN.
       
  1023 	 */
       
  1024 	page_hashsz = npages / PAGE_HASHAVELEN;
       
  1025 	page_hashsz = 1 << highbit(page_hashsz);
       
  1026 	pagehash_sz = sizeof (struct page *) * page_hashsz;
       
  1027 	ADD_TO_ALLOCATIONS(page_hash, pagehash_sz);
       
  1028 	PRM_DEBUG(pagehash_sz);
       
  1029 
       
  1030 	/*
       
  1031 	 * Set aside room for the page structures themselves.  Note: on
       
  1032 	 * 64-bit systems we don't allocate page_t's for every page here.
       
  1033 	 * We just allocate enough to map the lowest 4GB of physical
       
  1034 	 * memory, minus those pages that are used for the "nucleus" kernel
       
  1035 	 * text and data.  The remaining pages are allocated once we can
       
  1036 	 * map around boot.
       
  1037 	 *
       
  1038 	 * boot_npages is used to allocate an area big enough for our
       
  1039 	 * initial page_t's. kphym_init may use less than that.
       
  1040 	 */
       
  1041 	boot_npages = npages;
       
  1042 #if defined(__amd64)
       
  1043 	if (npages > mmu_btop(FOURGB - (econtig - s_text)))
       
  1044 		boot_npages = mmu_btop(FOURGB - (econtig - s_text));
       
  1045 #endif
       
  1046 	PRM_DEBUG(boot_npages);
       
  1047 	pp_sz = sizeof (struct page) * boot_npages;
       
  1048 	ADD_TO_ALLOCATIONS(pp_base, pp_sz);
       
  1049 	PRM_DEBUG(pp_sz);
       
  1050 
       
  1051 	/*
       
  1052 	 * determine l2 cache info and memory size for page coloring
       
  1053 	 */
       
  1054 	(void) getl2cacheinfo(CPU,
       
  1055 	    &l2cache_sz, &l2cache_linesz, &l2cache_assoc);
       
  1056 	pagecolor_memsz =
       
  1057 	    page_coloring_init(l2cache_sz, l2cache_linesz, l2cache_assoc);
       
  1058 	ADD_TO_ALLOCATIONS(pagecolor_mem, pagecolor_memsz);
       
  1059 	PRM_DEBUG(pagecolor_memsz);
       
  1060 
       
  1061 	page_ctrs_size = page_ctrs_sz();
       
  1062 	ADD_TO_ALLOCATIONS(page_ctrs_mem, page_ctrs_size);
       
  1063 	PRM_DEBUG(page_ctrs_size);
       
  1064 
       
  1065 	/*
       
  1066 	 * valloc_base will be below kernel text
       
  1067 	 * The extra pages are for the HAT and kmdb to map page tables.
       
  1068 	 */
       
  1069 	valloc_sz = ROUND_UP_LPAGE(valloc_sz);
       
  1070 	valloc_base = KERNEL_TEXT - valloc_sz;
       
  1071 	PRM_DEBUG(valloc_base);
       
  1072 	ptable_va = valloc_base - ptable_sz;
       
  1073 
       
  1074 #if defined(__amd64)
       
  1075 	if (eprom_kernelbase && eprom_kernelbase != KERNELBASE)
       
  1076 		cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit "
       
  1077 		    "systems.");
       
  1078 	kernelbase = (uintptr_t)KERNELBASE;
       
  1079 	core_base = (uintptr_t)COREHEAP_BASE;
       
  1080 	core_size = ptable_va - core_base;
       
  1081 #else	/* __i386 */
       
  1082 	/*
       
  1083 	 * We configure kernelbase based on:
       
  1084 	 *
       
  1085 	 * 1. user specified kernelbase via eeprom command. Value cannot exceed
       
  1086 	 *    KERNELBASE_MAX. we large page align eprom_kernelbase
       
  1087 	 *
       
  1088 	 * 2. Default to KERNELBASE and adjust to 2X less the size for page_t.
       
  1089 	 *    On large memory systems we must lower kernelbase to allow
       
  1090 	 *    enough room for page_t's for all of memory.
       
  1091 	 *
       
  1092 	 * The value set here, might be changed a little later.
       
  1093 	 */
       
  1094 	if (eprom_kernelbase) {
       
  1095 		kernelbase = eprom_kernelbase & mmu.level_mask[1];
       
  1096 		if (kernelbase > KERNELBASE_MAX)
       
  1097 			kernelbase = KERNELBASE_MAX;
       
  1098 	} else {
       
  1099 		kernelbase = (uintptr_t)KERNELBASE;
       
  1100 		kernelbase -= ROUND_UP_4MEG(2 * valloc_sz);
       
  1101 	}
       
  1102 	ASSERT((kernelbase & mmu.level_offset[1]) == 0);
       
  1103 	core_base = ptable_va;
       
  1104 	core_size = 0;
       
  1105 #endif
       
  1106 
       
  1107 	PRM_DEBUG(kernelbase);
       
  1108 	PRM_DEBUG(core_base);
       
  1109 	PRM_DEBUG(core_size);
       
  1110 
       
  1111 	/*
       
  1112 	 * At this point, we can only use a portion of the kernelheap that
       
  1113 	 * will be available after we boot.  Both 32-bit and 64-bit systems
       
  1114 	 * have this limitation, although the reasons are completely
       
  1115 	 * different.
       
  1116 	 *
       
  1117 	 * On 64-bit systems, the booter only supports allocations in the
       
  1118 	 * upper 4GB of memory, so we have to work with a reduced kernel
       
  1119 	 * heap until we take over all allocations.  The booter also sits
       
  1120 	 * in the lower portion of that 4GB range, so we have to raise the
       
  1121 	 * bottom of the heap even further.
       
  1122 	 *
       
  1123 	 * On 32-bit systems we have to leave room to place segmap below
       
  1124 	 * the heap.  We don't yet know how large segmap will be, so we
       
  1125 	 * have to be very conservative.
       
  1126 	 */
       
  1127 #if defined(__amd64)
       
  1128 	/*
       
  1129 	 * XX64: For now, we let boot have the lower 2GB of the top 4GB
       
  1130 	 * address range.  In the long run, that should be fixed.  It's
       
  1131 	 * insane for a booter to need 2 2GB address ranges.
       
  1132 	 */
       
  1133 	boot_kernelheap = (caddr_t)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE);
       
  1134 	segmap_reserved = 0;
       
  1135 
       
  1136 #else	/* __i386 */
       
  1137 	segkp_fromheap = 1;
       
  1138 	segmap_reserved = ROUND_UP_LPAGE(MAX(segmapsize, SEGMAPMAX));
       
  1139 	boot_kernelheap = (caddr_t)(ROUND_UP_LPAGE(kernelbase) +
       
  1140 	    segmap_reserved);
       
  1141 #endif
       
  1142 	PRM_DEBUG(boot_kernelheap);
       
  1143 	kernelheap = boot_kernelheap;
       
  1144 	ekernelheap = (char *)core_base;
       
  1145 
       
  1146 	/*
       
  1147 	 * If segmap is too large we can push the bottom of the kernel heap
       
  1148 	 * higher than the base.  Or worse, it could exceed the top of the
       
  1149 	 * VA space entirely, causing it to wrap around.
       
  1150 	 */
       
  1151 	if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase)
       
  1152 		panic("too little memory available for kernelheap,"
       
  1153 			    " use a different kernelbase");
       
  1154 
       
  1155 	/*
       
  1156 	 * Now that we know the real value of kernelbase,
       
  1157 	 * update variables that were initialized with a value of
       
  1158 	 * KERNELBASE (in common/conf/param.c).
       
  1159 	 *
       
  1160 	 * XXX	The problem with this sort of hackery is that the
       
  1161 	 *	compiler just may feel like putting the const declarations
       
  1162 	 *	(in param.c) into the .text section.  Perhaps they should
       
  1163 	 *	just be declared as variables there?
       
  1164 	 */
       
  1165 
       
  1166 #if defined(__amd64)
       
  1167 	ASSERT(_kernelbase == KERNELBASE);
       
  1168 	ASSERT(_userlimit == USERLIMIT);
       
  1169 	/*
       
  1170 	 * As one final sanity check, verify that the "red zone" between
       
  1171 	 * kernel and userspace is exactly the size we expected.
       
  1172 	 */
       
  1173 	ASSERT(_kernelbase == (_userlimit + (2 * 1024 * 1024)));
       
  1174 #else
       
  1175 	*(uintptr_t *)&_kernelbase = kernelbase;
       
  1176 	*(uintptr_t *)&_userlimit = kernelbase;
       
  1177 	*(uintptr_t *)&_userlimit32 = _userlimit;
       
  1178 #endif
       
  1179 	PRM_DEBUG(_kernelbase);
       
  1180 	PRM_DEBUG(_userlimit);
       
  1181 	PRM_DEBUG(_userlimit32);
       
  1182 
       
  1183 	/*
       
  1184 	 * do all the initial allocations
       
  1185 	 */
       
  1186 	perform_allocations();
       
  1187 
       
  1188 	/*
       
  1189 	 * Initialize the kernel heap. Note 3rd argument must be > 1st.
       
  1190 	 */
       
  1191 	kernelheap_init(kernelheap, ekernelheap, kernelheap + MMU_PAGESIZE,
       
  1192 	    (void *)core_base, (void *)ptable_va);
       
  1193 
       
  1194 	/*
       
  1195 	 * Build phys_install and phys_avail in kernel memspace.
       
  1196 	 * - phys_install should be all memory in the system.
       
  1197 	 * - phys_avail is phys_install minus any memory mapped before this
       
  1198 	 *    point above KERNEL_TEXT.
       
  1199 	 */
       
  1200 	current = phys_install = memlist;
       
  1201 	copy_memlist_filter(bootops->boot_mem->physinstalled, &current, NULL);
       
  1202 	if ((caddr_t)current > (caddr_t)memlist + memlist_sz)
       
  1203 		panic("physinstalled was too big!");
       
  1204 	if (prom_debug)
       
  1205 		print_kernel_memlist("phys_install", phys_install);
       
  1206 
       
  1207 	phys_avail = current;
       
  1208 	PRM_POINT("Building phys_avail:\n");
       
  1209 	copy_memlist_filter(bootops->boot_mem->physinstalled, &current,
       
  1210 	    avail_filter);
       
  1211 	if ((caddr_t)current > (caddr_t)memlist + memlist_sz)
       
  1212 		panic("physavail was too big!");
       
  1213 	if (prom_debug)
       
  1214 		print_kernel_memlist("phys_avail", phys_avail);
       
  1215 
       
  1216 	/*
       
  1217 	 * setup page coloring
       
  1218 	 */
       
  1219 	page_coloring_setup(pagecolor_mem);
       
  1220 	page_lock_init();	/* currently a no-op */
       
  1221 
       
  1222 	/*
       
  1223 	 * free page list counters
       
  1224 	 */
       
  1225 	(void) page_ctrs_alloc(page_ctrs_mem);
       
  1226 
       
  1227 	/*
       
  1228 	 * Initialize the page structures from the memory lists.
       
  1229 	 */
       
  1230 	availrmem_initial = availrmem = freemem = 0;
       
  1231 	PRM_POINT("Calling kphysm_init()...");
       
  1232 	boot_npages = kphysm_init(pp_base, memseg_base, 0, boot_npages);
       
  1233 	PRM_POINT("kphysm_init() done");
       
  1234 	PRM_DEBUG(boot_npages);
       
  1235 
       
  1236 	/*
       
  1237 	 * Now that page_t's have been initialized, remove all the
       
  1238 	 * initial allocation pages from the kernel free page lists.
       
  1239 	 */
       
  1240 	boot_mapin((caddr_t)valloc_base, valloc_sz);
       
  1241 
       
  1242 	/*
       
  1243 	 * Initialize kernel memory allocator.
       
  1244 	 */
       
  1245 	kmem_init();
       
  1246 
       
  1247 	/*
       
  1248 	 * print this out early so that we know what's going on
       
  1249 	 */
       
  1250 	cmn_err(CE_CONT, "?features: %b\n", x86_feature, FMT_X86_FEATURE);
       
  1251 
       
  1252 	/*
       
  1253 	 * Initialize bp_mapin().
       
  1254 	 */
       
  1255 	bp_init(MMU_PAGESIZE, HAT_STORECACHING_OK);
       
  1256 
       
  1257 #if defined(__i386)
       
  1258 	if (eprom_kernelbase && (eprom_kernelbase != kernelbase))
       
  1259 		cmn_err(CE_WARN, "kernelbase value, User specified 0x%lx, "
       
  1260 		    "System using 0x%lx",
       
  1261 		    (uintptr_t)eprom_kernelbase, (uintptr_t)kernelbase);
       
  1262 #endif
       
  1263 
       
  1264 #ifdef	KERNELBASE_ABI_MIN
       
  1265 	if (kernelbase < (uintptr_t)KERNELBASE_ABI_MIN) {
       
  1266 		cmn_err(CE_NOTE, "!kernelbase set to 0x%lx, system is not "
       
  1267 		    "i386 ABI compliant.", (uintptr_t)kernelbase);
       
  1268 	}
       
  1269 #endif
       
  1270 
       
  1271 	PRM_POINT("startup_memlist() done");
       
  1272 }
       
  1273 
       
  1274 static void
       
  1275 startup_modules(void)
       
  1276 {
       
  1277 	unsigned int i;
       
  1278 	extern void impl_setup_ddi(void);
       
  1279 	extern void prom_setup(void);
       
  1280 
       
  1281 	PRM_POINT("startup_modules() starting...");
       
  1282 	/*
       
  1283 	 * Initialize ten-micro second timer so that drivers will
       
  1284 	 * not get short changed in their init phase. This was
       
  1285 	 * not getting called until clkinit which, on fast cpu's
       
  1286 	 * caused the drv_usecwait to be way too short.
       
  1287 	 */
       
  1288 	microfind();
       
  1289 
       
  1290 	/*
       
  1291 	 * Read the GMT lag from /etc/rtc_config.
       
  1292 	 */
       
  1293 	gmt_lag = process_rtc_config_file();
       
  1294 
       
  1295 	/*
       
  1296 	 * Calculate default settings of system parameters based upon
       
  1297 	 * maxusers, yet allow to be overridden via the /etc/system file.
       
  1298 	 */
       
  1299 	param_calc(0);
       
  1300 
       
  1301 	mod_setup();
       
  1302 
       
  1303 	/*
       
  1304 	 * Setup machine check architecture on P6
       
  1305 	 */
       
  1306 	setup_mca();
       
  1307 
       
  1308 	/*
       
  1309 	 * Initialize system parameters.
       
  1310 	 */
       
  1311 	param_init();
       
  1312 
       
  1313 	/*
       
  1314 	 * maxmem is the amount of physical memory we're playing with.
       
  1315 	 */
       
  1316 	maxmem = physmem;
       
  1317 
       
  1318 	/*
       
  1319 	 * Initialize the hat layer.
       
  1320 	 */
       
  1321 	hat_init();
       
  1322 
       
  1323 	/*
       
  1324 	 * Initialize segment management stuff.
       
  1325 	 */
       
  1326 	seg_init();
       
  1327 
       
  1328 	if (modload("fs", "specfs") == -1)
       
  1329 		halt("Can't load specfs");
       
  1330 
       
  1331 	if (modload("fs", "devfs") == -1)
       
  1332 		halt("Can't load devfs");
       
  1333 
       
  1334 	dispinit();
       
  1335 
       
  1336 	/*
       
  1337 	 * This is needed here to initialize hw_serial[] for cluster booting.
       
  1338 	 */
       
  1339 	if ((i = modload("misc", "sysinit")) != (unsigned int)-1)
       
  1340 		(void) modunload(i);
       
  1341 	else
       
  1342 		cmn_err(CE_CONT, "sysinit load failed");
       
  1343 
       
  1344 	/* Read cluster configuration data. */
       
  1345 	clconf_init();
       
  1346 
       
  1347 	/*
       
  1348 	 * Create a kernel device tree. First, create rootnex and
       
  1349 	 * then invoke bus specific code to probe devices.
       
  1350 	 */
       
  1351 	setup_ddi();
       
  1352 	impl_setup_ddi();
       
  1353 	/*
       
  1354 	 * Fake a prom tree such that /dev/openprom continues to work
       
  1355 	 */
       
  1356 	prom_setup();
       
  1357 
       
  1358 	/*
       
  1359 	 * Load all platform specific modules
       
  1360 	 */
       
  1361 	psm_modload();
       
  1362 
       
  1363 	PRM_POINT("startup_modules() done");
       
  1364 }
       
  1365 
       
  1366 static void
       
  1367 startup_bop_gone(void)
       
  1368 {
       
  1369 	PRM_POINT("startup_bop_gone() starting...");
       
  1370 
       
  1371 	/*
       
  1372 	 * Do final allocations of HAT data structures that need to
       
  1373 	 * be allocated before quiescing the boot loader.
       
  1374 	 */
       
  1375 	PRM_POINT("Calling hat_kern_alloc()...");
       
  1376 	hat_kern_alloc();
       
  1377 	PRM_POINT("hat_kern_alloc() done");
       
  1378 
       
  1379 	/*
       
  1380 	 * Setup MTRR (Memory type range registers)
       
  1381 	 */
       
  1382 	setup_mtrr();
       
  1383 	PRM_POINT("startup_bop_gone() done");
       
  1384 }
       
  1385 
       
  1386 /*
       
  1387  * Walk through the pagetables looking for pages mapped in by boot.  If the
       
  1388  * setaside flag is set the pages are expected to be returned to the
       
  1389  * kernel later in boot, so we add them to the bootpages list.
       
  1390  */
       
  1391 static void
       
  1392 protect_boot_range(uintptr_t low, uintptr_t high, int setaside)
       
  1393 {
       
  1394 	uintptr_t va = low;
       
  1395 	size_t len;
       
  1396 	uint_t prot;
       
  1397 	pfn_t pfn;
       
  1398 	page_t *pp;
       
  1399 	pgcnt_t boot_protect_cnt = 0;
       
  1400 
       
  1401 	while (hat_boot_probe(&va, &len, &pfn, &prot) != 0 && va < high) {
       
  1402 		if (va + len >= high)
       
  1403 			panic("0x%lx byte mapping at 0x%p exceeds boot's "
       
  1404 			    "legal range.", len, (void *)va);
       
  1405 
       
  1406 		while (len > 0) {
       
  1407 			pp = page_numtopp_alloc(pfn);
       
  1408 			if (pp != NULL) {
       
  1409 				if (setaside == 0)
       
  1410 					panic("Unexpected mapping by boot.  "
       
  1411 					    "addr=%p pfn=%lx\n",
       
  1412 					    (void *)va, pfn);
       
  1413 
       
  1414 				pp->p_next = bootpages;
       
  1415 				bootpages = pp;
       
  1416 				++boot_protect_cnt;
       
  1417 			}
       
  1418 
       
  1419 			++pfn;
       
  1420 			len -= MMU_PAGESIZE;
       
  1421 			va += MMU_PAGESIZE;
       
  1422 		}
       
  1423 	}
       
  1424 	PRM_DEBUG(boot_protect_cnt);
       
  1425 }
       
  1426 
       
  1427 static void
       
  1428 startup_vm(void)
       
  1429 {
       
  1430 	struct segmap_crargs a;
       
  1431 	extern void hat_kern_setup(void);
       
  1432 	pgcnt_t pages_left;
       
  1433 
       
  1434 	PRM_POINT("startup_vm() starting...");
       
  1435 
       
  1436 	/*
       
  1437 	 * The next two loops are done in distinct steps in order
       
  1438 	 * to be sure that any page that is doubly mapped (both above
       
  1439 	 * KERNEL_TEXT and below kernelbase) is dealt with correctly.
       
  1440 	 * Note this may never happen, but it might someday.
       
  1441 	 */
       
  1442 
       
  1443 	bootpages = NULL;
       
  1444 	PRM_POINT("Protecting boot pages");
       
  1445 	/*
       
  1446 	 * Protect any pages mapped above KERNEL_TEXT that somehow have
       
  1447 	 * page_t's. This can only happen if something weird allocated
       
  1448 	 * in this range (like kadb/kmdb).
       
  1449 	 */
       
  1450 	protect_boot_range(KERNEL_TEXT, (uintptr_t)-1, 0);
       
  1451 
       
  1452 	/*
       
  1453 	 * Before we can take over memory allocation/mapping from the boot
       
  1454 	 * loader we must remove from our free page lists any boot pages that
       
  1455 	 * will stay mapped until release_bootstrap().
       
  1456 	 */
       
  1457 	protect_boot_range(0, kernelbase, 1);
       
  1458 #if defined(__amd64)
       
  1459 	protect_boot_range(BOOT_DOUBLEMAP_BASE,
       
  1460 	    BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE, 0);
       
  1461 #endif
       
  1462 
       
  1463 	/*
       
  1464 	 * Copy in boot's page tables, set up extra page tables for the kernel,
       
  1465 	 * and switch to the kernel's context.
       
  1466 	 */
       
  1467 	PRM_POINT("Calling hat_kern_setup()...");
       
  1468 	hat_kern_setup();
       
  1469 
       
  1470 	/*
       
  1471 	 * It is no longer safe to call BOP_ALLOC(), so make sure we don't.
       
  1472 	 */
       
  1473 	bootops->bsys_alloc = NULL;
       
  1474 	PRM_POINT("hat_kern_setup() done");
       
  1475 
       
  1476 	hat_cpu_online(CPU);
       
  1477 
       
  1478 	/*
       
  1479 	 * Before we call kvm_init(), we need to establish the final size
       
  1480 	 * of the kernel's heap.  So, we need to figure out how much space
       
  1481 	 * to set aside for segkp, segkpm, and segmap.
       
  1482 	 */
       
  1483 	final_kernelheap = (caddr_t)ROUND_UP_LPAGE(kernelbase);
       
  1484 #if defined(__amd64)
       
  1485 	if (kpm_desired) {
       
  1486 		/*
       
  1487 		 * Segkpm appears at the bottom of the kernel's address
       
  1488 		 * range.  To detect accidental overruns of the user
       
  1489 		 * address space, we leave a "red zone" of unmapped memory
       
  1490 		 * between kernelbase and the beginning of segkpm.
       
  1491 		 */
       
  1492 		kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE;
       
  1493 		kpm_size = mmu_ptob(physmax);
       
  1494 		PRM_DEBUG(kpm_vbase);
       
  1495 		PRM_DEBUG(kpm_size);
       
  1496 		final_kernelheap =
       
  1497 		    (caddr_t)ROUND_UP_TOPLEVEL(kpm_vbase + kpm_size);
       
  1498 	}
       
  1499 
       
  1500 	if (!segkp_fromheap) {
       
  1501 		size_t sz = mmu_ptob(segkpsize);
       
  1502 
       
  1503 		/*
       
  1504 		 * determine size of segkp and adjust the bottom of the
       
  1505 		 * kernel's heap.
       
  1506 		 */
       
  1507 		if (sz < SEGKPMINSIZE || sz > SEGKPMAXSIZE) {
       
  1508 			sz = SEGKPDEFSIZE;
       
  1509 			cmn_err(CE_WARN, "!Illegal value for segkpsize. "
       
  1510 			    "segkpsize has been reset to %ld pages",
       
  1511 			    mmu_btop(sz));
       
  1512 		}
       
  1513 		sz = MIN(sz, MAX(SEGKPMINSIZE, mmu_ptob(physmem)));
       
  1514 
       
  1515 		segkpsize = mmu_btop(ROUND_UP_LPAGE(sz));
       
  1516 		segkp_base = final_kernelheap;
       
  1517 		PRM_DEBUG(segkpsize);
       
  1518 		PRM_DEBUG(segkp_base);
       
  1519 		final_kernelheap = segkp_base + mmu_ptob(segkpsize);
       
  1520 		PRM_DEBUG(final_kernelheap);
       
  1521 	}
       
  1522 
       
  1523 	/*
       
  1524 	 * put the range of VA for device mappings next
       
  1525 	 */
       
  1526 	toxic_addr = (uintptr_t)final_kernelheap;
       
  1527 	PRM_DEBUG(toxic_addr);
       
  1528 	final_kernelheap = (char *)toxic_addr + toxic_size;
       
  1529 #endif
       
  1530 	PRM_DEBUG(final_kernelheap);
       
  1531 	ASSERT(final_kernelheap < boot_kernelheap);
       
  1532 
       
  1533 	/*
       
  1534 	 * Users can change segmapsize through eeprom or /etc/system.
       
  1535 	 * If the variable is tuned through eeprom, there is no upper
       
  1536 	 * bound on the size of segmap.  If it is tuned through
       
  1537 	 * /etc/system on 32-bit systems, it must be no larger than we
       
  1538 	 * planned for in startup_memlist().
       
  1539 	 */
       
  1540 	segmapsize = MAX(ROUND_UP_LPAGE(segmapsize), SEGMAPDEFAULT);
       
  1541 	segkmap_start = ROUND_UP_LPAGE((uintptr_t)final_kernelheap);
       
  1542 
       
  1543 #if defined(__i386)
       
  1544 	if (segmapsize > segmap_reserved) {
       
  1545 		cmn_err(CE_NOTE, "!segmapsize may not be set > 0x%lx in "
       
  1546 		    "/etc/system.  Use eeprom.", (long)SEGMAPMAX);
       
  1547 		segmapsize = segmap_reserved;
       
  1548 	}
       
  1549 	/*
       
  1550 	 * 32-bit systems don't have segkpm or segkp, so segmap appears at
       
  1551 	 * the bottom of the kernel's address range.  Set aside space for a
       
  1552 	 * red zone just below the start of segmap.
       
  1553 	 */
       
  1554 	segkmap_start += KERNEL_REDZONE_SIZE;
       
  1555 	segmapsize -= KERNEL_REDZONE_SIZE;
       
  1556 #endif
       
  1557 	final_kernelheap = (char *)(segkmap_start + segmapsize);
       
  1558 
       
  1559 	PRM_DEBUG(segkmap_start);
       
  1560 	PRM_DEBUG(segmapsize);
       
  1561 	PRM_DEBUG(final_kernelheap);
       
  1562 
       
  1563 	/*
       
  1564 	 * Initialize VM system
       
  1565 	 */
       
  1566 	PRM_POINT("Calling kvm_init()...");
       
  1567 	kvm_init();
       
  1568 	PRM_POINT("kvm_init() done");
       
  1569 
       
  1570 	/*
       
  1571 	 * Tell kmdb that the VM system is now working
       
  1572 	 */
       
  1573 	if (boothowto & RB_DEBUG)
       
  1574 		kdi_dvec_vmready();
       
  1575 
       
  1576 	/*
       
  1577 	 * Mangle the brand string etc.
       
  1578 	 */
       
  1579 	cpuid_pass3(CPU);
       
  1580 
       
  1581 	PRM_DEBUG(final_kernelheap);
       
  1582 
       
  1583 	/*
       
  1584 	 * Now that we can use memory outside the top 4GB (on 64-bit
       
  1585 	 * systems) and we know the size of segmap, we can set the final
       
  1586 	 * size of the kernel's heap.  Note: on 64-bit systems we still
       
  1587 	 * can't touch anything in the bottom half of the top 4GB range
       
  1588 	 * because boot still has pages mapped there.
       
  1589 	 */
       
  1590 	if (final_kernelheap < boot_kernelheap) {
       
  1591 		kernelheap_extend(final_kernelheap, boot_kernelheap);
       
  1592 #if defined(__amd64)
       
  1593 		kmem_setaside = vmem_xalloc(heap_arena, BOOT_DOUBLEMAP_SIZE,
       
  1594 		    MMU_PAGESIZE, 0, 0, (void *)(BOOT_DOUBLEMAP_BASE),
       
  1595 		    (void *)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE),
       
  1596 		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
       
  1597 		PRM_DEBUG(kmem_setaside);
       
  1598 		if (kmem_setaside == NULL)
       
  1599 			panic("Could not protect boot's memory");
       
  1600 #endif
       
  1601 	}
       
  1602 	/*
       
  1603 	 * Now that the kernel heap may have grown significantly, we need
       
  1604 	 * to make all the remaining page_t's available to back that memory.
       
  1605 	 *
       
  1606 	 * XX64 this should probably wait till after release boot-strap too.
       
  1607 	 */
       
  1608 	pages_left = npages - boot_npages;
       
  1609 	if (pages_left > 0) {
       
  1610 		PRM_DEBUG(pages_left);
       
  1611 		(void) kphysm_init(NULL, memseg_base, boot_npages, pages_left);
       
  1612 	}
       
  1613 
       
  1614 #if defined(__amd64)
       
  1615 
       
  1616 	/*
       
  1617 	 * Create the device arena for toxic (to dtrace/kmdb) mappings.
       
  1618 	 */
       
  1619 	device_arena = vmem_create("device", (void *)toxic_addr,
       
  1620 	    toxic_size, MMU_PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
       
  1621 
       
  1622 #else	/* __i386 */
       
  1623 
       
  1624 	/*
       
  1625 	 * allocate the bit map that tracks toxic pages
       
  1626 	 */
       
  1627 	toxic_bit_map_len = btop((ulong_t)(ptable_va - kernelbase));
       
  1628 	PRM_DEBUG(toxic_bit_map_len);
       
  1629 	toxic_bit_map =
       
  1630 	    kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP);
       
  1631 	ASSERT(toxic_bit_map != NULL);
       
  1632 	PRM_DEBUG(toxic_bit_map);
       
  1633 
       
  1634 #endif	/* __i386 */
       
  1635 
       
  1636 
       
  1637 	/*
       
  1638 	 * Now that we've got more VA, as well as the ability to allocate from
       
  1639 	 * it, tell the debugger.
       
  1640 	 */
       
  1641 	if (boothowto & RB_DEBUG)
       
  1642 		kdi_dvec_memavail();
       
  1643 
       
  1644 	/*
       
  1645 	 * The following code installs a special page fault handler (#pf)
       
  1646 	 * to work around a pentium bug.
       
  1647 	 */
       
  1648 #if !defined(__amd64)
       
  1649 	if (x86_type == X86_TYPE_P5) {
       
  1650 		gate_desc_t *newidt;
       
  1651 		desctbr_t    newidt_r;
       
  1652 
       
  1653 		if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL)
       
  1654 			panic("failed to install pentium_pftrap");
       
  1655 
       
  1656 		bcopy(idt0, newidt, sizeof (idt0));
       
  1657 		set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap,
       
  1658 		    KCS_SEL, 0, SDT_SYSIGT, SEL_KPL);
       
  1659 
       
  1660 		(void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE,
       
  1661 		    PROT_READ|PROT_EXEC);
       
  1662 
       
  1663 		newidt_r.dtr_limit = sizeof (idt0) - 1;
       
  1664 		newidt_r.dtr_base = (uintptr_t)newidt;
       
  1665 		CPU->cpu_idt = newidt;
       
  1666 		wr_idtr(&newidt_r);
       
  1667 	}
       
  1668 #endif	/* !__amd64 */
       
  1669 
       
  1670 	/*
       
  1671 	 * Map page pfn=0 for drivers, such as kd, that need to pick up
       
  1672 	 * parameters left there by controllers/BIOS.
       
  1673 	 */
       
  1674 	PRM_POINT("setup up p0_va");
       
  1675 	p0_va = i86devmap(0, 1, PROT_READ);
       
  1676 	PRM_DEBUG(p0_va);
       
  1677 
       
  1678 	/*
       
  1679 	 * If the following is true, someone has patched phsymem to be less
       
  1680 	 * than the number of pages that the system actually has.  Remove
       
  1681 	 * pages until system memory is limited to the requested amount.
       
  1682 	 * Since we have allocated page structures for all pages, we
       
  1683 	 * correct the amount of memory we want to remove by the size of
       
  1684 	 * the memory used to hold page structures for the non-used pages.
       
  1685 	 */
       
  1686 	if (physmem < npages) {
       
  1687 		uint_t diff;
       
  1688 		offset_t off;
       
  1689 		struct page *pp;
       
  1690 		caddr_t rand_vaddr;
       
  1691 		struct seg kseg;
       
  1692 
       
  1693 		cmn_err(CE_WARN, "limiting physmem to %lu pages", physmem);
       
  1694 
       
  1695 		off = 0;
       
  1696 		diff = npages - physmem;
       
  1697 		diff -= mmu_btopr(diff * sizeof (struct page));
       
  1698 		kseg.s_as = &kas;
       
  1699 		while (diff--) {
       
  1700 			rand_vaddr = (caddr_t)
       
  1701 			    (((uintptr_t)&unused_pages_vp >> 7) ^
       
  1702 			    (uintptr_t)((u_offset_t)off >> MMU_PAGESHIFT));
       
  1703 			pp = page_create_va(&unused_pages_vp, off, MMU_PAGESIZE,
       
  1704 				PG_WAIT | PG_EXCL, &kseg, rand_vaddr);
       
  1705 			if (pp == NULL) {
       
  1706 				panic("limited physmem too much!");
       
  1707 				/*NOTREACHED*/
       
  1708 			}
       
  1709 			page_io_unlock(pp);
       
  1710 			page_downgrade(pp);
       
  1711 			availrmem--;
       
  1712 			off += MMU_PAGESIZE;
       
  1713 		}
       
  1714 	}
       
  1715 
       
  1716 	cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n",
       
  1717 	    physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled));
       
  1718 
       
  1719 	PRM_POINT("Calling hat_init_finish()...");
       
  1720 	hat_init_finish();
       
  1721 	PRM_POINT("hat_init_finish() done");
       
  1722 
       
  1723 	/*
       
  1724 	 * Initialize the segkp segment type.
       
  1725 	 */
       
  1726 	rw_enter(&kas.a_lock, RW_WRITER);
       
  1727 	if (!segkp_fromheap) {
       
  1728 		if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize),
       
  1729 		    segkp) < 0) {
       
  1730 			panic("startup: cannot attach segkp");
       
  1731 			/*NOTREACHED*/
       
  1732 		}
       
  1733 	} else {
       
  1734 		/*
       
  1735 		 * For 32 bit x86 systems, we will have segkp under the heap.
       
  1736 		 * There will not be a segkp segment.  We do, however, need
       
  1737 		 * to fill in the seg structure.
       
  1738 		 */
       
  1739 		segkp->s_as = &kas;
       
  1740 	}
       
  1741 	if (segkp_create(segkp) != 0) {
       
  1742 		panic("startup: segkp_create failed");
       
  1743 		/*NOTREACHED*/
       
  1744 	}
       
  1745 	PRM_DEBUG(segkp);
       
  1746 	rw_exit(&kas.a_lock);
       
  1747 
       
  1748 	/*
       
  1749 	 * kpm segment
       
  1750 	 */
       
  1751 	segmap_kpm = 0;
       
  1752 	if (kpm_desired) {
       
  1753 		kpm_init();
       
  1754 		kpm_enable = 1;
       
  1755 	}
       
  1756 
       
  1757 	/*
       
  1758 	 * Now create segmap segment.
       
  1759 	 */
       
  1760 	rw_enter(&kas.a_lock, RW_WRITER);
       
  1761 	if (seg_attach(&kas, (caddr_t)segkmap_start, segmapsize, segkmap) < 0) {
       
  1762 		panic("cannot attach segkmap");
       
  1763 		/*NOTREACHED*/
       
  1764 	}
       
  1765 	PRM_DEBUG(segkmap);
       
  1766 
       
  1767 	/*
       
  1768 	 * The 64 bit HAT permanently maps only segmap's page tables.
       
  1769 	 * The 32 bit HAT maps the heap's page tables too.
       
  1770 	 */
       
  1771 #if defined(__amd64)
       
  1772 	hat_kmap_init(segkmap_start, segmapsize);
       
  1773 #else /* __i386 */
       
  1774 	ASSERT(segkmap_start + segmapsize == (uintptr_t)final_kernelheap);
       
  1775 	hat_kmap_init(segkmap_start, (uintptr_t)ekernelheap - segkmap_start);
       
  1776 #endif /* __i386 */
       
  1777 
       
  1778 	a.prot = PROT_READ | PROT_WRITE;
       
  1779 	a.shmsize = 0;
       
  1780 	a.nfreelist = segmapfreelists;
       
  1781 
       
  1782 	if (segmap_create(segkmap, (caddr_t)&a) != 0)
       
  1783 		panic("segmap_create segkmap");
       
  1784 	rw_exit(&kas.a_lock);
       
  1785 
       
  1786 	setup_vaddr_for_ppcopy(CPU);
       
  1787 
       
  1788 	segdev_init();
       
  1789 	pmem_init();
       
  1790 	PRM_POINT("startup_vm() done");
       
  1791 }
       
  1792 
       
  1793 static void
       
  1794 startup_end(void)
       
  1795 {
       
  1796 	extern void setx86isalist(void);
       
  1797 
       
  1798 	PRM_POINT("startup_end() starting...");
       
  1799 
       
  1800 	/*
       
  1801 	 * Perform tasks that get done after most of the VM
       
  1802 	 * initialization has been done but before the clock
       
  1803 	 * and other devices get started.
       
  1804 	 */
       
  1805 	kern_setup1();
       
  1806 
       
  1807 	/*
       
  1808 	 * Perform CPC initialization for this CPU.
       
  1809 	 */
       
  1810 	kcpc_hw_init(CPU);
       
  1811 
       
  1812 #if defined(__amd64)
       
  1813 	/*
       
  1814 	 * Validate support for syscall/sysret
       
  1815 	 * XX64 -- include SSE, SSE2, etc. here too?
       
  1816 	 */
       
  1817 	if ((x86_feature & X86_ASYSC) == 0) {
       
  1818 		cmn_err(CE_WARN,
       
  1819 		    "cpu%d does not support syscall/sysret", CPU->cpu_id);
       
  1820 	}
       
  1821 #endif
       
  1822 	/*
       
  1823 	 * Configure the system.
       
  1824 	 */
       
  1825 	PRM_POINT("Calling configure()...");
       
  1826 	configure();		/* set up devices */
       
  1827 	PRM_POINT("configure() done");
       
  1828 
       
  1829 	/*
       
  1830 	 * Set the isa_list string to the defined instruction sets we
       
  1831 	 * support.
       
  1832 	 */
       
  1833 	setx86isalist();
       
  1834 	init_intr_threads(CPU);
       
  1835 	psm_install();
       
  1836 
       
  1837 	/*
       
  1838 	 * We're done with bootops.  We don't unmap the bootstrap yet because
       
  1839 	 * we're still using bootsvcs.
       
  1840 	 */
       
  1841 	PRM_POINT("zeroing out bootops");
       
  1842 	*bootopsp = (struct bootops *)0;
       
  1843 	bootops = (struct bootops *)NULL;
       
  1844 
       
  1845 	PRM_POINT("Enabling interrupts");
       
  1846 	(*picinitf)();
       
  1847 	sti();
       
  1848 
       
  1849 	(void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1,
       
  1850 		"softlevel1", NULL, NULL); /* XXX to be moved later */
       
  1851 
       
  1852 	PRM_POINT("startup_end() done");
       
  1853 }
       
  1854 
       
  1855 extern char hw_serial[];
       
  1856 char *_hs1107 = hw_serial;
       
  1857 ulong_t  _bdhs34;
       
  1858 
       
  1859 void
       
  1860 post_startup(void)
       
  1861 {
       
  1862 	extern void memscrub_init(void);
       
  1863 
       
  1864 	/*
       
  1865 	 * Set the system wide, processor-specific flags to be passed
       
  1866 	 * to userland via the aux vector for performance hints and
       
  1867 	 * instruction set extensions.
       
  1868 	 */
       
  1869 	bind_hwcap();
       
  1870 
       
  1871 	/*
       
  1872 	 * Startup memory scrubber.
       
  1873 	 */
       
  1874 	(void) memscrub_init();
       
  1875 
       
  1876 	/*
       
  1877 	 * Perform forceloading tasks for /etc/system.
       
  1878 	 */
       
  1879 	(void) mod_sysctl(SYS_FORCELOAD, NULL);
       
  1880 
       
  1881 	/*
       
  1882 	 * complete mmu initialization, now that kernel and critical
       
  1883 	 * modules have been loaded.
       
  1884 	 */
       
  1885 	(void) post_startup_mmu_initialization();
       
  1886 
       
  1887 	/*
       
  1888 	 * ON4.0: Force /proc module in until clock interrupt handle fixed
       
  1889 	 * ON4.0: This must be fixed or restated in /etc/systems.
       
  1890 	 */
       
  1891 	(void) modload("fs", "procfs");
       
  1892 
       
  1893 #if defined(__i386)
       
  1894 	/*
       
  1895 	 * Check for required functional Floating Point hardware,
       
  1896 	 * unless FP hardware explicitly disabled.
       
  1897 	 */
       
  1898 	if (fpu_exists && (fpu_pentium_fdivbug || fp_kind == FP_NO))
       
  1899 		halt("No working FP hardware found");
       
  1900 #endif
       
  1901 
       
  1902 	maxmem = freemem;
       
  1903 
       
  1904 	add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi);
       
  1905 
       
  1906 	/*
       
  1907 	 * Perform the formal initialization of the boot chip,
       
  1908 	 * and associate the boot cpu with it.
       
  1909 	 * This must be done after the cpu node for CPU has been
       
  1910 	 * added to the device tree, when the necessary probing to
       
  1911 	 * know the chip type and chip "id" is performed.
       
  1912 	 */
       
  1913 	chip_cpu_init(CPU);
       
  1914 	chip_cpu_assign(CPU);
       
  1915 }
       
  1916 
       
  1917 static int
       
  1918 pp_in_ramdisk(page_t *pp)
       
  1919 {
       
  1920 	extern uint64_t ramdisk_start, ramdisk_end;
       
  1921 
       
  1922 	return ((pp->p_pagenum >= btop(ramdisk_start)) &&
       
  1923 	    (pp->p_pagenum < btopr(ramdisk_end)));
       
  1924 }
       
  1925 
       
  1926 void
       
  1927 release_bootstrap(void)
       
  1928 {
       
  1929 	int root_is_ramdisk;
       
  1930 	pfn_t pfn;
       
  1931 	page_t *pp;
       
  1932 	extern void kobj_boot_unmountroot(void);
       
  1933 	extern dev_t rootdev;
       
  1934 
       
  1935 	/* unmount boot ramdisk and release kmem usage */
       
  1936 	kobj_boot_unmountroot();
       
  1937 
       
  1938 	/*
       
  1939 	 * We're finished using the boot loader so free its pages.
       
  1940 	 */
       
  1941 	PRM_POINT("Unmapping lower boot pages");
       
  1942 	clear_boot_mappings(0, kernelbase);
       
  1943 #if defined(__amd64)
       
  1944 	PRM_POINT("Unmapping upper boot pages");
       
  1945 	clear_boot_mappings(BOOT_DOUBLEMAP_BASE,
       
  1946 	    BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE);
       
  1947 #endif
       
  1948 
       
  1949 	/*
       
  1950 	 * If root isn't on ramdisk, destroy the hardcoded
       
  1951 	 * ramdisk node now and release the memory. Else,
       
  1952 	 * ramdisk memory is kept in rd_pages.
       
  1953 	 */
       
  1954 	root_is_ramdisk = (getmajor(rootdev) == ddi_name_to_major("ramdisk"));
       
  1955 	if (!root_is_ramdisk) {
       
  1956 		dev_info_t *dip = ddi_find_devinfo("ramdisk", -1, 0);
       
  1957 		ASSERT(dip && ddi_get_parent(dip) == ddi_root_node());
       
  1958 		ndi_rele_devi(dip);	/* held from ddi_find_devinfo */
       
  1959 		(void) ddi_remove_child(dip, 0);
       
  1960 	}
       
  1961 
       
  1962 	PRM_POINT("Releasing boot pages");
       
  1963 	while (bootpages) {
       
  1964 		pp = bootpages;
       
  1965 		bootpages = pp->p_next;
       
  1966 		if (root_is_ramdisk && pp_in_ramdisk(pp)) {
       
  1967 			pp->p_next = rd_pages;
       
  1968 			rd_pages = pp;
       
  1969 			continue;
       
  1970 		}
       
  1971 		pp->p_next = (struct page *)0;
       
  1972 		page_free(pp, 1);
       
  1973 	}
       
  1974 
       
  1975 	/*
       
  1976 	 * Find 1 page below 1 MB so that other processors can boot up.
       
  1977 	 * Make sure it has a kernel VA as well as a 1:1 mapping.
       
  1978 	 * We should have just free'd one up.
       
  1979 	 */
       
  1980 	if (use_mp) {
       
  1981 		for (pfn = 1; pfn < btop(1*1024*1024); pfn++) {
       
  1982 			if (page_numtopp_alloc(pfn) == NULL)
       
  1983 				continue;
       
  1984 			rm_platter_va = i86devmap(pfn, 1,
       
  1985 			    PROT_READ | PROT_WRITE | PROT_EXEC);
       
  1986 			rm_platter_pa = ptob(pfn);
       
  1987 			hat_devload(kas.a_hat,
       
  1988 			    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
       
  1989 			    pfn, PROT_READ | PROT_WRITE | PROT_EXEC,
       
  1990 			    HAT_LOAD_NOCONSIST);
       
  1991 			break;
       
  1992 		}
       
  1993 		if (pfn == btop(1*1024*1024))
       
  1994 			panic("No page available for starting "
       
  1995 			    "other processors");
       
  1996 	}
       
  1997 
       
  1998 #if defined(__amd64)
       
  1999 	PRM_POINT("Returning boot's VA space to kernel heap");
       
  2000 	if (kmem_setaside != NULL)
       
  2001 		vmem_free(heap_arena, kmem_setaside, BOOT_DOUBLEMAP_SIZE);
       
  2002 #endif
       
  2003 }
       
  2004 
       
  2005 /*
       
  2006  * Initialize the platform-specific parts of a page_t.
       
  2007  */
       
  2008 void
       
  2009 add_physmem_cb(page_t *pp, pfn_t pnum)
       
  2010 {
       
  2011 	pp->p_pagenum = pnum;
       
  2012 	pp->p_mapping = NULL;
       
  2013 	pp->p_embed = 0;
       
  2014 	pp->p_share = 0;
       
  2015 	pp->p_mlentry = 0;
       
  2016 }
       
  2017 
       
  2018 /*
       
  2019  * kphysm_init() initializes physical memory.
       
  2020  */
       
  2021 static pgcnt_t
       
  2022 kphysm_init(
       
  2023 	page_t *inpp,
       
  2024 	struct memseg *memsegp,
       
  2025 	pgcnt_t start,
       
  2026 	pgcnt_t npages)
       
  2027 {
       
  2028 	struct memlist	*pmem;
       
  2029 	struct memseg	*cur_memseg;
       
  2030 	struct memseg	**memsegpp;
       
  2031 	pfn_t		base_pfn;
       
  2032 	pgcnt_t		num;
       
  2033 	pgcnt_t		total_skipped = 0;
       
  2034 	pgcnt_t		skipping = 0;
       
  2035 	pgcnt_t		pages_done = 0;
       
  2036 	pgcnt_t		largepgcnt;
       
  2037 	uint64_t	addr;
       
  2038 	uint64_t	size;
       
  2039 	page_t		*pp = inpp;
       
  2040 	int		dobreak = 0;
       
  2041 	extern pfn_t	ddiphysmin;
       
  2042 
       
  2043 	ASSERT(page_hash != NULL && page_hashsz != 0);
       
  2044 
       
  2045 	for (cur_memseg = memsegp; cur_memseg->pages != NULL; cur_memseg++);
       
  2046 	ASSERT(cur_memseg == memsegp || start > 0);
       
  2047 
       
  2048 	for (pmem = phys_avail; pmem && npages; pmem = pmem->next) {
       
  2049 		/*
       
  2050 		 * In a 32 bit kernel can't use higher memory if we're
       
  2051 		 * not booting in PAE mode. This check takes care of that.
       
  2052 		 */
       
  2053 		addr = pmem->address;
       
  2054 		size = pmem->size;
       
  2055 		if (btop(addr) > physmax)
       
  2056 			continue;
       
  2057 
       
  2058 		/*
       
  2059 		 * align addr and size - they may not be at page boundaries
       
  2060 		 */
       
  2061 		if ((addr & MMU_PAGEOFFSET) != 0) {
       
  2062 			addr += MMU_PAGEOFFSET;
       
  2063 			addr &= ~(uint64_t)MMU_PAGEOFFSET;
       
  2064 			size -= addr - pmem->address;
       
  2065 		}
       
  2066 
       
  2067 		/* only process pages below physmax */
       
  2068 		if (btop(addr + size) > physmax)
       
  2069 			size = ptob(physmax - btop(addr));
       
  2070 
       
  2071 		num = btop(size);
       
  2072 		if (num == 0)
       
  2073 			continue;
       
  2074 
       
  2075 		if (total_skipped < start) {
       
  2076 			if (start - total_skipped > num) {
       
  2077 				total_skipped += num;
       
  2078 				continue;
       
  2079 			}
       
  2080 			skipping = start - total_skipped;
       
  2081 			num -= skipping;
       
  2082 			addr += (MMU_PAGESIZE * skipping);
       
  2083 			total_skipped = start;
       
  2084 		}
       
  2085 		if (num == 0)
       
  2086 			continue;
       
  2087 
       
  2088 		if (num > npages)
       
  2089 			num = npages;
       
  2090 
       
  2091 		npages -= num;
       
  2092 		pages_done += num;
       
  2093 		base_pfn = btop(addr);
       
  2094 
       
  2095 		/*
       
  2096 		 * If the caller didn't provide space for the page
       
  2097 		 * structures, carve them out of the memseg they will
       
  2098 		 * represent.
       
  2099 		 */
       
  2100 		if (pp == NULL) {
       
  2101 			pgcnt_t pp_pgs;
       
  2102 
       
  2103 			if (num <= 1)
       
  2104 				continue;
       
  2105 
       
  2106 			/*
       
  2107 			 * Compute how many of the pages we need to use for
       
  2108 			 * page_ts
       
  2109 			 */
       
  2110 			pp_pgs = (num * sizeof (page_t)) / MMU_PAGESIZE + 1;
       
  2111 			while (mmu_ptob(pp_pgs - 1) / sizeof (page_t) >=
       
  2112 			    num - pp_pgs + 1)
       
  2113 				--pp_pgs;
       
  2114 			PRM_DEBUG(pp_pgs);
       
  2115 
       
  2116 			pp = vmem_alloc(heap_arena, mmu_ptob(pp_pgs),
       
  2117 			    VM_NOSLEEP);
       
  2118 			if (pp == NULL) {
       
  2119 				cmn_err(CE_WARN, "Unable to add %ld pages to "
       
  2120 				    "the system.", num);
       
  2121 				continue;
       
  2122 			}
       
  2123 
       
  2124 			hat_devload(kas.a_hat, (void *)pp, mmu_ptob(pp_pgs),
       
  2125 			    base_pfn, PROT_READ | PROT_WRITE | HAT_UNORDERED_OK,
       
  2126 			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
       
  2127 			bzero(pp, mmu_ptob(pp_pgs));
       
  2128 			num -= pp_pgs;
       
  2129 			base_pfn += pp_pgs;
       
  2130 		}
       
  2131 
       
  2132 		if (prom_debug)
       
  2133 			prom_printf("MEMSEG addr=0x%" PRIx64
       
  2134 			    " pgs=0x%lx pfn 0x%lx-0x%lx\n",
       
  2135 			    addr, num, base_pfn, base_pfn + num);
       
  2136 
       
  2137 		/*
       
  2138 		 * drop pages below ddiphysmin to simplify ddi memory
       
  2139 		 * allocation with non-zero addr_lo requests.
       
  2140 		 */
       
  2141 		if (base_pfn < ddiphysmin) {
       
  2142 			if (base_pfn + num <= ddiphysmin) {
       
  2143 				/* drop entire range below ddiphysmin */
       
  2144 				continue;
       
  2145 			}
       
  2146 			/* adjust range to ddiphysmin */
       
  2147 			pp += (ddiphysmin - base_pfn);
       
  2148 			num -= (ddiphysmin - base_pfn);
       
  2149 			base_pfn = ddiphysmin;
       
  2150 		}
       
  2151 		/*
       
  2152 		 * Build the memsegs entry
       
  2153 		 */
       
  2154 		cur_memseg->pages = pp;
       
  2155 		cur_memseg->epages = pp + num;
       
  2156 		cur_memseg->pages_base = base_pfn;
       
  2157 		cur_memseg->pages_end = base_pfn + num;
       
  2158 
       
  2159 		/*
       
  2160 		 * insert in memseg list in decreasing pfn range order.
       
  2161 		 * Low memory is typically more fragmented such that this
       
  2162 		 * ordering keeps the larger ranges at the front of the list
       
  2163 		 * for code that searches memseg.
       
  2164 		 */
       
  2165 		memsegpp = &memsegs;
       
  2166 		for (;;) {
       
  2167 			if (*memsegpp == NULL) {
       
  2168 				/* empty memsegs */
       
  2169 				memsegs = cur_memseg;
       
  2170 				break;
       
  2171 			}
       
  2172 			/* check for continuity with start of memsegpp */
       
  2173 			if (cur_memseg->pages_end == (*memsegpp)->pages_base) {
       
  2174 				if (cur_memseg->epages == (*memsegpp)->pages) {
       
  2175 					/*
       
  2176 					 * contiguous pfn and page_t's. Merge
       
  2177 					 * cur_memseg into *memsegpp. Drop
       
  2178 					 * cur_memseg
       
  2179 					 */
       
  2180 					(*memsegpp)->pages_base =
       
  2181 					    cur_memseg->pages_base;
       
  2182 					(*memsegpp)->pages =
       
  2183 					    cur_memseg->pages;
       
  2184 					/*
       
  2185 					 * check if contiguous with the end of
       
  2186 					 * the next memseg.
       
  2187 					 */
       
  2188 					if ((*memsegpp)->next &&
       
  2189 					    ((*memsegpp)->pages_base ==
       
  2190 					    (*memsegpp)->next->pages_end)) {
       
  2191 						cur_memseg = *memsegpp;
       
  2192 						memsegpp = &((*memsegpp)->next);
       
  2193 						dobreak = 1;
       
  2194 					} else {
       
  2195 						break;
       
  2196 					}
       
  2197 				} else {
       
  2198 					/*
       
  2199 					 * contiguous pfn but not page_t's.
       
  2200 					 * drop last pfn/page_t in cur_memseg
       
  2201 					 * to prevent creation of large pages
       
  2202 					 * with noncontiguous page_t's if not
       
  2203 					 * aligned to largest page boundary.
       
  2204 					 */
       
  2205 					largepgcnt = page_get_pagecnt(
       
  2206 					    page_num_pagesizes() - 1);
       
  2207 
       
  2208 					if (cur_memseg->pages_end &
       
  2209 					    (largepgcnt - 1)) {
       
  2210 						num--;
       
  2211 						cur_memseg->epages--;
       
  2212 						cur_memseg->pages_end--;
       
  2213 					}
       
  2214 				}
       
  2215 			}
       
  2216 
       
  2217 			/* check for continuity with end of memsegpp */
       
  2218 			if (cur_memseg->pages_base == (*memsegpp)->pages_end) {
       
  2219 				if (cur_memseg->pages == (*memsegpp)->epages) {
       
  2220 					/*
       
  2221 					 * contiguous pfn and page_t's. Merge
       
  2222 					 * cur_memseg into *memsegpp. Drop
       
  2223 					 * cur_memseg.
       
  2224 					 */
       
  2225 					if (dobreak) {
       
  2226 						/* merge previously done */
       
  2227 						cur_memseg->pages =
       
  2228 						    (*memsegpp)->pages;
       
  2229 						cur_memseg->pages_base =
       
  2230 						    (*memsegpp)->pages_base;
       
  2231 						cur_memseg->next =
       
  2232 						    (*memsegpp)->next;
       
  2233 					} else {
       
  2234 						(*memsegpp)->pages_end =
       
  2235 						    cur_memseg->pages_end;
       
  2236 						(*memsegpp)->epages =
       
  2237 						    cur_memseg->epages;
       
  2238 					}
       
  2239 					break;
       
  2240 				}
       
  2241 				/*
       
  2242 				 * contiguous pfn but not page_t's.
       
  2243 				 * drop first pfn/page_t in cur_memseg
       
  2244 				 * to prevent creation of large pages
       
  2245 				 * with noncontiguous page_t's if not
       
  2246 				 * aligned to largest page boundary.
       
  2247 				 */
       
  2248 				largepgcnt = page_get_pagecnt(
       
  2249 				    page_num_pagesizes() - 1);
       
  2250 				if (base_pfn & (largepgcnt - 1)) {
       
  2251 					num--;
       
  2252 					base_pfn++;
       
  2253 					cur_memseg->pages++;
       
  2254 					cur_memseg->pages_base++;
       
  2255 					pp = cur_memseg->pages;
       
  2256 				}
       
  2257 				if (dobreak)
       
  2258 					break;
       
  2259 			}
       
  2260 
       
  2261 			if (cur_memseg->pages_base >=
       
  2262 			    (*memsegpp)->pages_end) {
       
  2263 				cur_memseg->next = *memsegpp;
       
  2264 				*memsegpp = cur_memseg;
       
  2265 				break;
       
  2266 			}
       
  2267 			if ((*memsegpp)->next == NULL) {
       
  2268 				cur_memseg->next = NULL;
       
  2269 				(*memsegpp)->next = cur_memseg;
       
  2270 				break;
       
  2271 			}
       
  2272 			memsegpp = &((*memsegpp)->next);
       
  2273 			ASSERT(*memsegpp != NULL);
       
  2274 		}
       
  2275 
       
  2276 		/*
       
  2277 		 * add_physmem() initializes the PSM part of the page
       
  2278 		 * struct by calling the PSM back with add_physmem_cb().
       
  2279 		 * In addition it coalesces pages into larger pages as
       
  2280 		 * it initializes them.
       
  2281 		 */
       
  2282 		add_physmem(pp, num, base_pfn);
       
  2283 		cur_memseg++;
       
  2284 		availrmem_initial += num;
       
  2285 		availrmem += num;
       
  2286 
       
  2287 		/*
       
  2288 		 * If the caller provided the page frames to us, then
       
  2289 		 * advance in that list.  Otherwise, prepare to allocate
       
  2290 		 * our own page frames for the next memseg.
       
  2291 		 */
       
  2292 		pp = (inpp == NULL) ? NULL : pp + num;
       
  2293 	}
       
  2294 
       
  2295 	PRM_DEBUG(availrmem_initial);
       
  2296 	PRM_DEBUG(availrmem);
       
  2297 	PRM_DEBUG(freemem);
       
  2298 	build_pfn_hash();
       
  2299 	return (pages_done);
       
  2300 }
       
  2301 
       
  2302 /*
       
  2303  * Kernel VM initialization.
       
  2304  */
       
  2305 static void
       
  2306 kvm_init(void)
       
  2307 {
       
  2308 #ifdef DEBUG
       
  2309 	extern void _start();
       
  2310 
       
  2311 	ASSERT((caddr_t)_start == s_text);
       
  2312 #endif
       
  2313 	ASSERT((((uintptr_t)s_text) & MMU_PAGEOFFSET) == 0);
       
  2314 
       
  2315 	/*
       
  2316 	 * Put the kernel segments in kernel address space.
       
  2317 	 */
       
  2318 	rw_enter(&kas.a_lock, RW_WRITER);
       
  2319 	as_avlinit(&kas);
       
  2320 
       
  2321 	(void) seg_attach(&kas, s_text, e_moddata - s_text, &ktextseg);
       
  2322 	(void) segkmem_create(&ktextseg);
       
  2323 
       
  2324 	(void) seg_attach(&kas, (caddr_t)valloc_base, valloc_sz, &kvalloc);
       
  2325 	(void) segkmem_create(&kvalloc);
       
  2326 
       
  2327 	/*
       
  2328 	 * We're about to map out /boot.  This is the beginning of the
       
  2329 	 * system resource management transition. We can no longer
       
  2330 	 * call into /boot for I/O or memory allocations.
       
  2331 	 *
       
  2332 	 * XX64 - Is this still correct with kernelheap_extend() being called
       
  2333 	 * later than this????
       
  2334 	 */
       
  2335 	(void) seg_attach(&kas, final_kernelheap,
       
  2336 	    ekernelheap - final_kernelheap, &kvseg);
       
  2337 	(void) segkmem_create(&kvseg);
       
  2338 
       
  2339 #if defined(__amd64)
       
  2340 	(void) seg_attach(&kas, (caddr_t)core_base, core_size, &kvseg_core);
       
  2341 	(void) segkmem_create(&kvseg_core);
       
  2342 #endif
       
  2343 
       
  2344 	(void) seg_attach(&kas, (caddr_t)SEGDEBUGBASE, (size_t)SEGDEBUGSIZE,
       
  2345 	    &kdebugseg);
       
  2346 	(void) segkmem_create(&kdebugseg);
       
  2347 
       
  2348 	rw_exit(&kas.a_lock);
       
  2349 
       
  2350 	/*
       
  2351 	 * Ensure that the red zone at kernelbase is never accessible.
       
  2352 	 */
       
  2353 	(void) as_setprot(&kas, (caddr_t)kernelbase, KERNEL_REDZONE_SIZE, 0);
       
  2354 
       
  2355 	/*
       
  2356 	 * Make the text writable so that it can be hot patched by DTrace.
       
  2357 	 */
       
  2358 	(void) as_setprot(&kas, s_text, e_modtext - s_text,
       
  2359 	    PROT_READ | PROT_WRITE | PROT_EXEC);
       
  2360 
       
  2361 	/*
       
  2362 	 * Make data writable until end.
       
  2363 	 */
       
  2364 	(void) as_setprot(&kas, s_data, e_moddata - s_data,
       
  2365 	    PROT_READ | PROT_WRITE | PROT_EXEC);
       
  2366 }
       
  2367 
       
  2368 /*
       
  2369  * These are MTTR registers supported by P6
       
  2370  */
       
  2371 static struct	mtrrvar	mtrrphys_arr[MAX_MTRRVAR];
       
  2372 static uint64_t mtrr64k, mtrr16k1, mtrr16k2;
       
  2373 static uint64_t mtrr4k1, mtrr4k2, mtrr4k3;
       
  2374 static uint64_t mtrr4k4, mtrr4k5, mtrr4k6;
       
  2375 static uint64_t mtrr4k7, mtrr4k8, mtrrcap;
       
  2376 uint64_t mtrrdef, pat_attr_reg;
       
  2377 
       
  2378 /*
       
  2379  * Disable reprogramming of MTRRs by default.
       
  2380  */
       
  2381 int	enable_relaxed_mtrr = 0;
       
  2382 
       
  2383 /*
       
  2384  * These must serve for Pentium, Pentium Pro (P6/Pentium II/Pentium III)
       
  2385  * and Pentium 4, and yes, they are named 0, 1, 2, 4, 3 in ascending
       
  2386  * address order (starting from 0x400).  The Pentium 4 only implements
       
  2387  * 4 sets, and while they are named 0-3 in the doc, the corresponding
       
  2388  * names for P6 are 0,1,2,4.  So define these arrays in address order
       
  2389  * so that they work for both pre-Pentium4 and Pentium 4 processors.
       
  2390  */
       
  2391 
       
  2392 static uint_t	mci_ctl[] = {REG_MC0_CTL, REG_MC1_CTL, REG_MC2_CTL,
       
  2393 		    REG_MC4_CTL, REG_MC3_CTL};
       
  2394 static uint_t	mci_status[] = {REG_MC0_STATUS, REG_MC1_STATUS, REG_MC2_STATUS,
       
  2395 		    REG_MC4_STATUS, REG_MC3_STATUS};
       
  2396 static uint_t	mci_addr[] = {REG_MC0_ADDR, REG_MC1_ADDR, REG_MC2_ADDR,
       
  2397 		    REG_MC4_ADDR, REG_MC3_ADDR};
       
  2398 static int	mca_cnt;
       
  2399 
       
  2400 
       
  2401 void
       
  2402 setup_mca()
       
  2403 {
       
  2404 	int 		i;
       
  2405 	uint64_t	allzeros;
       
  2406 	uint64_t	allones;
       
  2407 	uint64_t	mca_cap;
       
  2408 
       
  2409 	if (!(x86_feature & X86_MCA))
       
  2410 		return;
       
  2411 	(void) rdmsr(REG_MCG_CAP, &mca_cap);
       
  2412 	allones = 0xffffffffffffffffULL;
       
  2413 	if (mca_cap & MCG_CAP_CTL_P)
       
  2414 		(void) wrmsr(REG_MCG_CTL, &allones);
       
  2415 	mca_cnt = mca_cap & MCG_CAP_COUNT_MASK;
       
  2416 	if (mca_cnt > P6_MCG_CAP_COUNT)
       
  2417 		mca_cnt = P6_MCG_CAP_COUNT;
       
  2418 	for (i = 1; i < mca_cnt; i++)
       
  2419 		(void) wrmsr(mci_ctl[i], &allones);
       
  2420 	allzeros = 0;
       
  2421 	for (i = 0; i < mca_cnt; i++)
       
  2422 		(void) wrmsr(mci_status[i], &allzeros);
       
  2423 	setcr4(getcr4() | CR4_MCE);
       
  2424 
       
  2425 }
       
  2426 
       
  2427 int
       
  2428 mca_exception(struct regs *rp)
       
  2429 {
       
  2430 	uint64_t	status, addr;
       
  2431 	uint64_t	allzeros;
       
  2432 	uint64_t	buf;
       
  2433 	int		i, ret = 1, errcode, mserrcode;
       
  2434 
       
  2435 	allzeros = 0;
       
  2436 	(void) rdmsr(REG_MCG_STATUS, &buf);
       
  2437 	status = buf;
       
  2438 	if (status & MCG_STATUS_RIPV)
       
  2439 		ret = 0;
       
  2440 	if (status & MCG_STATUS_EIPV)
       
  2441 		cmn_err(CE_WARN, "MCE at 0x%lx", rp->r_pc);
       
  2442 	(void) wrmsr(REG_MCG_STATUS, &allzeros);
       
  2443 	for (i = 0; i < mca_cnt; i++) {
       
  2444 		(void) rdmsr(mci_status[i], &buf);
       
  2445 		status = buf;
       
  2446 		/*
       
  2447 		 * If status register not valid skip this bank
       
  2448 		 */
       
  2449 		if (!(status & MCI_STATUS_VAL))
       
  2450 			continue;
       
  2451 		errcode = status & MCI_STATUS_ERRCODE;
       
  2452 		mserrcode = (status  >> MSERRCODE_SHFT) & MCI_STATUS_ERRCODE;
       
  2453 		if (status & MCI_STATUS_ADDRV) {
       
  2454 			/*
       
  2455 			 * If mci_addr contains the address where
       
  2456 			 * error occurred, display the address
       
  2457 			 */
       
  2458 			(void) rdmsr(mci_addr[i], &buf);
       
  2459 			addr = buf;
       
  2460 			cmn_err(CE_WARN, "MCE: Bank %d: error code 0x%x:"\
       
  2461 			    "addr = 0x%" PRIx64 ", model errcode = 0x%x", i,
       
  2462 			    errcode, addr, mserrcode);
       
  2463 		} else {
       
  2464 			cmn_err(CE_WARN,
       
  2465 			    "MCE: Bank %d: error code 0x%x, mserrcode = 0x%x",
       
  2466 			    i, errcode, mserrcode);
       
  2467 		}
       
  2468 		(void) wrmsr(mci_status[i], &allzeros);
       
  2469 	}
       
  2470 	return (ret);
       
  2471 }
       
  2472 
       
  2473 void
       
  2474 setup_mtrr()
       
  2475 {
       
  2476 	int i, ecx;
       
  2477 	int vcnt;
       
  2478 	struct	mtrrvar	*mtrrphys;
       
  2479 
       
  2480 	if (!(x86_feature & X86_MTRR))
       
  2481 		return;
       
  2482 
       
  2483 	(void) rdmsr(REG_MTRRCAP, &mtrrcap);
       
  2484 	(void) rdmsr(REG_MTRRDEF, &mtrrdef);
       
  2485 	if (mtrrcap & MTRRCAP_FIX) {
       
  2486 		(void) rdmsr(REG_MTRR64K, &mtrr64k);
       
  2487 		(void) rdmsr(REG_MTRR16K1, &mtrr16k1);
       
  2488 		(void) rdmsr(REG_MTRR16K2, &mtrr16k2);
       
  2489 		(void) rdmsr(REG_MTRR4K1, &mtrr4k1);
       
  2490 		(void) rdmsr(REG_MTRR4K2, &mtrr4k2);
       
  2491 		(void) rdmsr(REG_MTRR4K3, &mtrr4k3);
       
  2492 		(void) rdmsr(REG_MTRR4K4, &mtrr4k4);
       
  2493 		(void) rdmsr(REG_MTRR4K5, &mtrr4k5);
       
  2494 		(void) rdmsr(REG_MTRR4K6, &mtrr4k6);
       
  2495 		(void) rdmsr(REG_MTRR4K7, &mtrr4k7);
       
  2496 		(void) rdmsr(REG_MTRR4K8, &mtrr4k8);
       
  2497 	}
       
  2498 	if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR)
       
  2499 		vcnt = MAX_MTRRVAR;
       
  2500 
       
  2501 	for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr;
       
  2502 		i <  vcnt - 1; i++, ecx += 2, mtrrphys++) {
       
  2503 		(void) rdmsr(ecx, &mtrrphys->mtrrphys_base);
       
  2504 		(void) rdmsr(ecx + 1, &mtrrphys->mtrrphys_mask);
       
  2505 		if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) {
       
  2506 			mtrrphys->mtrrphys_mask &= ~MTRRPHYSMASK_V;
       
  2507 		}
       
  2508 	}
       
  2509 	if (x86_feature & X86_PAT) {
       
  2510 		if (enable_relaxed_mtrr)
       
  2511 			mtrrdef = MTRR_TYPE_WB|MTRRDEF_FE|MTRRDEF_E;
       
  2512 		pat_attr_reg = PAT_DEFAULT_ATTRIBUTE;
       
  2513 	}
       
  2514 
       
  2515 	mtrr_sync();
       
  2516 }
       
  2517 
       
  2518 /*
       
  2519  * Sync current cpu mtrr with the incore copy of mtrr.
       
  2520  * This function has to be invoked with interrupts disabled
       
  2521  * Currently we do not capture other cpu's. This is invoked on cpu0
       
  2522  * just after reading /etc/system.
       
  2523  * On other cpu's its invoked from mp_startup().
       
  2524  */
       
  2525 void
       
  2526 mtrr_sync()
       
  2527 {
       
  2528 	uint64_t my_mtrrdef;
       
  2529 	uint_t	crvalue, cr0_orig;
       
  2530 	int	vcnt, i, ecx;
       
  2531 	struct	mtrrvar	*mtrrphys;
       
  2532 
       
  2533 	cr0_orig = crvalue = getcr0();
       
  2534 	crvalue |= CR0_CD;
       
  2535 	crvalue &= ~CR0_NW;
       
  2536 	setcr0(crvalue);
       
  2537 	invalidate_cache();
       
  2538 	setcr3(getcr3());
       
  2539 
       
  2540 	if (x86_feature & X86_PAT) {
       
  2541 		(void) wrmsr(REG_MTRRPAT, &pat_attr_reg);
       
  2542 	}
       
  2543 	(void) rdmsr(REG_MTRRDEF, &my_mtrrdef);
       
  2544 	my_mtrrdef &= ~MTRRDEF_E;
       
  2545 	(void) wrmsr(REG_MTRRDEF, &my_mtrrdef);
       
  2546 	if (mtrrcap & MTRRCAP_FIX) {
       
  2547 		(void) wrmsr(REG_MTRR64K, &mtrr64k);
       
  2548 		(void) wrmsr(REG_MTRR16K1, &mtrr16k1);
       
  2549 		(void) wrmsr(REG_MTRR16K2, &mtrr16k2);
       
  2550 		(void) wrmsr(REG_MTRR4K1, &mtrr4k1);
       
  2551 		(void) wrmsr(REG_MTRR4K2, &mtrr4k2);
       
  2552 		(void) wrmsr(REG_MTRR4K3, &mtrr4k3);
       
  2553 		(void) wrmsr(REG_MTRR4K4, &mtrr4k4);
       
  2554 		(void) wrmsr(REG_MTRR4K5, &mtrr4k5);
       
  2555 		(void) wrmsr(REG_MTRR4K6, &mtrr4k6);
       
  2556 		(void) wrmsr(REG_MTRR4K7, &mtrr4k7);
       
  2557 		(void) wrmsr(REG_MTRR4K8, &mtrr4k8);
       
  2558 	}
       
  2559 	if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR)
       
  2560 		vcnt = MAX_MTRRVAR;
       
  2561 	for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr;
       
  2562 		i <  vcnt - 1; i++, ecx += 2, mtrrphys++) {
       
  2563 		(void) wrmsr(ecx, &mtrrphys->mtrrphys_base);
       
  2564 		(void) wrmsr(ecx + 1, &mtrrphys->mtrrphys_mask);
       
  2565 	}
       
  2566 	(void) wrmsr(REG_MTRRDEF, &mtrrdef);
       
  2567 	setcr3(getcr3());
       
  2568 	invalidate_cache();
       
  2569 	setcr0(cr0_orig);
       
  2570 }
       
  2571 
       
  2572 /*
       
  2573  * resync mtrr so that BIOS is happy. Called from mdboot
       
  2574  */
       
  2575 void
       
  2576 mtrr_resync()
       
  2577 {
       
  2578 	if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) {
       
  2579 		/*
       
  2580 		 * We could have changed the default mtrr definition.
       
  2581 		 * Put it back to uncached which is what it is at power on
       
  2582 		 */
       
  2583 		mtrrdef = MTRR_TYPE_UC|MTRRDEF_FE|MTRRDEF_E;
       
  2584 		mtrr_sync();
       
  2585 	}
       
  2586 }
       
  2587 
       
  2588 void
       
  2589 get_system_configuration()
       
  2590 {
       
  2591 	char	prop[32];
       
  2592 	u_longlong_t nodes_ll, cpus_pernode_ll, lvalue;
       
  2593 
       
  2594 	if (((BOP_GETPROPLEN(bootops, "nodes") > sizeof (prop)) ||
       
  2595 		(BOP_GETPROP(bootops, "nodes", prop) < 0) 	||
       
  2596 		(kobj_getvalue(prop, &nodes_ll) == -1) ||
       
  2597 		(nodes_ll > MAXNODES))			   ||
       
  2598 	    ((BOP_GETPROPLEN(bootops, "cpus_pernode") > sizeof (prop)) ||
       
  2599 		(BOP_GETPROP(bootops, "cpus_pernode", prop) < 0) ||
       
  2600 		(kobj_getvalue(prop, &cpus_pernode_ll) == -1))) {
       
  2601 
       
  2602 		system_hardware.hd_nodes = 1;
       
  2603 		system_hardware.hd_cpus_per_node = 0;
       
  2604 	} else {
       
  2605 		system_hardware.hd_nodes = (int)nodes_ll;
       
  2606 		system_hardware.hd_cpus_per_node = (int)cpus_pernode_ll;
       
  2607 	}
       
  2608 	if ((BOP_GETPROPLEN(bootops, "kernelbase") > sizeof (prop)) ||
       
  2609 		(BOP_GETPROP(bootops, "kernelbase", prop) < 0) 	||
       
  2610 		(kobj_getvalue(prop, &lvalue) == -1))
       
  2611 			eprom_kernelbase = NULL;
       
  2612 	else
       
  2613 			eprom_kernelbase = (uintptr_t)lvalue;
       
  2614 
       
  2615 	if ((BOP_GETPROPLEN(bootops, "segmapsize") > sizeof (prop)) ||
       
  2616 	    (BOP_GETPROP(bootops, "segmapsize", prop) < 0) ||
       
  2617 	    (kobj_getvalue(prop, &lvalue) == -1)) {
       
  2618 		segmapsize = SEGMAPDEFAULT;
       
  2619 	} else {
       
  2620 		segmapsize = (uintptr_t)lvalue;
       
  2621 	}
       
  2622 
       
  2623 	if ((BOP_GETPROPLEN(bootops, "segmapfreelists") > sizeof (prop)) ||
       
  2624 	    (BOP_GETPROP(bootops, "segmapfreelists", prop) < 0) ||
       
  2625 	    (kobj_getvalue(prop, &lvalue) == -1)) {
       
  2626 		segmapfreelists = 0;	/* use segmap driver default */
       
  2627 	} else {
       
  2628 		segmapfreelists = (int)lvalue;
       
  2629 	}
       
  2630 }
       
  2631 
       
  2632 /*
       
  2633  * Add to a memory list.
       
  2634  * start = start of new memory segment
       
  2635  * len = length of new memory segment in bytes
       
  2636  * new = pointer to a new struct memlist
       
  2637  * memlistp = memory list to which to add segment.
       
  2638  */
       
  2639 static void
       
  2640 memlist_add(
       
  2641 	uint64_t start,
       
  2642 	uint64_t len,
       
  2643 	struct memlist *new,
       
  2644 	struct memlist **memlistp)
       
  2645 {
       
  2646 	struct memlist *cur;
       
  2647 	uint64_t end = start + len;
       
  2648 
       
  2649 	new->address = start;
       
  2650 	new->size = len;
       
  2651 
       
  2652 	cur = *memlistp;
       
  2653 
       
  2654 	while (cur) {
       
  2655 		if (cur->address >= end) {
       
  2656 			new->next = cur;
       
  2657 			*memlistp = new;
       
  2658 			new->prev = cur->prev;
       
  2659 			cur->prev = new;
       
  2660 			return;
       
  2661 		}
       
  2662 		ASSERT(cur->address + cur->size <= start);
       
  2663 		if (cur->next == NULL) {
       
  2664 			cur->next = new;
       
  2665 			new->prev = cur;
       
  2666 			new->next = NULL;
       
  2667 			return;
       
  2668 		}
       
  2669 		memlistp = &cur->next;
       
  2670 		cur = cur->next;
       
  2671 	}
       
  2672 }
       
  2673 
       
  2674 void
       
  2675 kobj_vmem_init(vmem_t **text_arena, vmem_t **data_arena)
       
  2676 {
       
  2677 	size_t tsize = e_modtext - modtext;
       
  2678 	size_t dsize = e_moddata - moddata;
       
  2679 
       
  2680 	*text_arena = vmem_create("module_text", tsize ? modtext : NULL, tsize,
       
  2681 	    1, segkmem_alloc, segkmem_free, heaptext_arena, 0, VM_SLEEP);
       
  2682 	*data_arena = vmem_create("module_data", dsize ? moddata : NULL, dsize,
       
  2683 	    1, segkmem_alloc, segkmem_free, heap32_arena, 0, VM_SLEEP);
       
  2684 }
       
  2685 
       
  2686 caddr_t
       
  2687 kobj_text_alloc(vmem_t *arena, size_t size)
       
  2688 {
       
  2689 	return (vmem_alloc(arena, size, VM_SLEEP | VM_BESTFIT));
       
  2690 }
       
  2691 
       
  2692 /*ARGSUSED*/
       
  2693 caddr_t
       
  2694 kobj_texthole_alloc(caddr_t addr, size_t size)
       
  2695 {
       
  2696 	panic("unexpected call to kobj_texthole_alloc()");
       
  2697 	/*NOTREACHED*/
       
  2698 	return (0);
       
  2699 }
       
  2700 
       
  2701 /*ARGSUSED*/
       
  2702 void
       
  2703 kobj_texthole_free(caddr_t addr, size_t size)
       
  2704 {
       
  2705 	panic("unexpected call to kobj_texthole_free()");
       
  2706 }
       
  2707 
       
  2708 /*
       
  2709  * This is called just after configure() in startup().
       
  2710  *
       
  2711  * The ISALIST concept is a bit hopeless on Intel, because
       
  2712  * there's no guarantee of an ever-more-capable processor
       
  2713  * given that various parts of the instruction set may appear
       
  2714  * and disappear between different implementations.
       
  2715  *
       
  2716  * While it would be possible to correct it and even enhance
       
  2717  * it somewhat, the explicit hardware capability bitmask allows
       
  2718  * more flexibility.
       
  2719  *
       
  2720  * So, we just leave this alone.
       
  2721  */
       
  2722 void
       
  2723 setx86isalist(void)
       
  2724 {
       
  2725 	char *tp;
       
  2726 	size_t len;
       
  2727 	extern char *isa_list;
       
  2728 
       
  2729 #define	TBUFSIZE	1024
       
  2730 
       
  2731 	tp = kmem_alloc(TBUFSIZE, KM_SLEEP);
       
  2732 	*tp = '\0';
       
  2733 
       
  2734 #if defined(__amd64)
       
  2735 	(void) strcpy(tp, "amd64 ");
       
  2736 #endif
       
  2737 
       
  2738 	switch (x86_vendor) {
       
  2739 	case X86_VENDOR_Intel:
       
  2740 	case X86_VENDOR_AMD:
       
  2741 	case X86_VENDOR_TM:
       
  2742 		if (x86_feature & X86_CMOV) {
       
  2743 			/*
       
  2744 			 * Pentium Pro or later
       
  2745 			 */
       
  2746 			(void) strcat(tp, "pentium_pro");
       
  2747 			(void) strcat(tp, x86_feature & X86_MMX ?
       
  2748 			    "+mmx pentium_pro " : " ");
       
  2749 		}
       
  2750 		/*FALLTHROUGH*/
       
  2751 	case X86_VENDOR_Cyrix:
       
  2752 		/*
       
  2753 		 * The Cyrix 6x86 does not have any Pentium features
       
  2754 		 * accessible while not at privilege level 0.
       
  2755 		 */
       
  2756 		if (x86_feature & X86_CPUID) {
       
  2757 			(void) strcat(tp, "pentium");
       
  2758 			(void) strcat(tp, x86_feature & X86_MMX ?
       
  2759 			    "+mmx pentium " : " ");
       
  2760 		}
       
  2761 		break;
       
  2762 	default:
       
  2763 		break;
       
  2764 	}
       
  2765 	(void) strcat(tp, "i486 i386 i86");
       
  2766 	len = strlen(tp) + 1;   /* account for NULL at end of string */
       
  2767 	isa_list = strcpy(kmem_alloc(len, KM_SLEEP), tp);
       
  2768 	kmem_free(tp, TBUFSIZE);
       
  2769 
       
  2770 #undef TBUFSIZE
       
  2771 }
       
  2772 
       
  2773 
       
  2774 #ifdef __amd64
       
  2775 
       
  2776 void *
       
  2777 device_arena_alloc(size_t size, int vm_flag)
       
  2778 {
       
  2779 	return (vmem_alloc(device_arena, size, vm_flag));
       
  2780 }
       
  2781 
       
  2782 void
       
  2783 device_arena_free(void *vaddr, size_t size)
       
  2784 {
       
  2785 	vmem_free(device_arena, vaddr, size);
       
  2786 }
       
  2787 
       
  2788 #else
       
  2789 
       
  2790 void *
       
  2791 device_arena_alloc(size_t size, int vm_flag)
       
  2792 {
       
  2793 	caddr_t	vaddr;
       
  2794 	uintptr_t v;
       
  2795 	size_t	start;
       
  2796 	size_t	end;
       
  2797 
       
  2798 	vaddr = vmem_alloc(heap_arena, size, vm_flag);
       
  2799 	if (vaddr == NULL)
       
  2800 		return (NULL);
       
  2801 
       
  2802 	v = (uintptr_t)vaddr;
       
  2803 	ASSERT(v >= kernelbase);
       
  2804 	ASSERT(v + size <= ptable_va);
       
  2805 
       
  2806 	start = btop(v - kernelbase);
       
  2807 	end = btop(v + size - 1 - kernelbase);
       
  2808 	ASSERT(start < toxic_bit_map_len);
       
  2809 	ASSERT(end < toxic_bit_map_len);
       
  2810 
       
  2811 	while (start <= end) {
       
  2812 		BT_ATOMIC_SET(toxic_bit_map, start);
       
  2813 		++start;
       
  2814 	}
       
  2815 	return (vaddr);
       
  2816 }
       
  2817 
       
  2818 void
       
  2819 device_arena_free(void *vaddr, size_t size)
       
  2820 {
       
  2821 	uintptr_t v = (uintptr_t)vaddr;
       
  2822 	size_t	start;
       
  2823 	size_t	end;
       
  2824 
       
  2825 	ASSERT(v >= kernelbase);
       
  2826 	ASSERT(v + size <= ptable_va);
       
  2827 
       
  2828 	start = btop(v - kernelbase);
       
  2829 	end = btop(v + size - 1 - kernelbase);
       
  2830 	ASSERT(start < toxic_bit_map_len);
       
  2831 	ASSERT(end < toxic_bit_map_len);
       
  2832 
       
  2833 	while (start <= end) {
       
  2834 		ASSERT(BT_TEST(toxic_bit_map, start) != 0);
       
  2835 		BT_ATOMIC_CLEAR(toxic_bit_map, start);
       
  2836 		++start;
       
  2837 	}
       
  2838 	vmem_free(heap_arena, vaddr, size);
       
  2839 }
       
  2840 
       
  2841 /*
       
  2842  * returns 1st address in range that is in device arena, or NULL
       
  2843  * if len is not NULL it returns the length of the toxic range
       
  2844  */
       
  2845 void *
       
  2846 device_arena_contains(void *vaddr, size_t size, size_t *len)
       
  2847 {
       
  2848 	uintptr_t v = (uintptr_t)vaddr;
       
  2849 	uintptr_t eaddr = v + size;
       
  2850 	size_t start;
       
  2851 	size_t end;
       
  2852 
       
  2853 	/*
       
  2854 	 * if called very early by kmdb, just return NULL
       
  2855 	 */
       
  2856 	if (toxic_bit_map == NULL)
       
  2857 		return (NULL);
       
  2858 
       
  2859 	/*
       
  2860 	 * First check if we're completely outside the bitmap range.
       
  2861 	 */
       
  2862 	if (v >= ptable_va || eaddr < kernelbase)
       
  2863 		return (NULL);
       
  2864 
       
  2865 	/*
       
  2866 	 * Trim ends of search to look at only what the bitmap covers.
       
  2867 	 */
       
  2868 	if (v < kernelbase)
       
  2869 		v = kernelbase;
       
  2870 	start = btop(v - kernelbase);
       
  2871 	end = btop(eaddr - kernelbase);
       
  2872 	if (end >= toxic_bit_map_len)
       
  2873 		end = toxic_bit_map_len;
       
  2874 
       
  2875 	if (bt_range(toxic_bit_map, &start, &end, end) == 0)
       
  2876 		return (NULL);
       
  2877 
       
  2878 	v = kernelbase + ptob(start);
       
  2879 	if (len != NULL)
       
  2880 		*len = ptob(end - start);
       
  2881 	return ((void *)v);
       
  2882 }
       
  2883 
       
  2884 #endif