|
1 /* |
|
2 * CDDL HEADER START |
|
3 * |
|
4 * The contents of this file are subject to the terms of the |
|
5 * Common Development and Distribution License, Version 1.0 only |
|
6 * (the "License"). You may not use this file except in compliance |
|
7 * with the License. |
|
8 * |
|
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 * or http://www.opensolaris.org/os/licensing. |
|
11 * See the License for the specific language governing permissions |
|
12 * and limitations under the License. |
|
13 * |
|
14 * When distributing Covered Code, include this CDDL HEADER in each |
|
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 * If applicable, add the following below this CDDL HEADER, with the |
|
17 * fields enclosed by brackets "[]" replaced with your own identifying |
|
18 * information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 * |
|
20 * CDDL HEADER END |
|
21 */ |
|
22 /* |
|
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
24 * Use is subject to license terms. |
|
25 */ |
|
26 |
|
27 #pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
|
29 #include <sys/types.h> |
|
30 #include <sys/t_lock.h> |
|
31 #include <sys/param.h> |
|
32 #include <sys/sysmacros.h> |
|
33 #include <sys/signal.h> |
|
34 #include <sys/systm.h> |
|
35 #include <sys/user.h> |
|
36 #include <sys/mman.h> |
|
37 #include <sys/vm.h> |
|
38 #include <sys/conf.h> |
|
39 #include <sys/avintr.h> |
|
40 #include <sys/autoconf.h> |
|
41 #include <sys/disp.h> |
|
42 #include <sys/class.h> |
|
43 #include <sys/bitmap.h> |
|
44 |
|
45 #include <sys/privregs.h> |
|
46 |
|
47 #include <sys/proc.h> |
|
48 #include <sys/buf.h> |
|
49 #include <sys/kmem.h> |
|
50 #include <sys/kstat.h> |
|
51 |
|
52 #include <sys/reboot.h> |
|
53 #include <sys/uadmin.h> |
|
54 |
|
55 #include <sys/cred.h> |
|
56 #include <sys/vnode.h> |
|
57 #include <sys/file.h> |
|
58 |
|
59 #include <sys/procfs.h> |
|
60 #include <sys/acct.h> |
|
61 |
|
62 #include <sys/vfs.h> |
|
63 #include <sys/dnlc.h> |
|
64 #include <sys/var.h> |
|
65 #include <sys/cmn_err.h> |
|
66 #include <sys/utsname.h> |
|
67 #include <sys/debug.h> |
|
68 #include <sys/kdi.h> |
|
69 |
|
70 #include <sys/dumphdr.h> |
|
71 #include <sys/bootconf.h> |
|
72 #include <sys/varargs.h> |
|
73 #include <sys/promif.h> |
|
74 #include <sys/prom_emul.h> /* for create_prom_prop */ |
|
75 #include <sys/modctl.h> /* for "procfs" hack */ |
|
76 |
|
77 #include <sys/consdev.h> |
|
78 #include <sys/frame.h> |
|
79 |
|
80 #include <sys/sunddi.h> |
|
81 #include <sys/sunndi.h> |
|
82 #include <sys/ndi_impldefs.h> |
|
83 #include <sys/ddidmareq.h> |
|
84 #include <sys/psw.h> |
|
85 #include <sys/regset.h> |
|
86 #include <sys/clock.h> |
|
87 #include <sys/pte.h> |
|
88 #include <sys/mmu.h> |
|
89 #include <sys/tss.h> |
|
90 #include <sys/stack.h> |
|
91 #include <sys/trap.h> |
|
92 #include <sys/pic.h> |
|
93 #include <sys/fp.h> |
|
94 #include <vm/anon.h> |
|
95 #include <vm/as.h> |
|
96 #include <vm/page.h> |
|
97 #include <vm/seg.h> |
|
98 #include <vm/seg_dev.h> |
|
99 #include <vm/seg_kmem.h> |
|
100 #include <vm/seg_kpm.h> |
|
101 #include <vm/seg_map.h> |
|
102 #include <vm/seg_vn.h> |
|
103 #include <vm/seg_kp.h> |
|
104 #include <sys/memnode.h> |
|
105 #include <vm/vm_dep.h> |
|
106 #include <sys/swap.h> |
|
107 #include <sys/thread.h> |
|
108 #include <sys/sysconf.h> |
|
109 #include <sys/vm_machparam.h> |
|
110 #include <sys/archsystm.h> |
|
111 #include <sys/machsystm.h> |
|
112 #include <vm/hat.h> |
|
113 #include <vm/hat_i86.h> |
|
114 #include <sys/pmem.h> |
|
115 #include <sys/instance.h> |
|
116 #include <sys/smp_impldefs.h> |
|
117 #include <sys/x86_archext.h> |
|
118 #include <sys/segments.h> |
|
119 #include <sys/clconf.h> |
|
120 #include <sys/kobj.h> |
|
121 #include <sys/kobj_lex.h> |
|
122 #include <sys/prom_emul.h> |
|
123 #include <sys/cpc_impl.h> |
|
124 #include <sys/chip.h> |
|
125 #include <sys/x86_archext.h> |
|
126 |
|
127 extern void debug_enter(char *); |
|
128 extern void progressbar_init(void); |
|
129 extern void progressbar_start(void); |
|
130 |
|
131 /* |
|
132 * XXX make declaration below "static" when drivers no longer use this |
|
133 * interface. |
|
134 */ |
|
135 extern caddr_t p0_va; /* Virtual address for accessing physical page 0 */ |
|
136 |
|
137 /* |
|
138 * segkp |
|
139 */ |
|
140 extern int segkp_fromheap; |
|
141 |
|
142 static void kvm_init(void); |
|
143 static void startup_init(void); |
|
144 static void startup_memlist(void); |
|
145 static void startup_modules(void); |
|
146 static void startup_bop_gone(void); |
|
147 static void startup_vm(void); |
|
148 static void startup_end(void); |
|
149 |
|
150 /* |
|
151 * Declare these as initialized data so we can patch them. |
|
152 */ |
|
153 pgcnt_t physmem = 0; /* memory size in pages, patch if you want less */ |
|
154 pgcnt_t obp_pages; /* Memory used by PROM for its text and data */ |
|
155 |
|
156 char *kobj_file_buf; |
|
157 int kobj_file_bufsize; /* set in /etc/system */ |
|
158 |
|
159 /* Global variables for MP support. Used in mp_startup */ |
|
160 caddr_t rm_platter_va; |
|
161 uint32_t rm_platter_pa; |
|
162 |
|
163 /* |
|
164 * Some CPUs have holes in the middle of the 64-bit virtual address range. |
|
165 */ |
|
166 uintptr_t hole_start, hole_end; |
|
167 |
|
168 /* |
|
169 * kpm mapping window |
|
170 */ |
|
171 caddr_t kpm_vbase; |
|
172 size_t kpm_size; |
|
173 static int kpm_desired = 0; /* Do we want to try to use segkpm? */ |
|
174 |
|
175 /* |
|
176 * VA range that must be preserved for boot until we release all of its |
|
177 * mappings. |
|
178 */ |
|
179 #if defined(__amd64) |
|
180 static void *kmem_setaside; |
|
181 #endif |
|
182 |
|
183 /* |
|
184 * Configuration parameters set at boot time. |
|
185 */ |
|
186 |
|
187 caddr_t econtig; /* end of first block of contiguous kernel */ |
|
188 |
|
189 struct bootops *bootops = 0; /* passed in from boot */ |
|
190 struct bootops **bootopsp; |
|
191 struct boot_syscalls *sysp; /* passed in from boot */ |
|
192 |
|
193 char bootblock_fstype[16]; |
|
194 |
|
195 char kern_bootargs[OBP_MAXPATHLEN]; |
|
196 |
|
197 /* |
|
198 * new memory fragmentations are possible in startup() due to BOP_ALLOCs. this |
|
199 * depends on number of BOP_ALLOC calls made and requested size, memory size |
|
200 * combination and whether boot.bin memory needs to be freed. |
|
201 */ |
|
202 #define POSS_NEW_FRAGMENTS 12 |
|
203 |
|
204 /* |
|
205 * VM data structures |
|
206 */ |
|
207 long page_hashsz; /* Size of page hash table (power of two) */ |
|
208 struct page *pp_base; /* Base of initial system page struct array */ |
|
209 struct page **page_hash; /* Page hash table */ |
|
210 struct seg ktextseg; /* Segment used for kernel executable image */ |
|
211 struct seg kvalloc; /* Segment used for "valloc" mapping */ |
|
212 struct seg kpseg; /* Segment used for pageable kernel virt mem */ |
|
213 struct seg kmapseg; /* Segment used for generic kernel mappings */ |
|
214 struct seg kdebugseg; /* Segment used for the kernel debugger */ |
|
215 |
|
216 struct seg *segkmap = &kmapseg; /* Kernel generic mapping segment */ |
|
217 struct seg *segkp = &kpseg; /* Pageable kernel virtual memory segment */ |
|
218 |
|
219 #if defined(__amd64) |
|
220 struct seg kvseg_core; /* Segment used for the core heap */ |
|
221 struct seg kpmseg; /* Segment used for physical mapping */ |
|
222 struct seg *segkpm = &kpmseg; /* 64bit kernel physical mapping segment */ |
|
223 #else |
|
224 struct seg *segkpm = NULL; /* Unused on IA32 */ |
|
225 #endif |
|
226 |
|
227 caddr_t segkp_base; /* Base address of segkp */ |
|
228 #if defined(__amd64) |
|
229 pgcnt_t segkpsize = btop(SEGKPDEFSIZE); /* size of segkp segment in pages */ |
|
230 #else |
|
231 pgcnt_t segkpsize = 0; |
|
232 #endif |
|
233 |
|
234 struct memseg *memseg_base; |
|
235 struct vnode unused_pages_vp; |
|
236 |
|
237 #define FOURGB 0x100000000LL |
|
238 |
|
239 struct memlist *memlist; |
|
240 |
|
241 caddr_t s_text; /* start of kernel text segment */ |
|
242 caddr_t e_text; /* end of kernel text segment */ |
|
243 caddr_t s_data; /* start of kernel data segment */ |
|
244 caddr_t e_data; /* end of kernel data segment */ |
|
245 caddr_t modtext; /* start of loadable module text reserved */ |
|
246 caddr_t e_modtext; /* end of loadable module text reserved */ |
|
247 caddr_t moddata; /* start of loadable module data reserved */ |
|
248 caddr_t e_moddata; /* end of loadable module data reserved */ |
|
249 |
|
250 struct memlist *phys_install; /* Total installed physical memory */ |
|
251 struct memlist *phys_avail; /* Total available physical memory */ |
|
252 |
|
253 static void memlist_add(uint64_t, uint64_t, struct memlist *, |
|
254 struct memlist **); |
|
255 |
|
256 /* |
|
257 * kphysm_init returns the number of pages that were processed |
|
258 */ |
|
259 static pgcnt_t kphysm_init(page_t *, struct memseg *, pgcnt_t, pgcnt_t); |
|
260 |
|
261 #define IO_PROP_SIZE 64 /* device property size */ |
|
262 |
|
263 /* |
|
264 * a couple useful roundup macros |
|
265 */ |
|
266 #define ROUND_UP_PAGE(x) \ |
|
267 ((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)MMU_PAGESIZE)) |
|
268 #define ROUND_UP_LPAGE(x) \ |
|
269 ((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[1])) |
|
270 #define ROUND_UP_4MEG(x) \ |
|
271 ((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)FOURMB_PAGESIZE)) |
|
272 #define ROUND_UP_TOPLEVEL(x) \ |
|
273 ((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[mmu.max_level])) |
|
274 |
|
275 /* |
|
276 * 32-bit Kernel's Virtual memory layout. |
|
277 * +-----------------------+ |
|
278 * | psm 1-1 map | |
|
279 * | exec args area | |
|
280 * 0xFFC00000 -|-----------------------|- ARGSBASE |
|
281 * | debugger | |
|
282 * 0xFF800000 -|-----------------------|- SEGDEBUGBASE |
|
283 * | Kernel Data | |
|
284 * 0xFEC00000 -|-----------------------| |
|
285 * | Kernel Text | |
|
286 * 0xFE800000 -|-----------------------|- KERNEL_TEXT |
|
287 * | LUFS sinkhole | |
|
288 * 0xFE000000 -|-----------------------|- lufs_addr |
|
289 * --- -|-----------------------|- valloc_base + valloc_sz |
|
290 * | early pp structures | |
|
291 * | memsegs, memlists, | |
|
292 * | page hash, etc. | |
|
293 * --- -|-----------------------|- valloc_base (floating) |
|
294 * | ptable_va | |
|
295 * 0xFDFFE000 -|-----------------------|- ekernelheap, ptable_va |
|
296 * | | (segkp is an arena under the heap) |
|
297 * | | |
|
298 * | kvseg | |
|
299 * | | |
|
300 * | | |
|
301 * --- -|-----------------------|- kernelheap (floating) |
|
302 * | Segkmap | |
|
303 * 0xC3002000 -|-----------------------|- segkmap_start (floating) |
|
304 * | Red Zone | |
|
305 * 0xC3000000 -|-----------------------|- kernelbase / userlimit (floating) |
|
306 * | | || |
|
307 * | Shared objects | \/ |
|
308 * | | |
|
309 * : : |
|
310 * | user data | |
|
311 * |-----------------------| |
|
312 * | user text | |
|
313 * 0x08048000 -|-----------------------| |
|
314 * | user stack | |
|
315 * : : |
|
316 * | invalid | |
|
317 * 0x00000000 +-----------------------+ |
|
318 * |
|
319 * |
|
320 * 64-bit Kernel's Virtual memory layout. (assuming 64 bit app) |
|
321 * +-----------------------+ |
|
322 * | psm 1-1 map | |
|
323 * | exec args area | |
|
324 * 0xFFFFFFFF.FFC00000 |-----------------------|- ARGSBASE |
|
325 * | debugger (?) | |
|
326 * 0xFFFFFFFF.FF800000 |-----------------------|- SEGDEBUGBASE |
|
327 * | unused | |
|
328 * +-----------------------+ |
|
329 * | Kernel Data | |
|
330 * 0xFFFFFFFF.FBC00000 |-----------------------| |
|
331 * | Kernel Text | |
|
332 * 0xFFFFFFFF.FB800000 |-----------------------|- KERNEL_TEXT |
|
333 * | LUFS sinkhole | |
|
334 * 0xFFFFFFFF.FB000000 -|-----------------------|- lufs_addr |
|
335 * --- |-----------------------|- valloc_base + valloc_sz |
|
336 * | early pp structures | |
|
337 * | memsegs, memlists, | |
|
338 * | page hash, etc. | |
|
339 * --- |-----------------------|- valloc_base |
|
340 * | ptable_va | |
|
341 * --- |-----------------------|- ptable_va |
|
342 * | Core heap | (used for loadable modules) |
|
343 * 0xFFFFFFFF.C0000000 |-----------------------|- core_base / ekernelheap |
|
344 * | Kernel | |
|
345 * | heap | |
|
346 * 0xFFFFFXXX.XXX00000 |-----------------------|- kernelheap (floating) |
|
347 * | segkmap | |
|
348 * 0xFFFFFXXX.XXX00000 |-----------------------|- segkmap_start (floating) |
|
349 * | device mappings | |
|
350 * 0xFFFFFXXX.XXX00000 |-----------------------|- toxic_addr (floating) |
|
351 * | segkp | |
|
352 * --- |-----------------------|- segkp_base |
|
353 * | segkpm | |
|
354 * 0xFFFFFE00.00000000 |-----------------------| |
|
355 * | Red Zone | |
|
356 * 0xFFFFFD80.00000000 |-----------------------|- KERNELBASE |
|
357 * | User stack |- User space memory |
|
358 * | | |
|
359 * | shared objects, etc | (grows downwards) |
|
360 * : : |
|
361 * | | |
|
362 * 0xFFFF8000.00000000 |-----------------------| |
|
363 * | | |
|
364 * | VA Hole / unused | |
|
365 * | | |
|
366 * 0x00008000.00000000 |-----------------------| |
|
367 * | | |
|
368 * | | |
|
369 * : : |
|
370 * | user heap | (grows upwards) |
|
371 * | | |
|
372 * | user data | |
|
373 * |-----------------------| |
|
374 * | user text | |
|
375 * 0x00000000.04000000 |-----------------------| |
|
376 * | invalid | |
|
377 * 0x00000000.00000000 +-----------------------+ |
|
378 * |
|
379 * A 32 bit app on the 64 bit kernel sees the same layout as on the 32 bit |
|
380 * kernel, except that userlimit is raised to 0xfe000000 |
|
381 * |
|
382 * Floating values: |
|
383 * |
|
384 * valloc_base: start of the kernel's memory management/tracking data |
|
385 * structures. This region contains page_t structures for the lowest 4GB |
|
386 * of physical memory, memsegs, memlists, and the page hash. |
|
387 * |
|
388 * core_base: start of the kernel's "core" heap area on 64-bit systems. |
|
389 * This area is intended to be used for global data as well as for module |
|
390 * text/data that does not fit into the nucleus pages. The core heap is |
|
391 * restricted to a 2GB range, allowing every address within it to be |
|
392 * accessed using rip-relative addressing |
|
393 * |
|
394 * ekernelheap: end of kernelheap and start of segmap. |
|
395 * |
|
396 * kernelheap: start of kernel heap. On 32-bit systems, this starts right |
|
397 * above a red zone that separates the user's address space from the |
|
398 * kernel's. On 64-bit systems, it sits above segkp and segkpm. |
|
399 * |
|
400 * segkmap_start: start of segmap. The length of segmap can be modified |
|
401 * by changing segmapsize in /etc/system (preferred) or eeprom (deprecated). |
|
402 * The default length is 16MB on 32-bit systems and 64MB on 64-bit systems. |
|
403 * |
|
404 * kernelbase: On a 32-bit kernel the default value of 0xd4000000 will be |
|
405 * decreased by 2X the size required for page_t. This allows the kernel |
|
406 * heap to grow in size with physical memory. With sizeof(page_t) == 80 |
|
407 * bytes, the following shows the values of kernelbase and kernel heap |
|
408 * sizes for different memory configurations (assuming default segmap and |
|
409 * segkp sizes). |
|
410 * |
|
411 * mem size for kernelbase kernel heap |
|
412 * size page_t's size |
|
413 * ---- --------- ---------- ----------- |
|
414 * 1gb 0x01400000 0xd1800000 684MB |
|
415 * 2gb 0x02800000 0xcf000000 704MB |
|
416 * 4gb 0x05000000 0xca000000 744MB |
|
417 * 6gb 0x07800000 0xc5000000 784MB |
|
418 * 8gb 0x0a000000 0xc0000000 824MB |
|
419 * 16gb 0x14000000 0xac000000 984MB |
|
420 * 32gb 0x28000000 0x84000000 1304MB |
|
421 * 64gb 0x50000000 0x34000000 1944MB (*) |
|
422 * |
|
423 * kernelbase is less than the abi minimum of 0xc0000000 for memory |
|
424 * configurations above 8gb. |
|
425 * |
|
426 * (*) support for memory configurations above 32gb will require manual tuning |
|
427 * of kernelbase to balance out the need of user applications. |
|
428 */ |
|
429 |
|
430 void init_intr_threads(struct cpu *); |
|
431 |
|
432 /* |
|
433 * Dummy spl priority masks |
|
434 */ |
|
435 static unsigned char dummy_cpu_pri[MAXIPL + 1] = { |
|
436 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, |
|
437 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf |
|
438 }; |
|
439 |
|
440 /* real-time-clock initialization parameters */ |
|
441 long gmt_lag; /* offset in seconds of gmt to local time */ |
|
442 extern long process_rtc_config_file(void); |
|
443 |
|
444 char *final_kernelheap; |
|
445 char *boot_kernelheap; |
|
446 uintptr_t kernelbase; |
|
447 uintptr_t eprom_kernelbase; |
|
448 size_t segmapsize; |
|
449 static uintptr_t segmap_reserved; |
|
450 uintptr_t segkmap_start; |
|
451 int segmapfreelists; |
|
452 pgcnt_t boot_npages; |
|
453 pgcnt_t npages; |
|
454 size_t core_size; /* size of "core" heap */ |
|
455 uintptr_t core_base; /* base address of "core" heap */ |
|
456 |
|
457 /* |
|
458 * List of bootstrap pages. We mark these as allocated in startup. |
|
459 * release_bootstrap() will free them when we're completely done with |
|
460 * the bootstrap. |
|
461 */ |
|
462 static page_t *bootpages, *rd_pages; |
|
463 |
|
464 struct system_hardware system_hardware; |
|
465 |
|
466 /* |
|
467 * Enable some debugging messages concerning memory usage... |
|
468 * |
|
469 * XX64 There should only be one print routine once memlist usage between |
|
470 * vmx and the kernel is cleaned up and there is a single memlist structure |
|
471 * shared between kernel and boot. |
|
472 */ |
|
473 static void |
|
474 print_boot_memlist(char *title, struct memlist *mp) |
|
475 { |
|
476 prom_printf("MEMLIST: %s:\n", title); |
|
477 while (mp != NULL) { |
|
478 prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n", |
|
479 mp->address, mp->size); |
|
480 mp = mp->next; |
|
481 } |
|
482 } |
|
483 |
|
484 static void |
|
485 print_kernel_memlist(char *title, struct memlist *mp) |
|
486 { |
|
487 prom_printf("MEMLIST: %s:\n", title); |
|
488 while (mp != NULL) { |
|
489 prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n", |
|
490 mp->address, mp->size); |
|
491 mp = mp->next; |
|
492 } |
|
493 } |
|
494 |
|
495 /* |
|
496 * XX64 need a comment here.. are these just default values, surely |
|
497 * we read the "cpuid" type information to figure this out. |
|
498 */ |
|
499 int l2cache_sz = 0x80000; |
|
500 int l2cache_linesz = 0x40; |
|
501 int l2cache_assoc = 1; |
|
502 |
|
503 /* |
|
504 * on 64 bit we use a predefined VA range for mapping devices in the kernel |
|
505 * on 32 bit the mappings are intermixed in the heap, so we use a bit map |
|
506 */ |
|
507 #ifdef __amd64 |
|
508 |
|
509 vmem_t *device_arena; |
|
510 uintptr_t toxic_addr = (uintptr_t)NULL; |
|
511 size_t toxic_size = 1 * 1024 * 1024 * 1024; /* Sparc uses 1 gig too */ |
|
512 |
|
513 #else /* __i386 */ |
|
514 |
|
515 ulong_t *toxic_bit_map; /* one bit for each 4k of VA in heap_arena */ |
|
516 size_t toxic_bit_map_len = 0; /* in bits */ |
|
517 |
|
518 #endif /* __i386 */ |
|
519 |
|
520 /* |
|
521 * Simple boot time debug facilities |
|
522 */ |
|
523 static char *prm_dbg_str[] = { |
|
524 "%s:%d: '%s' is 0x%x\n", |
|
525 "%s:%d: '%s' is 0x%llx\n" |
|
526 }; |
|
527 |
|
528 int prom_debug; |
|
529 |
|
530 #define PRM_DEBUG(q) if (prom_debug) \ |
|
531 prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q); |
|
532 #define PRM_POINT(q) if (prom_debug) \ |
|
533 prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q); |
|
534 |
|
535 /* |
|
536 * This structure is used to keep track of the initial allocations |
|
537 * done in startup_memlist(). The value of NUM_ALLOCATIONS needs to |
|
538 * be >= the number of ADD_TO_ALLOCATIONS() executed in the code. |
|
539 */ |
|
540 #define NUM_ALLOCATIONS 7 |
|
541 int num_allocations = 0; |
|
542 struct { |
|
543 void **al_ptr; |
|
544 size_t al_size; |
|
545 } allocations[NUM_ALLOCATIONS]; |
|
546 size_t valloc_sz = 0; |
|
547 uintptr_t valloc_base; |
|
548 extern uintptr_t ptable_va; |
|
549 extern size_t ptable_sz; |
|
550 |
|
/*
 * Register a pending early allocation: round "size" up to a whole page,
 * remember where the resulting pointer should be stored, and add the
 * rounded size to the running total (valloc_sz) that
 * perform_allocations() will later satisfy with a single BOP_ALLOC().
 * NOTE: "size" is evaluated more than once and is written back rounded.
 */
#define ADD_TO_ALLOCATIONS(ptr, size) { \
	size = ROUND_UP_PAGE(size); \
	if (num_allocations == NUM_ALLOCATIONS) \
		panic("too many ADD_TO_ALLOCATIONS()"); \
	allocations[num_allocations].al_ptr = (void**)&ptr; \
	allocations[num_allocations].al_size = size; \
	valloc_sz += size; \
	++num_allocations; \
}
|
560 |
|
561 static void |
|
562 perform_allocations(void) |
|
563 { |
|
564 caddr_t mem; |
|
565 int i; |
|
566 |
|
567 mem = BOP_ALLOC(bootops, (caddr_t)valloc_base, valloc_sz, BO_NO_ALIGN); |
|
568 if (mem != (caddr_t)valloc_base) |
|
569 panic("BOP_ALLOC() failed"); |
|
570 bzero(mem, valloc_sz); |
|
571 for (i = 0; i < num_allocations; ++i) { |
|
572 *allocations[i].al_ptr = (void *)mem; |
|
573 mem += allocations[i].al_size; |
|
574 } |
|
575 } |
|
576 |
|
577 /* |
|
578 * Our world looks like this at startup time. |
|
579 * |
|
580 * In a 32-bit OS, boot loads the kernel text at 0xfe800000 and kernel data |
|
581 * at 0xfec00000. On a 64-bit OS, kernel text and data are loaded at |
|
582 * 0xffffffff.fe800000 and 0xffffffff.fec00000 respectively. Those |
|
583 * addresses are fixed in the binary at link time. |
|
584 * |
|
585 * On the text page: |
|
586 * unix/genunix/krtld/module text loads. |
|
587 * |
|
588 * On the data page: |
|
589 * unix/genunix/krtld/module data loads and space for page_t's. |
|
590 */ |
|
/*
 * Machine-dependent startup code.
 *
 * Drives the boot-time initialization sequence for this platform.  The
 * startup_*() phases must run in exactly this order; each phase relies
 * on state established by the previous one (e.g. startup_bop_gone()
 * marks the point after which boot loader services may no longer be
 * used by later phases).
 */
void
startup(void)
{
	extern void startup_bios_disk();
	/*
	 * Make sure that nobody tries to use segkpm until we have
	 * initialized it properly.
	 */
#if defined(__amd64)
	/* remember whether segkpm was requested; startup_vm() re-enables */
	kpm_desired = kpm_enable;
#endif
	kpm_enable = 0;

	progressbar_init();
	startup_init();		/* cpuid, boot props, system configuration */
	startup_memlist();	/* take over physical memory from boot */
	startup_modules();
	startup_bios_disk();
	startup_bop_gone();	/* boot services unusable past this point */
	startup_vm();		/* kernel VM, segments, hat */
	startup_end();
	progressbar_start();
}
|
617 |
|
618 static void |
|
619 startup_init() |
|
620 { |
|
621 PRM_POINT("startup_init() starting..."); |
|
622 |
|
623 /* |
|
624 * Complete the extraction of cpuid data |
|
625 */ |
|
626 cpuid_pass2(CPU); |
|
627 |
|
628 (void) check_boot_version(BOP_GETVERSION(bootops)); |
|
629 |
|
630 /* |
|
631 * Check for prom_debug in boot environment |
|
632 */ |
|
633 if (BOP_GETPROPLEN(bootops, "prom_debug") >= 0) { |
|
634 ++prom_debug; |
|
635 PRM_POINT("prom_debug found in boot enviroment"); |
|
636 } |
|
637 |
|
638 /* |
|
639 * Collect node, cpu and memory configuration information. |
|
640 */ |
|
641 get_system_configuration(); |
|
642 |
|
643 /* |
|
644 * Halt if this is an unsupported processor. |
|
645 */ |
|
646 if (x86_type == X86_TYPE_486 || x86_type == X86_TYPE_CYRIX_486) { |
|
647 printf("\n486 processor (\"%s\") detected.\n", |
|
648 CPU->cpu_brandstr); |
|
649 halt("This processor is not supported by this release " |
|
650 "of Solaris."); |
|
651 } |
|
652 |
|
653 /* |
|
654 * Set up dummy values till psm spl code installed |
|
655 */ |
|
656 CPU->cpu_pri_data = dummy_cpu_pri; |
|
657 |
|
658 PRM_POINT("startup_init() done"); |
|
659 } |
|
660 |
|
/*
 * Callback for copy_memlist_filter() to filter nucleus, kadb/kmdb, (ie.
 * everything mapped above KERNEL_TEXT) pages from phys_avail.  Note it
 * also filters out physical page zero.  There is some reliance on the
 * boot loader allocating only a few contiguous physical memory chunks.
 *
 * On return, *addr and *size describe the (possibly empty) remainder of
 * the input physical range after kernel-mapped pages have been trimmed
 * from both ends.
 */
static void
avail_filter(uint64_t *addr, uint64_t *size)
{
	uintptr_t va;		/* current virtual address being probed */
	uintptr_t next_va;	/* VA immediately after the current mapping */
	pfn_t pfn;		/* first page frame of the current mapping */
	uint64_t pfn_addr;	/* physical start of the current mapping */
	uint64_t pfn_eaddr;	/* physical end of the current mapping */
	uint_t prot;
	size_t len;		/* byte length of the current mapping */
	uint_t change;		/* did this pass trim anything? */

	if (prom_debug)
		prom_printf("\tFilter: in: a=%" PRIx64 ", s=%" PRIx64 "\n",
		    *addr, *size);

	/*
	 * page zero is required for BIOS.. never make it available
	 */
	if (*addr == 0) {
		*addr += MMU_PAGESIZE;
		*size -= MMU_PAGESIZE;
	}

	/*
	 * First we trim from the front of the range.  Since hat_boot_probe()
	 * walks ranges in virtual order, but addr/size are physical, we need
	 * to walk the list until no changes are seen.  This deals with the
	 * case where page "p" is mapped at v, page "p + PAGESIZE" is mapped
	 * at w but w < v.
	 */
	do {
		change = 0;
		for (va = KERNEL_TEXT;
		    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
		    va = next_va) {

			next_va = va + len;
			pfn_addr = ptob((uint64_t)pfn);
			pfn_eaddr = pfn_addr + len;

			/*
			 * Mapping overlaps the front of the range: advance
			 * the start one page at a time past the overlap.
			 */
			if (pfn_addr <= *addr && pfn_eaddr > *addr) {
				change = 1;
				while (*size > 0 && len > 0) {
					*addr += MMU_PAGESIZE;
					*size -= MMU_PAGESIZE;
					len -= MMU_PAGESIZE;
				}
			}
		}
		if (change && prom_debug)
			prom_printf("\t\ttrim: a=%" PRIx64 ", s=%" PRIx64 "\n",
			    *addr, *size);
	} while (change);

	/*
	 * Trim pages from the end of the range.
	 */
	for (va = KERNEL_TEXT;
	    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
	    va = next_va) {

		next_va = va + len;
		pfn_addr = ptob((uint64_t)pfn);

		/* clip the range so it ends before this kernel mapping */
		if (pfn_addr >= *addr && pfn_addr < *addr + *size)
			*size = pfn_addr - *addr;
	}

	if (prom_debug)
		prom_printf("\tFilter out: a=%" PRIx64 ", s=%" PRIx64 "\n",
		    *addr, *size);
}
|
740 |
|
/*
 * Create and populate the segkpm segment: attach it to the kernel
 * address space, then map all installed physical memory into it so the
 * kernel can address any physical page through kpm_vbase.
 */
static void
kpm_init()
{
	struct segkpm_crargs b;
	uintptr_t start, end;
	struct memlist *pmem;

	/*
	 * These variables were all designed for sfmmu in which segkpm is
	 * mapped using a single pagesize - either 8KB or 4MB.  On x86, we
	 * might use 2+ page sizes on a single machine, so none of these
	 * variables have a single correct value.  They are set up as if we
	 * always use a 4KB pagesize, which should do no harm.  In the long
	 * run, we should get rid of KPM's assumption that only a single
	 * pagesize is used.
	 */
	kpm_pgshft = MMU_PAGESHIFT;
	kpm_pgsz = MMU_PAGESIZE;
	kpm_pgoff = MMU_PAGEOFFSET;
	kpmp2pshft = 0;
	kpmpnpgs = 1;
	ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0);

	PRM_POINT("about to create segkpm");
	rw_enter(&kas.a_lock, RW_WRITER);

	if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
		panic("cannot attach segkpm");

	b.prot = PROT_READ | PROT_WRITE;
	b.nvcolors = 1;

	if (segkpm_create(segkpm, (caddr_t)&b) != 0)
		panic("segkpm_create segkpm");

	rw_exit(&kas.a_lock);

	/*
	 * Map each of the memsegs into the kpm segment, coalescing adjacent
	 * memsegs to allow mapping with the largest possible pages.
	 */
	pmem = phys_install;
	start = pmem->address;
	end = start + pmem->size;
	for (;;) {
		/*
		 * A gap (or the end of the list) terminates the current run
		 * of contiguous memory: map the accumulated [start, end)
		 * range into segkpm, then begin a new run if any remains.
		 */
		if (pmem == NULL || pmem->address > end) {
			hat_devload(kas.a_hat, kpm_vbase + start,
			    end - start, mmu_btop(start),
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
			if (pmem == NULL)
				break;
			start = pmem->address;
		}
		end = pmem->address + pmem->size;
		pmem = pmem->next;
	}
}
|
799 |
|
800 /* |
|
801 * The purpose of startup memlist is to get the system to the |
|
802 * point where it can use kmem_alloc()'s that operate correctly |
|
803 * relying on BOP_ALLOC(). This includes allocating page_ts, |
|
804 * page hash table, vmem initialized, etc. |
|
805 * |
|
806 * Boot's versions of physinstalled and physavail are insufficient for |
|
807 * the kernel's purposes. Specifically we don't know which pages that |
|
808 * are not in physavail can be reclaimed after boot is gone. |
|
809 * |
|
810 * This code solves the problem by dividing the address space |
|
811 * into 3 regions as it takes over the MMU from the booter. |
|
812 * |
|
813 * 1) Any (non-nucleus) pages that are mapped at addresses above KERNEL_TEXT |
|
814 * can not be used by the kernel. |
|
815 * |
|
816 * 2) Any free page that happens to be mapped below kernelbase |
|
817 * is protected until the boot loader is released, but will then be reclaimed. |
|
818 * |
|
819 * 3) Boot shouldn't use any address in the remaining area between kernelbase |
|
820 * and KERNEL_TEXT. |
|
821 * |
|
822 * In the case of multiple mappings to the same page, region 1 has precedence |
|
823 * over region 2. |
|
824 */ |
|
825 static void |
|
826 startup_memlist(void) |
|
827 { |
|
828 size_t memlist_sz; |
|
829 size_t memseg_sz; |
|
830 size_t pagehash_sz; |
|
831 size_t pp_sz; |
|
832 uintptr_t va; |
|
833 size_t len; |
|
834 uint_t prot; |
|
835 pfn_t pfn; |
|
836 int memblocks; |
|
837 caddr_t pagecolor_mem; |
|
838 size_t pagecolor_memsz; |
|
839 caddr_t page_ctrs_mem; |
|
840 size_t page_ctrs_size; |
|
841 struct memlist *current; |
|
842 extern void startup_build_mem_nodes(struct memlist *); |
|
843 |
|
844 /* XX64 fix these - they should be in include files */ |
|
845 extern ulong_t cr4_value; |
|
846 extern size_t page_coloring_init(uint_t, int, int); |
|
847 extern void page_coloring_setup(caddr_t); |
|
848 |
|
849 PRM_POINT("startup_memlist() starting..."); |
|
850 |
|
851 /* |
|
852 * Take the most current snapshot we can by calling mem-update. |
|
853 * For this to work properly, we first have to ask boot for its |
|
854 * end address. |
|
855 */ |
|
856 if (BOP_GETPROPLEN(bootops, "memory-update") == 0) |
|
857 (void) BOP_GETPROP(bootops, "memory-update", NULL); |
|
858 |
|
859 /* |
|
860 * find if the kernel is mapped on a large page |
|
861 */ |
|
862 va = KERNEL_TEXT; |
|
863 if (hat_boot_probe(&va, &len, &pfn, &prot) == 0) |
|
864 panic("Couldn't find kernel text boot mapping"); |
|
865 |
|
866 /* |
|
867 * Use leftover large page nucleus text/data space for loadable modules. |
|
868 * Use at most MODTEXT/MODDATA. |
|
869 */ |
|
870 if (len > MMU_PAGESIZE) { |
|
871 |
|
872 moddata = (caddr_t)ROUND_UP_PAGE(e_data); |
|
873 e_moddata = (caddr_t)ROUND_UP_4MEG(e_data); |
|
874 if (e_moddata - moddata > MODDATA) |
|
875 e_moddata = moddata + MODDATA; |
|
876 |
|
877 modtext = (caddr_t)ROUND_UP_PAGE(e_text); |
|
878 e_modtext = (caddr_t)ROUND_UP_4MEG(e_text); |
|
879 if (e_modtext - modtext > MODTEXT) |
|
880 e_modtext = modtext + MODTEXT; |
|
881 |
|
882 |
|
883 } else { |
|
884 |
|
885 PRM_POINT("Kernel NOT loaded on Large Page!"); |
|
886 e_moddata = moddata = (caddr_t)ROUND_UP_PAGE(e_data); |
|
887 e_modtext = modtext = (caddr_t)ROUND_UP_PAGE(e_text); |
|
888 |
|
889 } |
|
890 econtig = e_moddata; |
|
891 |
|
892 PRM_DEBUG(modtext); |
|
893 PRM_DEBUG(e_modtext); |
|
894 PRM_DEBUG(moddata); |
|
895 PRM_DEBUG(e_moddata); |
|
896 PRM_DEBUG(econtig); |
|
897 |
|
898 /* |
|
899 * For MP machines cr4_value must be set or the non-boot |
|
900 * CPUs will not be able to start. |
|
901 */ |
|
902 if (x86_feature & X86_LARGEPAGE) |
|
903 cr4_value = getcr4(); |
|
904 PRM_DEBUG(cr4_value); |
|
905 |
|
906 /* |
|
907 * Examine the boot loaders physical memory map to find out: |
|
908 * - total memory in system - physinstalled |
|
909 * - the max physical address - physmax |
|
910 * - the number of segments the installed memory comes in |
|
911 */ |
|
912 if (prom_debug) |
|
913 print_boot_memlist("boot physinstalled", |
|
914 bootops->boot_mem->physinstalled); |
|
915 installed_top_size(bootops->boot_mem->physinstalled, &physmax, |
|
916 &physinstalled, &memblocks); |
|
917 PRM_DEBUG(physmax); |
|
918 PRM_DEBUG(physinstalled); |
|
919 PRM_DEBUG(memblocks); |
|
920 |
|
921 if (prom_debug) |
|
922 print_boot_memlist("boot physavail", |
|
923 bootops->boot_mem->physavail); |
|
924 |
|
925 /* |
|
926 * Initialize hat's mmu parameters. |
|
927 * Check for enforce-prot-exec in boot environment. It's used to |
|
928 * enable/disable support for the page table entry NX bit. |
|
929 * The default is to enforce PROT_EXEC on processors that support NX. |
|
930 * Boot seems to round up the "len", but 8 seems to be big enough. |
|
931 */ |
|
932 mmu_init(); |
|
933 |
|
934 #ifdef __i386 |
|
935 /* |
|
936 * physmax is lowered if there is more memory than can be |
|
937 * physically addressed in 32 bit (PAE/non-PAE) modes. |
|
938 */ |
|
939 if (mmu.pae_hat) { |
|
940 if (PFN_ABOVE64G(physmax)) { |
|
941 physinstalled -= (physmax - (PFN_64G - 1)); |
|
942 physmax = PFN_64G - 1; |
|
943 } |
|
944 } else { |
|
945 if (PFN_ABOVE4G(physmax)) { |
|
946 physinstalled -= (physmax - (PFN_4G - 1)); |
|
947 physmax = PFN_4G - 1; |
|
948 } |
|
949 } |
|
950 #endif |
|
951 |
|
952 startup_build_mem_nodes(bootops->boot_mem->physinstalled); |
|
953 |
|
954 if (BOP_GETPROPLEN(bootops, "enforce-prot-exec") >= 0) { |
|
955 int len = BOP_GETPROPLEN(bootops, "enforce-prot-exec"); |
|
956 char value[8]; |
|
957 |
|
958 if (len < 8) |
|
959 (void) BOP_GETPROP(bootops, "enforce-prot-exec", value); |
|
960 else |
|
961 (void) strcpy(value, ""); |
|
962 if (strcmp(value, "off") == 0) |
|
963 mmu.pt_nx = 0; |
|
964 } |
|
965 PRM_DEBUG(mmu.pt_nx); |
|
966 |
|
967 /* |
|
968 * We will need page_t's for every page in the system, except for |
|
969	 * memory mapped at or above the start of the kernel text segment.
|
970 * |
|
971 * pages above e_modtext are attributed to kernel debugger (obp_pages) |
|
972 */ |
|
973 npages = physinstalled - 1; /* avail_filter() skips page 0, so "- 1" */ |
|
974 obp_pages = 0; |
|
975 va = KERNEL_TEXT; |
|
976 while (hat_boot_probe(&va, &len, &pfn, &prot) != 0) { |
|
977 npages -= len >> MMU_PAGESHIFT; |
|
978 if (va >= (uintptr_t)e_moddata) |
|
979 obp_pages += len >> MMU_PAGESHIFT; |
|
980 va += len; |
|
981 } |
|
982 PRM_DEBUG(npages); |
|
983 PRM_DEBUG(obp_pages); |
|
984 |
|
985 /* |
|
986 * If physmem is patched to be non-zero, use it instead of |
|
987 * the computed value unless it is larger than the real |
|
988 * amount of memory on hand. |
|
989 */ |
|
990 if (physmem == 0 || physmem > npages) |
|
991 physmem = npages; |
|
992 else |
|
993 npages = physmem; |
|
994 PRM_DEBUG(physmem); |
|
995 |
|
996 /* |
|
997 * We now compute the sizes of all the initial allocations for |
|
998 * structures the kernel needs in order do kmem_alloc(). These |
|
999 * include: |
|
1000 * memsegs |
|
1001 * memlists |
|
1002 * page hash table |
|
1003 * page_t's |
|
1004 * page coloring data structs |
|
1005 */ |
|
1006 memseg_sz = sizeof (struct memseg) * (memblocks + POSS_NEW_FRAGMENTS); |
|
1007 ADD_TO_ALLOCATIONS(memseg_base, memseg_sz); |
|
1008 PRM_DEBUG(memseg_sz); |
|
1009 |
|
1010 /* |
|
1011 * Reserve space for phys_avail/phys_install memlists. |
|
1012 * There's no real good way to know exactly how much room we'll need, |
|
1013 * but this should be a good upper bound. |
|
1014 */ |
|
1015 memlist_sz = ROUND_UP_PAGE(2 * sizeof (struct memlist) * |
|
1016 (memblocks + POSS_NEW_FRAGMENTS)); |
|
1017 ADD_TO_ALLOCATIONS(memlist, memlist_sz); |
|
1018 PRM_DEBUG(memlist_sz); |
|
1019 |
|
1020 /* |
|
1021 * The page structure hash table size is a power of 2 |
|
1022 * such that the average hash chain length is PAGE_HASHAVELEN. |
|
1023 */ |
|
1024 page_hashsz = npages / PAGE_HASHAVELEN; |
|
1025 page_hashsz = 1 << highbit(page_hashsz); |
|
1026 pagehash_sz = sizeof (struct page *) * page_hashsz; |
|
1027 ADD_TO_ALLOCATIONS(page_hash, pagehash_sz); |
|
1028 PRM_DEBUG(pagehash_sz); |
|
1029 |
|
1030 /* |
|
1031 * Set aside room for the page structures themselves. Note: on |
|
1032 * 64-bit systems we don't allocate page_t's for every page here. |
|
1033 * We just allocate enough to map the lowest 4GB of physical |
|
1034 * memory, minus those pages that are used for the "nucleus" kernel |
|
1035 * text and data. The remaining pages are allocated once we can |
|
1036 * map around boot. |
|
1037 * |
|
1038 * boot_npages is used to allocate an area big enough for our |
|
1039	 * initial page_t's. kphysm_init may use less than that.
|
1040 */ |
|
1041 boot_npages = npages; |
|
1042 #if defined(__amd64) |
|
1043 if (npages > mmu_btop(FOURGB - (econtig - s_text))) |
|
1044 boot_npages = mmu_btop(FOURGB - (econtig - s_text)); |
|
1045 #endif |
|
1046 PRM_DEBUG(boot_npages); |
|
1047 pp_sz = sizeof (struct page) * boot_npages; |
|
1048 ADD_TO_ALLOCATIONS(pp_base, pp_sz); |
|
1049 PRM_DEBUG(pp_sz); |
|
1050 |
|
1051 /* |
|
1052 * determine l2 cache info and memory size for page coloring |
|
1053 */ |
|
1054 (void) getl2cacheinfo(CPU, |
|
1055 &l2cache_sz, &l2cache_linesz, &l2cache_assoc); |
|
1056 pagecolor_memsz = |
|
1057 page_coloring_init(l2cache_sz, l2cache_linesz, l2cache_assoc); |
|
1058 ADD_TO_ALLOCATIONS(pagecolor_mem, pagecolor_memsz); |
|
1059 PRM_DEBUG(pagecolor_memsz); |
|
1060 |
|
1061 page_ctrs_size = page_ctrs_sz(); |
|
1062 ADD_TO_ALLOCATIONS(page_ctrs_mem, page_ctrs_size); |
|
1063 PRM_DEBUG(page_ctrs_size); |
|
1064 |
|
1065 /* |
|
1066 * valloc_base will be below kernel text |
|
1067 * The extra pages are for the HAT and kmdb to map page tables. |
|
1068 */ |
|
1069 valloc_sz = ROUND_UP_LPAGE(valloc_sz); |
|
1070 valloc_base = KERNEL_TEXT - valloc_sz; |
|
1071 PRM_DEBUG(valloc_base); |
|
1072 ptable_va = valloc_base - ptable_sz; |
|
1073 |
|
1074 #if defined(__amd64) |
|
1075 if (eprom_kernelbase && eprom_kernelbase != KERNELBASE) |
|
1076 cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit " |
|
1077 "systems."); |
|
1078 kernelbase = (uintptr_t)KERNELBASE; |
|
1079 core_base = (uintptr_t)COREHEAP_BASE; |
|
1080 core_size = ptable_va - core_base; |
|
1081 #else /* __i386 */ |
|
1082 /* |
|
1083 * We configure kernelbase based on: |
|
1084 * |
|
1085 * 1. user specified kernelbase via eeprom command. Value cannot exceed |
|
1086 * KERNELBASE_MAX. we large page align eprom_kernelbase |
|
1087 * |
|
1088 * 2. Default to KERNELBASE and adjust to 2X less the size for page_t. |
|
1089 * On large memory systems we must lower kernelbase to allow |
|
1090 * enough room for page_t's for all of memory. |
|
1091 * |
|
1092 * The value set here, might be changed a little later. |
|
1093 */ |
|
1094 if (eprom_kernelbase) { |
|
1095 kernelbase = eprom_kernelbase & mmu.level_mask[1]; |
|
1096 if (kernelbase > KERNELBASE_MAX) |
|
1097 kernelbase = KERNELBASE_MAX; |
|
1098 } else { |
|
1099 kernelbase = (uintptr_t)KERNELBASE; |
|
1100 kernelbase -= ROUND_UP_4MEG(2 * valloc_sz); |
|
1101 } |
|
1102 ASSERT((kernelbase & mmu.level_offset[1]) == 0); |
|
1103 core_base = ptable_va; |
|
1104 core_size = 0; |
|
1105 #endif |
|
1106 |
|
1107 PRM_DEBUG(kernelbase); |
|
1108 PRM_DEBUG(core_base); |
|
1109 PRM_DEBUG(core_size); |
|
1110 |
|
1111 /* |
|
1112 * At this point, we can only use a portion of the kernelheap that |
|
1113 * will be available after we boot. Both 32-bit and 64-bit systems |
|
1114 * have this limitation, although the reasons are completely |
|
1115 * different. |
|
1116 * |
|
1117 * On 64-bit systems, the booter only supports allocations in the |
|
1118 * upper 4GB of memory, so we have to work with a reduced kernel |
|
1119 * heap until we take over all allocations. The booter also sits |
|
1120 * in the lower portion of that 4GB range, so we have to raise the |
|
1121 * bottom of the heap even further. |
|
1122 * |
|
1123 * On 32-bit systems we have to leave room to place segmap below |
|
1124 * the heap. We don't yet know how large segmap will be, so we |
|
1125 * have to be very conservative. |
|
1126 */ |
|
1127 #if defined(__amd64) |
|
1128 /* |
|
1129 * XX64: For now, we let boot have the lower 2GB of the top 4GB |
|
1130 * address range. In the long run, that should be fixed. It's |
|
1131 * insane for a booter to need 2 2GB address ranges. |
|
1132 */ |
|
1133 boot_kernelheap = (caddr_t)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE); |
|
1134 segmap_reserved = 0; |
|
1135 |
|
1136 #else /* __i386 */ |
|
1137 segkp_fromheap = 1; |
|
1138 segmap_reserved = ROUND_UP_LPAGE(MAX(segmapsize, SEGMAPMAX)); |
|
1139 boot_kernelheap = (caddr_t)(ROUND_UP_LPAGE(kernelbase) + |
|
1140 segmap_reserved); |
|
1141 #endif |
|
1142 PRM_DEBUG(boot_kernelheap); |
|
1143 kernelheap = boot_kernelheap; |
|
1144 ekernelheap = (char *)core_base; |
|
1145 |
|
1146 /* |
|
1147 * If segmap is too large we can push the bottom of the kernel heap |
|
1148 * higher than the base. Or worse, it could exceed the top of the |
|
1149 * VA space entirely, causing it to wrap around. |
|
1150 */ |
|
1151 if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase) |
|
1152 panic("too little memory available for kernelheap," |
|
1153 " use a different kernelbase"); |
|
1154 |
|
1155 /* |
|
1156 * Now that we know the real value of kernelbase, |
|
1157 * update variables that were initialized with a value of |
|
1158 * KERNELBASE (in common/conf/param.c). |
|
1159 * |
|
1160 * XXX The problem with this sort of hackery is that the |
|
1161 * compiler just may feel like putting the const declarations |
|
1162 * (in param.c) into the .text section. Perhaps they should |
|
1163 * just be declared as variables there? |
|
1164 */ |
|
1165 |
|
1166 #if defined(__amd64) |
|
1167 ASSERT(_kernelbase == KERNELBASE); |
|
1168 ASSERT(_userlimit == USERLIMIT); |
|
1169 /* |
|
1170 * As one final sanity check, verify that the "red zone" between |
|
1171 * kernel and userspace is exactly the size we expected. |
|
1172 */ |
|
1173 ASSERT(_kernelbase == (_userlimit + (2 * 1024 * 1024))); |
|
1174 #else |
|
1175 *(uintptr_t *)&_kernelbase = kernelbase; |
|
1176 *(uintptr_t *)&_userlimit = kernelbase; |
|
1177 *(uintptr_t *)&_userlimit32 = _userlimit; |
|
1178 #endif |
|
1179 PRM_DEBUG(_kernelbase); |
|
1180 PRM_DEBUG(_userlimit); |
|
1181 PRM_DEBUG(_userlimit32); |
|
1182 |
|
1183 /* |
|
1184 * do all the initial allocations |
|
1185 */ |
|
1186 perform_allocations(); |
|
1187 |
|
1188 /* |
|
1189 * Initialize the kernel heap. Note 3rd argument must be > 1st. |
|
1190 */ |
|
1191 kernelheap_init(kernelheap, ekernelheap, kernelheap + MMU_PAGESIZE, |
|
1192 (void *)core_base, (void *)ptable_va); |
|
1193 |
|
1194 /* |
|
1195 * Build phys_install and phys_avail in kernel memspace. |
|
1196 * - phys_install should be all memory in the system. |
|
1197 * - phys_avail is phys_install minus any memory mapped before this |
|
1198 * point above KERNEL_TEXT. |
|
1199 */ |
|
1200 current = phys_install = memlist; |
|
1201 copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t, NULL); |
|
1202 if ((caddr_t)current > (caddr_t)memlist + memlist_sz) |
|
1203 panic("physinstalled was too big!"); |
|
1204 if (prom_debug) |
|
1205 print_kernel_memlist("phys_install", phys_install); |
|
1206 |
|
1207 phys_avail = current; |
|
1208 PRM_POINT("Building phys_avail:\n"); |
|
1209 copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t, |
|
1210 avail_filter); |
|
1211 if ((caddr_t)current > (caddr_t)memlist + memlist_sz) |
|
1212 panic("physavail was too big!"); |
|
1213 if (prom_debug) |
|
1214 print_kernel_memlist("phys_avail", phys_avail); |
|
1215 |
|
1216 /* |
|
1217 * setup page coloring |
|
1218 */ |
|
1219 page_coloring_setup(pagecolor_mem); |
|
1220 page_lock_init(); /* currently a no-op */ |
|
1221 |
|
1222 /* |
|
1223 * free page list counters |
|
1224 */ |
|
1225 (void) page_ctrs_alloc(page_ctrs_mem); |
|
1226 |
|
1227 /* |
|
1228 * Initialize the page structures from the memory lists. |
|
1229 */ |
|
1230 availrmem_initial = availrmem = freemem = 0; |
|
1231 PRM_POINT("Calling kphysm_init()..."); |
|
1232 boot_npages = kphysm_init(pp_base, memseg_base, 0, boot_npages); |
|
1233 PRM_POINT("kphysm_init() done"); |
|
1234 PRM_DEBUG(boot_npages); |
|
1235 |
|
1236 /* |
|
1237 * Now that page_t's have been initialized, remove all the |
|
1238 * initial allocation pages from the kernel free page lists. |
|
1239 */ |
|
1240 boot_mapin((caddr_t)valloc_base, valloc_sz); |
|
1241 |
|
1242 /* |
|
1243 * Initialize kernel memory allocator. |
|
1244 */ |
|
1245 kmem_init(); |
|
1246 |
|
1247 /* |
|
1248 * print this out early so that we know what's going on |
|
1249 */ |
|
1250 cmn_err(CE_CONT, "?features: %b\n", x86_feature, FMT_X86_FEATURE); |
|
1251 |
|
1252 /* |
|
1253 * Initialize bp_mapin(). |
|
1254 */ |
|
1255 bp_init(MMU_PAGESIZE, HAT_STORECACHING_OK); |
|
1256 |
|
1257 #if defined(__i386) |
|
1258 if (eprom_kernelbase && (eprom_kernelbase != kernelbase)) |
|
1259 cmn_err(CE_WARN, "kernelbase value, User specified 0x%lx, " |
|
1260 "System using 0x%lx", |
|
1261 (uintptr_t)eprom_kernelbase, (uintptr_t)kernelbase); |
|
1262 #endif |
|
1263 |
|
1264 #ifdef KERNELBASE_ABI_MIN |
|
1265 if (kernelbase < (uintptr_t)KERNELBASE_ABI_MIN) { |
|
1266 cmn_err(CE_NOTE, "!kernelbase set to 0x%lx, system is not " |
|
1267 "i386 ABI compliant.", (uintptr_t)kernelbase); |
|
1268 } |
|
1269 #endif |
|
1270 |
|
1271 PRM_POINT("startup_memlist() done"); |
|
1272 } |
|
1273 |
|
1274 static void |
|
1275 startup_modules(void) |
|
1276 { |
|
1277 unsigned int i; |
|
1278 extern void impl_setup_ddi(void); |
|
1279 extern void prom_setup(void); |
|
1280 |
|
1281 PRM_POINT("startup_modules() starting..."); |
|
1282 /* |
|
1283 * Initialize ten-micro second timer so that drivers will |
|
1284 * not get short changed in their init phase. This was |
|
1285 * not getting called until clkinit which, on fast cpu's |
|
1286 * caused the drv_usecwait to be way too short. |
|
1287 */ |
|
1288 microfind(); |
|
1289 |
|
1290 /* |
|
1291 * Read the GMT lag from /etc/rtc_config. |
|
1292 */ |
|
1293 gmt_lag = process_rtc_config_file(); |
|
1294 |
|
1295 /* |
|
1296 * Calculate default settings of system parameters based upon |
|
1297 * maxusers, yet allow to be overridden via the /etc/system file. |
|
1298 */ |
|
1299 param_calc(0); |
|
1300 |
|
1301 mod_setup(); |
|
1302 |
|
1303 /* |
|
1304 * Setup machine check architecture on P6 |
|
1305 */ |
|
1306 setup_mca(); |
|
1307 |
|
1308 /* |
|
1309 * Initialize system parameters. |
|
1310 */ |
|
1311 param_init(); |
|
1312 |
|
1313 /* |
|
1314 * maxmem is the amount of physical memory we're playing with. |
|
1315 */ |
|
1316 maxmem = physmem; |
|
1317 |
|
1318 /* |
|
1319 * Initialize the hat layer. |
|
1320 */ |
|
1321 hat_init(); |
|
1322 |
|
1323 /* |
|
1324 * Initialize segment management stuff. |
|
1325 */ |
|
1326 seg_init(); |
|
1327 |
|
1328 if (modload("fs", "specfs") == -1) |
|
1329 halt("Can't load specfs"); |
|
1330 |
|
1331 if (modload("fs", "devfs") == -1) |
|
1332 halt("Can't load devfs"); |
|
1333 |
|
1334 dispinit(); |
|
1335 |
|
1336 /* |
|
1337 * This is needed here to initialize hw_serial[] for cluster booting. |
|
1338 */ |
|
1339 if ((i = modload("misc", "sysinit")) != (unsigned int)-1) |
|
1340 (void) modunload(i); |
|
1341 else |
|
1342 cmn_err(CE_CONT, "sysinit load failed"); |
|
1343 |
|
1344 /* Read cluster configuration data. */ |
|
1345 clconf_init(); |
|
1346 |
|
1347 /* |
|
1348 * Create a kernel device tree. First, create rootnex and |
|
1349 * then invoke bus specific code to probe devices. |
|
1350 */ |
|
1351 setup_ddi(); |
|
1352 impl_setup_ddi(); |
|
1353 /* |
|
1354 * Fake a prom tree such that /dev/openprom continues to work |
|
1355 */ |
|
1356 prom_setup(); |
|
1357 |
|
1358 /* |
|
1359 * Load all platform specific modules |
|
1360 */ |
|
1361 psm_modload(); |
|
1362 |
|
1363 PRM_POINT("startup_modules() done"); |
|
1364 } |
|
1365 |
|
1366 static void |
|
1367 startup_bop_gone(void) |
|
1368 { |
|
1369 PRM_POINT("startup_bop_gone() starting..."); |
|
1370 |
|
1371 /* |
|
1372 * Do final allocations of HAT data structures that need to |
|
1373 * be allocated before quiescing the boot loader. |
|
1374 */ |
|
1375 PRM_POINT("Calling hat_kern_alloc()..."); |
|
1376 hat_kern_alloc(); |
|
1377 PRM_POINT("hat_kern_alloc() done"); |
|
1378 |
|
1379 /* |
|
1380 * Setup MTRR (Memory type range registers) |
|
1381 */ |
|
1382 setup_mtrr(); |
|
1383 PRM_POINT("startup_bop_gone() done"); |
|
1384 } |
|
1385 |
|
1386 /* |
|
1387 * Walk through the pagetables looking for pages mapped in by boot. If the |
|
1388 * setaside flag is set the pages are expected to be returned to the |
|
1389 * kernel later in boot, so we add them to the bootpages list. |
|
1390 */ |
|
1391 static void |
|
1392 protect_boot_range(uintptr_t low, uintptr_t high, int setaside) |
|
1393 { |
|
1394 uintptr_t va = low; |
|
1395 size_t len; |
|
1396 uint_t prot; |
|
1397 pfn_t pfn; |
|
1398 page_t *pp; |
|
1399 pgcnt_t boot_protect_cnt = 0; |
|
1400 |
|
1401 while (hat_boot_probe(&va, &len, &pfn, &prot) != 0 && va < high) { |
|
1402 if (va + len >= high) |
|
1403 panic("0x%lx byte mapping at 0x%p exceeds boot's " |
|
1404 "legal range.", len, (void *)va); |
|
1405 |
|
1406 while (len > 0) { |
|
1407 pp = page_numtopp_alloc(pfn); |
|
1408 if (pp != NULL) { |
|
1409 if (setaside == 0) |
|
1410 panic("Unexpected mapping by boot. " |
|
1411 "addr=%p pfn=%lx\n", |
|
1412 (void *)va, pfn); |
|
1413 |
|
1414 pp->p_next = bootpages; |
|
1415 bootpages = pp; |
|
1416 ++boot_protect_cnt; |
|
1417 } |
|
1418 |
|
1419 ++pfn; |
|
1420 len -= MMU_PAGESIZE; |
|
1421 va += MMU_PAGESIZE; |
|
1422 } |
|
1423 } |
|
1424 PRM_DEBUG(boot_protect_cnt); |
|
1425 } |
|
1426 |
|
1427 static void |
|
1428 startup_vm(void) |
|
1429 { |
|
1430 struct segmap_crargs a; |
|
1431 extern void hat_kern_setup(void); |
|
1432 pgcnt_t pages_left; |
|
1433 |
|
1434 PRM_POINT("startup_vm() starting..."); |
|
1435 |
|
1436 /* |
|
1437 * The next two loops are done in distinct steps in order |
|
1438 * to be sure that any page that is doubly mapped (both above |
|
1439 * KERNEL_TEXT and below kernelbase) is dealt with correctly. |
|
1440 * Note this may never happen, but it might someday. |
|
1441 */ |
|
1442 |
|
1443 bootpages = NULL; |
|
1444 PRM_POINT("Protecting boot pages"); |
|
1445 /* |
|
1446 * Protect any pages mapped above KERNEL_TEXT that somehow have |
|
1447 * page_t's. This can only happen if something weird allocated |
|
1448 * in this range (like kadb/kmdb). |
|
1449 */ |
|
1450 protect_boot_range(KERNEL_TEXT, (uintptr_t)-1, 0); |
|
1451 |
|
1452 /* |
|
1453 * Before we can take over memory allocation/mapping from the boot |
|
1454 * loader we must remove from our free page lists any boot pages that |
|
1455 * will stay mapped until release_bootstrap(). |
|
1456 */ |
|
1457 protect_boot_range(0, kernelbase, 1); |
|
1458 #if defined(__amd64) |
|
1459 protect_boot_range(BOOT_DOUBLEMAP_BASE, |
|
1460 BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE, 0); |
|
1461 #endif |
|
1462 |
|
1463 /* |
|
1464 * Copy in boot's page tables, set up extra page tables for the kernel, |
|
1465 * and switch to the kernel's context. |
|
1466 */ |
|
1467 PRM_POINT("Calling hat_kern_setup()..."); |
|
1468 hat_kern_setup(); |
|
1469 |
|
1470 /* |
|
1471 * It is no longer safe to call BOP_ALLOC(), so make sure we don't. |
|
1472 */ |
|
1473 bootops->bsys_alloc = NULL; |
|
1474 PRM_POINT("hat_kern_setup() done"); |
|
1475 |
|
1476 hat_cpu_online(CPU); |
|
1477 |
|
1478 /* |
|
1479 * Before we call kvm_init(), we need to establish the final size |
|
1480 * of the kernel's heap. So, we need to figure out how much space |
|
1481 * to set aside for segkp, segkpm, and segmap. |
|
1482 */ |
|
1483 final_kernelheap = (caddr_t)ROUND_UP_LPAGE(kernelbase); |
|
1484 #if defined(__amd64) |
|
1485 if (kpm_desired) { |
|
1486 /* |
|
1487 * Segkpm appears at the bottom of the kernel's address |
|
1488 * range. To detect accidental overruns of the user |
|
1489 * address space, we leave a "red zone" of unmapped memory |
|
1490 * between kernelbase and the beginning of segkpm. |
|
1491 */ |
|
1492 kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE; |
|
1493 kpm_size = mmu_ptob(physmax); |
|
1494 PRM_DEBUG(kpm_vbase); |
|
1495 PRM_DEBUG(kpm_size); |
|
1496 final_kernelheap = |
|
1497 (caddr_t)ROUND_UP_TOPLEVEL(kpm_vbase + kpm_size); |
|
1498 } |
|
1499 |
|
1500 if (!segkp_fromheap) { |
|
1501 size_t sz = mmu_ptob(segkpsize); |
|
1502 |
|
1503 /* |
|
1504 * determine size of segkp and adjust the bottom of the |
|
1505 * kernel's heap. |
|
1506 */ |
|
1507 if (sz < SEGKPMINSIZE || sz > SEGKPMAXSIZE) { |
|
1508 sz = SEGKPDEFSIZE; |
|
1509 cmn_err(CE_WARN, "!Illegal value for segkpsize. " |
|
1510 "segkpsize has been reset to %ld pages", |
|
1511 mmu_btop(sz)); |
|
1512 } |
|
1513 sz = MIN(sz, MAX(SEGKPMINSIZE, mmu_ptob(physmem))); |
|
1514 |
|
1515 segkpsize = mmu_btop(ROUND_UP_LPAGE(sz)); |
|
1516 segkp_base = final_kernelheap; |
|
1517 PRM_DEBUG(segkpsize); |
|
1518 PRM_DEBUG(segkp_base); |
|
1519 final_kernelheap = segkp_base + mmu_ptob(segkpsize); |
|
1520 PRM_DEBUG(final_kernelheap); |
|
1521 } |
|
1522 |
|
1523 /* |
|
1524 * put the range of VA for device mappings next |
|
1525 */ |
|
1526 toxic_addr = (uintptr_t)final_kernelheap; |
|
1527 PRM_DEBUG(toxic_addr); |
|
1528 final_kernelheap = (char *)toxic_addr + toxic_size; |
|
1529 #endif |
|
1530 PRM_DEBUG(final_kernelheap); |
|
1531 ASSERT(final_kernelheap < boot_kernelheap); |
|
1532 |
|
1533 /* |
|
1534 * Users can change segmapsize through eeprom or /etc/system. |
|
1535 * If the variable is tuned through eeprom, there is no upper |
|
1536 * bound on the size of segmap. If it is tuned through |
|
1537 * /etc/system on 32-bit systems, it must be no larger than we |
|
1538 * planned for in startup_memlist(). |
|
1539 */ |
|
1540 segmapsize = MAX(ROUND_UP_LPAGE(segmapsize), SEGMAPDEFAULT); |
|
1541 segkmap_start = ROUND_UP_LPAGE((uintptr_t)final_kernelheap); |
|
1542 |
|
1543 #if defined(__i386) |
|
1544 if (segmapsize > segmap_reserved) { |
|
1545 cmn_err(CE_NOTE, "!segmapsize may not be set > 0x%lx in " |
|
1546 "/etc/system. Use eeprom.", (long)SEGMAPMAX); |
|
1547 segmapsize = segmap_reserved; |
|
1548 } |
|
1549 /* |
|
1550 * 32-bit systems don't have segkpm or segkp, so segmap appears at |
|
1551 * the bottom of the kernel's address range. Set aside space for a |
|
1552 * red zone just below the start of segmap. |
|
1553 */ |
|
1554 segkmap_start += KERNEL_REDZONE_SIZE; |
|
1555 segmapsize -= KERNEL_REDZONE_SIZE; |
|
1556 #endif |
|
1557 final_kernelheap = (char *)(segkmap_start + segmapsize); |
|
1558 |
|
1559 PRM_DEBUG(segkmap_start); |
|
1560 PRM_DEBUG(segmapsize); |
|
1561 PRM_DEBUG(final_kernelheap); |
|
1562 |
|
1563 /* |
|
1564 * Initialize VM system |
|
1565 */ |
|
1566 PRM_POINT("Calling kvm_init()..."); |
|
1567 kvm_init(); |
|
1568 PRM_POINT("kvm_init() done"); |
|
1569 |
|
1570 /* |
|
1571 * Tell kmdb that the VM system is now working |
|
1572 */ |
|
1573 if (boothowto & RB_DEBUG) |
|
1574 kdi_dvec_vmready(); |
|
1575 |
|
1576 /* |
|
1577 * Mangle the brand string etc. |
|
1578 */ |
|
1579 cpuid_pass3(CPU); |
|
1580 |
|
1581 PRM_DEBUG(final_kernelheap); |
|
1582 |
|
1583 /* |
|
1584 * Now that we can use memory outside the top 4GB (on 64-bit |
|
1585 * systems) and we know the size of segmap, we can set the final |
|
1586 * size of the kernel's heap. Note: on 64-bit systems we still |
|
1587 * can't touch anything in the bottom half of the top 4GB range |
|
1588 * because boot still has pages mapped there. |
|
1589 */ |
|
1590 if (final_kernelheap < boot_kernelheap) { |
|
1591 kernelheap_extend(final_kernelheap, boot_kernelheap); |
|
1592 #if defined(__amd64) |
|
1593 kmem_setaside = vmem_xalloc(heap_arena, BOOT_DOUBLEMAP_SIZE, |
|
1594 MMU_PAGESIZE, 0, 0, (void *)(BOOT_DOUBLEMAP_BASE), |
|
1595 (void *)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE), |
|
1596 VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |
|
1597 PRM_DEBUG(kmem_setaside); |
|
1598 if (kmem_setaside == NULL) |
|
1599 panic("Could not protect boot's memory"); |
|
1600 #endif |
|
1601 } |
|
1602 /* |
|
1603 * Now that the kernel heap may have grown significantly, we need |
|
1604 * to make all the remaining page_t's available to back that memory. |
|
1605 * |
|
1606 * XX64 this should probably wait till after release boot-strap too. |
|
1607 */ |
|
1608 pages_left = npages - boot_npages; |
|
1609 if (pages_left > 0) { |
|
1610 PRM_DEBUG(pages_left); |
|
1611 (void) kphysm_init(NULL, memseg_base, boot_npages, pages_left); |
|
1612 } |
|
1613 |
|
1614 #if defined(__amd64) |
|
1615 |
|
1616 /* |
|
1617 * Create the device arena for toxic (to dtrace/kmdb) mappings. |
|
1618 */ |
|
1619 device_arena = vmem_create("device", (void *)toxic_addr, |
|
1620 toxic_size, MMU_PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP); |
|
1621 |
|
1622 #else /* __i386 */ |
|
1623 |
|
1624 /* |
|
1625 * allocate the bit map that tracks toxic pages |
|
1626 */ |
|
1627 toxic_bit_map_len = btop((ulong_t)(ptable_va - kernelbase)); |
|
1628 PRM_DEBUG(toxic_bit_map_len); |
|
1629 toxic_bit_map = |
|
1630 kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP); |
|
1631 ASSERT(toxic_bit_map != NULL); |
|
1632 PRM_DEBUG(toxic_bit_map); |
|
1633 |
|
1634 #endif /* __i386 */ |
|
1635 |
|
1636 |
|
1637 /* |
|
1638 * Now that we've got more VA, as well as the ability to allocate from |
|
1639 * it, tell the debugger. |
|
1640 */ |
|
1641 if (boothowto & RB_DEBUG) |
|
1642 kdi_dvec_memavail(); |
|
1643 |
|
1644 /* |
|
1645 * The following code installs a special page fault handler (#pf) |
|
1646 * to work around a pentium bug. |
|
1647 */ |
|
1648 #if !defined(__amd64) |
|
1649 if (x86_type == X86_TYPE_P5) { |
|
1650 gate_desc_t *newidt; |
|
1651 desctbr_t newidt_r; |
|
1652 |
|
1653 if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL) |
|
1654 panic("failed to install pentium_pftrap"); |
|
1655 |
|
1656 bcopy(idt0, newidt, sizeof (idt0)); |
|
1657 set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap, |
|
1658 KCS_SEL, 0, SDT_SYSIGT, SEL_KPL); |
|
1659 |
|
1660 (void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE, |
|
1661 PROT_READ|PROT_EXEC); |
|
1662 |
|
1663 newidt_r.dtr_limit = sizeof (idt0) - 1; |
|
1664 newidt_r.dtr_base = (uintptr_t)newidt; |
|
1665 CPU->cpu_idt = newidt; |
|
1666 wr_idtr(&newidt_r); |
|
1667 } |
|
1668 #endif /* !__amd64 */ |
|
1669 |
|
1670 /* |
|
1671 * Map page pfn=0 for drivers, such as kd, that need to pick up |
|
1672 * parameters left there by controllers/BIOS. |
|
1673 */ |
|
1674 PRM_POINT("setup up p0_va"); |
|
1675 p0_va = i86devmap(0, 1, PROT_READ); |
|
1676 PRM_DEBUG(p0_va); |
|
1677 |
|
1678 /* |
|
1679 * If the following is true, someone has patched phsymem to be less |
|
1680 * than the number of pages that the system actually has. Remove |
|
1681 * pages until system memory is limited to the requested amount. |
|
1682 * Since we have allocated page structures for all pages, we |
|
1683 * correct the amount of memory we want to remove by the size of |
|
1684 * the memory used to hold page structures for the non-used pages. |
|
1685 */ |
|
1686 if (physmem < npages) { |
|
1687 uint_t diff; |
|
1688 offset_t off; |
|
1689 struct page *pp; |
|
1690 caddr_t rand_vaddr; |
|
1691 struct seg kseg; |
|
1692 |
|
1693 cmn_err(CE_WARN, "limiting physmem to %lu pages", physmem); |
|
1694 |
|
1695 off = 0; |
|
1696 diff = npages - physmem; |
|
1697 diff -= mmu_btopr(diff * sizeof (struct page)); |
|
1698 kseg.s_as = &kas; |
|
1699 while (diff--) { |
|
1700 rand_vaddr = (caddr_t) |
|
1701 (((uintptr_t)&unused_pages_vp >> 7) ^ |
|
1702 (uintptr_t)((u_offset_t)off >> MMU_PAGESHIFT)); |
|
1703 pp = page_create_va(&unused_pages_vp, off, MMU_PAGESIZE, |
|
1704 PG_WAIT | PG_EXCL, &kseg, rand_vaddr); |
|
1705 if (pp == NULL) { |
|
1706 panic("limited physmem too much!"); |
|
1707 /*NOTREACHED*/ |
|
1708 } |
|
1709 page_io_unlock(pp); |
|
1710 page_downgrade(pp); |
|
1711 availrmem--; |
|
1712 off += MMU_PAGESIZE; |
|
1713 } |
|
1714 } |
|
1715 |
|
1716 cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n", |
|
1717 physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled)); |
|
1718 |
|
1719 PRM_POINT("Calling hat_init_finish()..."); |
|
1720 hat_init_finish(); |
|
1721 PRM_POINT("hat_init_finish() done"); |
|
1722 |
|
1723 /* |
|
1724 * Initialize the segkp segment type. |
|
1725 */ |
|
1726 rw_enter(&kas.a_lock, RW_WRITER); |
|
1727 if (!segkp_fromheap) { |
|
1728 if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize), |
|
1729 segkp) < 0) { |
|
1730 panic("startup: cannot attach segkp"); |
|
1731 /*NOTREACHED*/ |
|
1732 } |
|
1733 } else { |
|
1734 /* |
|
1735 * For 32 bit x86 systems, we will have segkp under the heap. |
|
1736 * There will not be a segkp segment. We do, however, need |
|
1737 * to fill in the seg structure. |
|
1738 */ |
|
1739 segkp->s_as = &kas; |
|
1740 } |
|
1741 if (segkp_create(segkp) != 0) { |
|
1742 panic("startup: segkp_create failed"); |
|
1743 /*NOTREACHED*/ |
|
1744 } |
|
1745 PRM_DEBUG(segkp); |
|
1746 rw_exit(&kas.a_lock); |
|
1747 |
|
1748 /* |
|
1749 * kpm segment |
|
1750 */ |
|
1751 segmap_kpm = 0; |
|
1752 if (kpm_desired) { |
|
1753 kpm_init(); |
|
1754 kpm_enable = 1; |
|
1755 } |
|
1756 |
|
1757 /* |
|
1758 * Now create segmap segment. |
|
1759 */ |
|
1760 rw_enter(&kas.a_lock, RW_WRITER); |
|
1761 if (seg_attach(&kas, (caddr_t)segkmap_start, segmapsize, segkmap) < 0) { |
|
1762 panic("cannot attach segkmap"); |
|
1763 /*NOTREACHED*/ |
|
1764 } |
|
1765 PRM_DEBUG(segkmap); |
|
1766 |
|
1767 /* |
|
1768 * The 64 bit HAT permanently maps only segmap's page tables. |
|
1769 * The 32 bit HAT maps the heap's page tables too. |
|
1770 */ |
|
1771 #if defined(__amd64) |
|
1772 hat_kmap_init(segkmap_start, segmapsize); |
|
1773 #else /* __i386 */ |
|
1774 ASSERT(segkmap_start + segmapsize == (uintptr_t)final_kernelheap); |
|
1775 hat_kmap_init(segkmap_start, (uintptr_t)ekernelheap - segkmap_start); |
|
1776 #endif /* __i386 */ |
|
1777 |
|
1778 a.prot = PROT_READ | PROT_WRITE; |
|
1779 a.shmsize = 0; |
|
1780 a.nfreelist = segmapfreelists; |
|
1781 |
|
1782 if (segmap_create(segkmap, (caddr_t)&a) != 0) |
|
1783 panic("segmap_create segkmap"); |
|
1784 rw_exit(&kas.a_lock); |
|
1785 |
|
1786 setup_vaddr_for_ppcopy(CPU); |
|
1787 |
|
1788 segdev_init(); |
|
1789 pmem_init(); |
|
1790 PRM_POINT("startup_vm() done"); |
|
1791 } |
|
1792 |
|
1793 static void |
|
1794 startup_end(void) |
|
1795 { |
|
1796 extern void setx86isalist(void); |
|
1797 |
|
1798 PRM_POINT("startup_end() starting..."); |
|
1799 |
|
1800 /* |
|
1801 * Perform tasks that get done after most of the VM |
|
1802 * initialization has been done but before the clock |
|
1803 * and other devices get started. |
|
1804 */ |
|
1805 kern_setup1(); |
|
1806 |
|
1807 /* |
|
1808 * Perform CPC initialization for this CPU. |
|
1809 */ |
|
1810 kcpc_hw_init(CPU); |
|
1811 |
|
1812 #if defined(__amd64) |
|
1813 /* |
|
1814 * Validate support for syscall/sysret |
|
1815 * XX64 -- include SSE, SSE2, etc. here too? |
|
1816 */ |
|
1817 if ((x86_feature & X86_ASYSC) == 0) { |
|
1818 cmn_err(CE_WARN, |
|
1819 "cpu%d does not support syscall/sysret", CPU->cpu_id); |
|
1820 } |
|
1821 #endif |
|
1822 /* |
|
1823 * Configure the system. |
|
1824 */ |
|
1825 PRM_POINT("Calling configure()..."); |
|
1826 configure(); /* set up devices */ |
|
1827 PRM_POINT("configure() done"); |
|
1828 |
|
1829 /* |
|
1830 * Set the isa_list string to the defined instruction sets we |
|
1831 * support. |
|
1832 */ |
|
1833 setx86isalist(); |
|
1834 init_intr_threads(CPU); |
|
1835 psm_install(); |
|
1836 |
|
1837 /* |
|
1838 * We're done with bootops. We don't unmap the bootstrap yet because |
|
1839 * we're still using bootsvcs. |
|
1840 */ |
|
1841 PRM_POINT("zeroing out bootops"); |
|
1842 *bootopsp = (struct bootops *)0; |
|
1843 bootops = (struct bootops *)NULL; |
|
1844 |
|
1845 PRM_POINT("Enabling interrupts"); |
|
1846 (*picinitf)(); |
|
1847 sti(); |
|
1848 |
|
1849 (void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1, |
|
1850 "softlevel1", NULL, NULL); /* XXX to be moved later */ |
|
1851 |
|
1852 PRM_POINT("startup_end() done"); |
|
1853 } |
|
1854 |
|
/*
 * Serial-number support.  _hs1107 and _bdhs34 look like deliberately
 * obscured aliases consumed by code elsewhere (licensing/hostid) --
 * NOTE(review): confirm the consumers before renaming or removing.
 */
extern char hw_serial[];
char *_hs1107 = hw_serial;	/* alias to the hardware serial string */
ulong_t _bdhs34;
|
1858 |
|
/*
 * Post-boot initialization, presumably run after startup() completes
 * (TODO confirm against main()): publish hardware capabilities to
 * userland, start the memory scrubber, handle /etc/system forceloads,
 * finish MMU init, and hook the boot CPU into the device tree.
 */
void
post_startup(void)
{
	extern void memscrub_init(void);

	/*
	 * Set the system wide, processor-specific flags to be passed
	 * to userland via the aux vector for performance hints and
	 * instruction set extensions.
	 */
	bind_hwcap();

	/*
	 * Startup memory scrubber.
	 */
	(void) memscrub_init();

	/*
	 * Perform forceloading tasks for /etc/system.
	 */
	(void) mod_sysctl(SYS_FORCELOAD, NULL);

	/*
	 * complete mmu initialization, now that kernel and critical
	 * modules have been loaded.
	 */
	(void) post_startup_mmu_initialization();

	/*
	 * ON4.0: Force /proc module in until clock interrupt handle fixed
	 * ON4.0: This must be fixed or restated in /etc/systems.
	 */
	(void) modload("fs", "procfs");

#if defined(__i386)
	/*
	 * Check for required functional Floating Point hardware,
	 * unless FP hardware explicitly disabled.
	 */
	if (fpu_exists && (fpu_pentium_fdivbug || fp_kind == FP_NO))
		halt("No working FP hardware found");
#endif

	/* Snapshot the remaining free memory as the post-boot maximum. */
	maxmem = freemem;

	add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi);

	/*
	 * Perform the formal initialization of the boot chip,
	 * and associate the boot cpu with it.
	 * This must be done after the cpu node for CPU has been
	 * added to the device tree, when the necessary probing to
	 * know the chip type and chip "id" is performed.
	 */
	chip_cpu_init(CPU);
	chip_cpu_assign(CPU);
}
|
1916 |
|
1917 static int |
|
1918 pp_in_ramdisk(page_t *pp) |
|
1919 { |
|
1920 extern uint64_t ramdisk_start, ramdisk_end; |
|
1921 |
|
1922 return ((pp->p_pagenum >= btop(ramdisk_start)) && |
|
1923 (pp->p_pagenum < btopr(ramdisk_end))); |
|
1924 } |
|
1925 |
|
/*
 * Release everything still held on behalf of the boot loader: its
 * mappings below kernelbase (plus the amd64 double-map), the hardcoded
 * ramdisk devinfo node when root is not the ramdisk, and the pages on
 * the bootpages list.  Also grabs one page below 1MB for the real-mode
 * "platter" used to start the other CPUs.
 */
void
release_bootstrap(void)
{
	int root_is_ramdisk;
	pfn_t pfn;
	page_t *pp;
	extern void kobj_boot_unmountroot(void);
	extern dev_t rootdev;

	/* unmount boot ramdisk and release kmem usage */
	kobj_boot_unmountroot();

	/*
	 * We're finished using the boot loader so free its pages.
	 */
	PRM_POINT("Unmapping lower boot pages");
	clear_boot_mappings(0, kernelbase);
#if defined(__amd64)
	PRM_POINT("Unmapping upper boot pages");
	clear_boot_mappings(BOOT_DOUBLEMAP_BASE,
	    BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE);
#endif

	/*
	 * If root isn't on ramdisk, destroy the hardcoded
	 * ramdisk node now and release the memory. Else,
	 * ramdisk memory is kept in rd_pages.
	 */
	root_is_ramdisk = (getmajor(rootdev) == ddi_name_to_major("ramdisk"));
	if (!root_is_ramdisk) {
		dev_info_t *dip = ddi_find_devinfo("ramdisk", -1, 0);
		ASSERT(dip && ddi_get_parent(dip) == ddi_root_node());
		ndi_rele_devi(dip);	/* held from ddi_find_devinfo */
		(void) ddi_remove_child(dip, 0);
	}

	PRM_POINT("Releasing boot pages");
	while (bootpages) {
		pp = bootpages;
		bootpages = pp->p_next;
		if (root_is_ramdisk && pp_in_ramdisk(pp)) {
			/* keep ramdisk pages; they back the root fs */
			pp->p_next = rd_pages;
			rd_pages = pp;
			continue;
		}
		pp->p_next = (struct page *)0;
		page_free(pp, 1);
	}

	/*
	 * Find 1 page below 1 MB so that other processors can boot up.
	 * Make sure it has a kernel VA as well as a 1:1 mapping.
	 * We should have just free'd one up.
	 */
	if (use_mp) {
		for (pfn = 1; pfn < btop(1*1024*1024); pfn++) {
			if (page_numtopp_alloc(pfn) == NULL)
				continue;
			rm_platter_va = i86devmap(pfn, 1,
			    PROT_READ | PROT_WRITE | PROT_EXEC);
			rm_platter_pa = ptob(pfn);
			/* also identity-map it for real-mode startup code */
			hat_devload(kas.a_hat,
			    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
			    pfn, PROT_READ | PROT_WRITE | PROT_EXEC,
			    HAT_LOAD_NOCONSIST);
			break;
		}
		if (pfn == btop(1*1024*1024))
			panic("No page available for starting "
			    "other processors");
	}

#if defined(__amd64)
	PRM_POINT("Returning boot's VA space to kernel heap");
	if (kmem_setaside != NULL)
		vmem_free(heap_arena, kmem_setaside, BOOT_DOUBLEMAP_SIZE);
#endif
}
|
2004 |
|
2005 /* |
|
2006 * Initialize the platform-specific parts of a page_t. |
|
2007 */ |
|
2008 void |
|
2009 add_physmem_cb(page_t *pp, pfn_t pnum) |
|
2010 { |
|
2011 pp->p_pagenum = pnum; |
|
2012 pp->p_mapping = NULL; |
|
2013 pp->p_embed = 0; |
|
2014 pp->p_share = 0; |
|
2015 pp->p_mlentry = 0; |
|
2016 } |
|
2017 |
|
/*
 * kphysm_init() initializes physical memory.
 *
 * Walks phys_avail, skipping the first 'start' pages, and adds up to
 * 'npages' pages to the system: page_t's are initialized (carved out
 * of each range itself when inpp is NULL), memsegs are built and
 * inserted in decreasing pfn order -- merging with pfn-contiguous
 * neighbors where possible -- and availrmem is credited.
 * Returns the number of pages actually added.
 */
static pgcnt_t
kphysm_init(
	page_t *inpp,
	struct memseg *memsegp,
	pgcnt_t start,
	pgcnt_t npages)
{
	struct memlist *pmem;
	struct memseg *cur_memseg;
	struct memseg **memsegpp;
	pfn_t base_pfn;
	pgcnt_t num;
	pgcnt_t total_skipped = 0;
	pgcnt_t skipping = 0;
	pgcnt_t pages_done = 0;
	pgcnt_t largepgcnt;
	uint64_t addr;
	uint64_t size;
	page_t *pp = inpp;
	int dobreak = 0;
	extern pfn_t ddiphysmin;

	ASSERT(page_hash != NULL && page_hashsz != 0);

	/* advance to the first unused slot in the caller's memseg array */
	for (cur_memseg = memsegp; cur_memseg->pages != NULL; cur_memseg++);
	ASSERT(cur_memseg == memsegp || start > 0);

	for (pmem = phys_avail; pmem && npages; pmem = pmem->next) {
		/*
		 * In a 32 bit kernel can't use higher memory if we're
		 * not booting in PAE mode. This check takes care of that.
		 */
		addr = pmem->address;
		size = pmem->size;
		if (btop(addr) > physmax)
			continue;

		/*
		 * align addr and size - they may not be at page boundaries
		 */
		if ((addr & MMU_PAGEOFFSET) != 0) {
			addr += MMU_PAGEOFFSET;
			addr &= ~(uint64_t)MMU_PAGEOFFSET;
			size -= addr - pmem->address;
		}

		/* only process pages below physmax */
		if (btop(addr + size) > physmax)
			size = ptob(physmax - btop(addr));

		num = btop(size);
		if (num == 0)
			continue;

		/* skip over the first 'start' pages across ranges */
		if (total_skipped < start) {
			if (start - total_skipped > num) {
				total_skipped += num;
				continue;
			}
			skipping = start - total_skipped;
			num -= skipping;
			addr += (MMU_PAGESIZE * skipping);
			total_skipped = start;
		}
		if (num == 0)
			continue;

		if (num > npages)
			num = npages;

		npages -= num;
		pages_done += num;
		base_pfn = btop(addr);

		/*
		 * If the caller didn't provide space for the page
		 * structures, carve them out of the memseg they will
		 * represent.
		 */
		if (pp == NULL) {
			pgcnt_t pp_pgs;

			if (num <= 1)
				continue;

			/*
			 * Compute how many of the pages we need to use for
			 * page_ts
			 */
			pp_pgs = (num * sizeof (page_t)) / MMU_PAGESIZE + 1;
			while (mmu_ptob(pp_pgs - 1) / sizeof (page_t) >=
			    num - pp_pgs + 1)
				--pp_pgs;
			PRM_DEBUG(pp_pgs);

			pp = vmem_alloc(heap_arena, mmu_ptob(pp_pgs),
			    VM_NOSLEEP);
			if (pp == NULL) {
				cmn_err(CE_WARN, "Unable to add %ld pages to "
				    "the system.", num);
				continue;
			}

			/* map the carved-out pages and zero the page_t's */
			hat_devload(kas.a_hat, (void *)pp, mmu_ptob(pp_pgs),
			    base_pfn, PROT_READ | PROT_WRITE | HAT_UNORDERED_OK,
			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
			bzero(pp, mmu_ptob(pp_pgs));
			num -= pp_pgs;
			base_pfn += pp_pgs;
		}

		if (prom_debug)
			prom_printf("MEMSEG addr=0x%" PRIx64
			    " pgs=0x%lx pfn 0x%lx-0x%lx\n",
			    addr, num, base_pfn, base_pfn + num);

		/*
		 * drop pages below ddiphysmin to simplify ddi memory
		 * allocation with non-zero addr_lo requests.
		 */
		if (base_pfn < ddiphysmin) {
			if (base_pfn + num <= ddiphysmin) {
				/* drop entire range below ddiphysmin */
				continue;
			}
			/* adjust range to ddiphysmin */
			pp += (ddiphysmin - base_pfn);
			num -= (ddiphysmin - base_pfn);
			base_pfn = ddiphysmin;
		}
		/*
		 * Build the memsegs entry
		 */
		cur_memseg->pages = pp;
		cur_memseg->epages = pp + num;
		cur_memseg->pages_base = base_pfn;
		cur_memseg->pages_end = base_pfn + num;

		/*
		 * insert in memseg list in decreasing pfn range order.
		 * Low memory is typically more fragmented such that this
		 * ordering keeps the larger ranges at the front of the list
		 * for code that searches memseg.
		 */
		memsegpp = &memsegs;
		for (;;) {
			if (*memsegpp == NULL) {
				/* empty memsegs */
				memsegs = cur_memseg;
				break;
			}
			/* check for continuity with start of memsegpp */
			if (cur_memseg->pages_end == (*memsegpp)->pages_base) {
				if (cur_memseg->epages == (*memsegpp)->pages) {
					/*
					 * contiguous pfn and page_t's. Merge
					 * cur_memseg into *memsegpp. Drop
					 * cur_memseg
					 */
					(*memsegpp)->pages_base =
					    cur_memseg->pages_base;
					(*memsegpp)->pages =
					    cur_memseg->pages;
					/*
					 * check if contiguous with the end of
					 * the next memseg.
					 */
					if ((*memsegpp)->next &&
					    ((*memsegpp)->pages_base ==
					    (*memsegpp)->next->pages_end)) {
						/*
						 * merged seg may also merge
						 * with its successor; loop
						 * once more to check (dobreak
						 * forces exit afterwards).
						 */
						cur_memseg = *memsegpp;
						memsegpp = &((*memsegpp)->next);
						dobreak = 1;
					} else {
						break;
					}
				} else {
					/*
					 * contiguous pfn but not page_t's.
					 * drop last pfn/page_t in cur_memseg
					 * to prevent creation of large pages
					 * with noncontiguous page_t's if not
					 * aligned to largest page boundary.
					 */
					largepgcnt = page_get_pagecnt(
					    page_num_pagesizes() - 1);

					if (cur_memseg->pages_end &
					    (largepgcnt - 1)) {
						num--;
						cur_memseg->epages--;
						cur_memseg->pages_end--;
					}
				}
			}

			/* check for continuity with end of memsegpp */
			if (cur_memseg->pages_base == (*memsegpp)->pages_end) {
				if (cur_memseg->pages == (*memsegpp)->epages) {
					/*
					 * contiguous pfn and page_t's. Merge
					 * cur_memseg into *memsegpp. Drop
					 * cur_memseg.
					 */
					if (dobreak) {
						/* merge previously done */
						cur_memseg->pages =
						    (*memsegpp)->pages;
						cur_memseg->pages_base =
						    (*memsegpp)->pages_base;
						cur_memseg->next =
						    (*memsegpp)->next;
					} else {
						(*memsegpp)->pages_end =
						    cur_memseg->pages_end;
						(*memsegpp)->epages =
						    cur_memseg->epages;
					}
					break;
				}
				/*
				 * contiguous pfn but not page_t's.
				 * drop first pfn/page_t in cur_memseg
				 * to prevent creation of large pages
				 * with noncontiguous page_t's if not
				 * aligned to largest page boundary.
				 */
				largepgcnt = page_get_pagecnt(
				    page_num_pagesizes() - 1);
				if (base_pfn & (largepgcnt - 1)) {
					num--;
					base_pfn++;
					cur_memseg->pages++;
					cur_memseg->pages_base++;
					pp = cur_memseg->pages;
				}
				if (dobreak)
					break;
			}

			/* found the insertion point (descending order) */
			if (cur_memseg->pages_base >=
			    (*memsegpp)->pages_end) {
				cur_memseg->next = *memsegpp;
				*memsegpp = cur_memseg;
				break;
			}
			if ((*memsegpp)->next == NULL) {
				cur_memseg->next = NULL;
				(*memsegpp)->next = cur_memseg;
				break;
			}
			memsegpp = &((*memsegpp)->next);
			ASSERT(*memsegpp != NULL);
		}

		/*
		 * add_physmem() initializes the PSM part of the page
		 * struct by calling the PSM back with add_physmem_cb().
		 * In addition it coalesces pages into larger pages as
		 * it initializes them.
		 */
		add_physmem(pp, num, base_pfn);
		cur_memseg++;
		availrmem_initial += num;
		availrmem += num;

		/*
		 * If the caller provided the page frames to us, then
		 * advance in that list. Otherwise, prepare to allocate
		 * our own page frames for the next memseg.
		 */
		pp = (inpp == NULL) ? NULL : pp + num;
	}

	PRM_DEBUG(availrmem_initial);
	PRM_DEBUG(availrmem);
	PRM_DEBUG(freemem);
	build_pfn_hash();
	return (pages_done);
}
|
2301 |
|
/*
 * Kernel VM initialization: attach the kernel's text, valloc, heap,
 * core (amd64) and debugger segments to kas, then set final page
 * protections (red zone unmapped, text/data writable).
 */
static void
kvm_init(void)
{
#ifdef DEBUG
	extern void _start();

	ASSERT((caddr_t)_start == s_text);
#endif
	/* kernel text must start on a page boundary */
	ASSERT((((uintptr_t)s_text) & MMU_PAGEOFFSET) == 0);

	/*
	 * Put the kernel segments in kernel address space.
	 */
	rw_enter(&kas.a_lock, RW_WRITER);
	as_avlinit(&kas);

	(void) seg_attach(&kas, s_text, e_moddata - s_text, &ktextseg);
	(void) segkmem_create(&ktextseg);

	(void) seg_attach(&kas, (caddr_t)valloc_base, valloc_sz, &kvalloc);
	(void) segkmem_create(&kvalloc);

	/*
	 * We're about to map out /boot.  This is the beginning of the
	 * system resource management transition. We can no longer
	 * call into /boot for I/O or memory allocations.
	 *
	 * XX64 - Is this still correct with kernelheap_extend() being called
	 * later than this????
	 */
	(void) seg_attach(&kas, final_kernelheap,
	    ekernelheap - final_kernelheap, &kvseg);
	(void) segkmem_create(&kvseg);

#if defined(__amd64)
	(void) seg_attach(&kas, (caddr_t)core_base, core_size, &kvseg_core);
	(void) segkmem_create(&kvseg_core);
#endif

	(void) seg_attach(&kas, (caddr_t)SEGDEBUGBASE, (size_t)SEGDEBUGSIZE,
	    &kdebugseg);
	(void) segkmem_create(&kdebugseg);

	rw_exit(&kas.a_lock);

	/*
	 * Ensure that the red zone at kernelbase is never accessible.
	 */
	(void) as_setprot(&kas, (caddr_t)kernelbase, KERNEL_REDZONE_SIZE, 0);

	/*
	 * Make the text writable so that it can be hot patched by DTrace.
	 */
	(void) as_setprot(&kas, s_text, e_modtext - s_text,
	    PROT_READ | PROT_WRITE | PROT_EXEC);

	/*
	 * Make data writable until end.
	 */
	(void) as_setprot(&kas, s_data, e_moddata - s_data,
	    PROT_READ | PROT_WRITE | PROT_EXEC);
}
|
2367 |
|
/*
 * Shadow copies of the MTRR registers supported by P6-family and
 * later processors.  They are captured from the boot CPU by
 * setup_mtrr() and written out to each CPU by mtrr_sync().
 */
static struct mtrrvar mtrrphys_arr[MAX_MTRRVAR];	/* variable ranges */
static uint64_t mtrr64k, mtrr16k1, mtrr16k2;		/* fixed ranges */
static uint64_t mtrr4k1, mtrr4k2, mtrr4k3;
static uint64_t mtrr4k4, mtrr4k5, mtrr4k6;
static uint64_t mtrr4k7, mtrr4k8, mtrrcap;
uint64_t mtrrdef, pat_attr_reg;

/*
 * Disable reprogramming of MTRRs by default.
 */
int enable_relaxed_mtrr = 0;
|
2382 |
|
/*
 * These must serve for Pentium, Pentium Pro (P6/Pentium II/Pentium III)
 * and Pentium 4, and yes, they are named 0, 1, 2, 4, 3 in ascending
 * address order (starting from 0x400). The Pentium 4 only implements
 * 4 sets, and while they are named 0-3 in the doc, the corresponding
 * names for P6 are 0,1,2,4. So define these arrays in address order
 * so that they work for both pre-Pentium4 and Pentium 4 processors.
 */
static uint_t mci_ctl[] = {REG_MC0_CTL, REG_MC1_CTL, REG_MC2_CTL,
	REG_MC4_CTL, REG_MC3_CTL};
static uint_t mci_status[] = {REG_MC0_STATUS, REG_MC1_STATUS, REG_MC2_STATUS,
	REG_MC4_STATUS, REG_MC3_STATUS};
static uint_t mci_addr[] = {REG_MC0_ADDR, REG_MC1_ADDR, REG_MC2_ADDR,
	REG_MC4_ADDR, REG_MC3_ADDR};
/* number of machine-check banks in use; set by setup_mca() */
static int mca_cnt;
|
2399 |
|
2400 |
|
2401 void |
|
2402 setup_mca() |
|
2403 { |
|
2404 int i; |
|
2405 uint64_t allzeros; |
|
2406 uint64_t allones; |
|
2407 uint64_t mca_cap; |
|
2408 |
|
2409 if (!(x86_feature & X86_MCA)) |
|
2410 return; |
|
2411 (void) rdmsr(REG_MCG_CAP, &mca_cap); |
|
2412 allones = 0xffffffffffffffffULL; |
|
2413 if (mca_cap & MCG_CAP_CTL_P) |
|
2414 (void) wrmsr(REG_MCG_CTL, &allones); |
|
2415 mca_cnt = mca_cap & MCG_CAP_COUNT_MASK; |
|
2416 if (mca_cnt > P6_MCG_CAP_COUNT) |
|
2417 mca_cnt = P6_MCG_CAP_COUNT; |
|
2418 for (i = 1; i < mca_cnt; i++) |
|
2419 (void) wrmsr(mci_ctl[i], &allones); |
|
2420 allzeros = 0; |
|
2421 for (i = 0; i < mca_cnt; i++) |
|
2422 (void) wrmsr(mci_status[i], &allzeros); |
|
2423 setcr4(getcr4() | CR4_MCE); |
|
2424 |
|
2425 } |
|
2426 |
|
/*
 * Machine-check (#MC) exception handler.  Logs MCG/MCi status for
 * every valid bank and clears the status registers afterwards.
 * Returns 0 when MCG_STATUS.RIPV indicates the interrupted context
 * is restartable, non-zero otherwise -- NOTE(review): confirm how
 * the caller interprets a non-zero return.
 */
int
mca_exception(struct regs *rp)
{
	uint64_t status, addr;
	uint64_t allzeros;
	uint64_t buf;
	int i, ret = 1, errcode, mserrcode;

	allzeros = 0;
	(void) rdmsr(REG_MCG_STATUS, &buf);
	status = buf;
	/* RIPV: restart IP valid => execution can continue */
	if (status & MCG_STATUS_RIPV)
		ret = 0;
	/* EIPV: the saved r_pc is associated with the error */
	if (status & MCG_STATUS_EIPV)
		cmn_err(CE_WARN, "MCE at 0x%lx", rp->r_pc);
	(void) wrmsr(REG_MCG_STATUS, &allzeros);
	for (i = 0; i < mca_cnt; i++) {
		(void) rdmsr(mci_status[i], &buf);
		status = buf;
		/*
		 * If status register not valid skip this bank
		 */
		if (!(status & MCI_STATUS_VAL))
			continue;
		errcode = status & MCI_STATUS_ERRCODE;
		mserrcode = (status >> MSERRCODE_SHFT) & MCI_STATUS_ERRCODE;
		if (status & MCI_STATUS_ADDRV) {
			/*
			 * If mci_addr contains the address where
			 * error occurred, display the address
			 */
			(void) rdmsr(mci_addr[i], &buf);
			addr = buf;
			cmn_err(CE_WARN, "MCE: Bank %d: error code 0x%x:"\
			    "addr = 0x%" PRIx64 ", model errcode = 0x%x", i,
			    errcode, addr, mserrcode);
		} else {
			cmn_err(CE_WARN,
			    "MCE: Bank %d: error code 0x%x, mserrcode = 0x%x",
			    i, errcode, mserrcode);
		}
		/* acknowledge the bank so it can report future errors */
		(void) wrmsr(mci_status[i], &allzeros);
	}
	return (ret);
}
|
2472 |
|
2473 void |
|
2474 setup_mtrr() |
|
2475 { |
|
2476 int i, ecx; |
|
2477 int vcnt; |
|
2478 struct mtrrvar *mtrrphys; |
|
2479 |
|
2480 if (!(x86_feature & X86_MTRR)) |
|
2481 return; |
|
2482 |
|
2483 (void) rdmsr(REG_MTRRCAP, &mtrrcap); |
|
2484 (void) rdmsr(REG_MTRRDEF, &mtrrdef); |
|
2485 if (mtrrcap & MTRRCAP_FIX) { |
|
2486 (void) rdmsr(REG_MTRR64K, &mtrr64k); |
|
2487 (void) rdmsr(REG_MTRR16K1, &mtrr16k1); |
|
2488 (void) rdmsr(REG_MTRR16K2, &mtrr16k2); |
|
2489 (void) rdmsr(REG_MTRR4K1, &mtrr4k1); |
|
2490 (void) rdmsr(REG_MTRR4K2, &mtrr4k2); |
|
2491 (void) rdmsr(REG_MTRR4K3, &mtrr4k3); |
|
2492 (void) rdmsr(REG_MTRR4K4, &mtrr4k4); |
|
2493 (void) rdmsr(REG_MTRR4K5, &mtrr4k5); |
|
2494 (void) rdmsr(REG_MTRR4K6, &mtrr4k6); |
|
2495 (void) rdmsr(REG_MTRR4K7, &mtrr4k7); |
|
2496 (void) rdmsr(REG_MTRR4K8, &mtrr4k8); |
|
2497 } |
|
2498 if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR) |
|
2499 vcnt = MAX_MTRRVAR; |
|
2500 |
|
2501 for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr; |
|
2502 i < vcnt - 1; i++, ecx += 2, mtrrphys++) { |
|
2503 (void) rdmsr(ecx, &mtrrphys->mtrrphys_base); |
|
2504 (void) rdmsr(ecx + 1, &mtrrphys->mtrrphys_mask); |
|
2505 if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) { |
|
2506 mtrrphys->mtrrphys_mask &= ~MTRRPHYSMASK_V; |
|
2507 } |
|
2508 } |
|
2509 if (x86_feature & X86_PAT) { |
|
2510 if (enable_relaxed_mtrr) |
|
2511 mtrrdef = MTRR_TYPE_WB|MTRRDEF_FE|MTRRDEF_E; |
|
2512 pat_attr_reg = PAT_DEFAULT_ATTRIBUTE; |
|
2513 } |
|
2514 |
|
2515 mtrr_sync(); |
|
2516 } |
|
2517 |
|
2518 /* |
|
2519 * Sync current cpu mtrr with the incore copy of mtrr. |
|
2520 * This function has to be invoked with interrupts disabled |
|
2521 * Currently we do not capture other cpu's. This is invoked on cpu0 |
|
2522 * just after reading /etc/system. |
|
2523 * On other cpu's its invoked from mp_startup(). |
|
2524 */ |
|
2525 void |
|
2526 mtrr_sync() |
|
2527 { |
|
2528 uint64_t my_mtrrdef; |
|
2529 uint_t crvalue, cr0_orig; |
|
2530 int vcnt, i, ecx; |
|
2531 struct mtrrvar *mtrrphys; |
|
2532 |
|
2533 cr0_orig = crvalue = getcr0(); |
|
2534 crvalue |= CR0_CD; |
|
2535 crvalue &= ~CR0_NW; |
|
2536 setcr0(crvalue); |
|
2537 invalidate_cache(); |
|
2538 setcr3(getcr3()); |
|
2539 |
|
2540 if (x86_feature & X86_PAT) { |
|
2541 (void) wrmsr(REG_MTRRPAT, &pat_attr_reg); |
|
2542 } |
|
2543 (void) rdmsr(REG_MTRRDEF, &my_mtrrdef); |
|
2544 my_mtrrdef &= ~MTRRDEF_E; |
|
2545 (void) wrmsr(REG_MTRRDEF, &my_mtrrdef); |
|
2546 if (mtrrcap & MTRRCAP_FIX) { |
|
2547 (void) wrmsr(REG_MTRR64K, &mtrr64k); |
|
2548 (void) wrmsr(REG_MTRR16K1, &mtrr16k1); |
|
2549 (void) wrmsr(REG_MTRR16K2, &mtrr16k2); |
|
2550 (void) wrmsr(REG_MTRR4K1, &mtrr4k1); |
|
2551 (void) wrmsr(REG_MTRR4K2, &mtrr4k2); |
|
2552 (void) wrmsr(REG_MTRR4K3, &mtrr4k3); |
|
2553 (void) wrmsr(REG_MTRR4K4, &mtrr4k4); |
|
2554 (void) wrmsr(REG_MTRR4K5, &mtrr4k5); |
|
2555 (void) wrmsr(REG_MTRR4K6, &mtrr4k6); |
|
2556 (void) wrmsr(REG_MTRR4K7, &mtrr4k7); |
|
2557 (void) wrmsr(REG_MTRR4K8, &mtrr4k8); |
|
2558 } |
|
2559 if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR) |
|
2560 vcnt = MAX_MTRRVAR; |
|
2561 for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr; |
|
2562 i < vcnt - 1; i++, ecx += 2, mtrrphys++) { |
|
2563 (void) wrmsr(ecx, &mtrrphys->mtrrphys_base); |
|
2564 (void) wrmsr(ecx + 1, &mtrrphys->mtrrphys_mask); |
|
2565 } |
|
2566 (void) wrmsr(REG_MTRRDEF, &mtrrdef); |
|
2567 setcr3(getcr3()); |
|
2568 invalidate_cache(); |
|
2569 setcr0(cr0_orig); |
|
2570 } |
|
2571 |
|
2572 /* |
|
2573 * resync mtrr so that BIOS is happy. Called from mdboot |
|
2574 */ |
|
2575 void |
|
2576 mtrr_resync() |
|
2577 { |
|
2578 if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) { |
|
2579 /* |
|
2580 * We could have changed the default mtrr definition. |
|
2581 * Put it back to uncached which is what it is at power on |
|
2582 */ |
|
2583 mtrrdef = MTRR_TYPE_UC|MTRRDEF_FE|MTRRDEF_E; |
|
2584 mtrr_sync(); |
|
2585 } |
|
2586 } |
|
2587 |
|
2588 void |
|
2589 get_system_configuration() |
|
2590 { |
|
2591 char prop[32]; |
|
2592 u_longlong_t nodes_ll, cpus_pernode_ll, lvalue; |
|
2593 |
|
2594 if (((BOP_GETPROPLEN(bootops, "nodes") > sizeof (prop)) || |
|
2595 (BOP_GETPROP(bootops, "nodes", prop) < 0) || |
|
2596 (kobj_getvalue(prop, &nodes_ll) == -1) || |
|
2597 (nodes_ll > MAXNODES)) || |
|
2598 ((BOP_GETPROPLEN(bootops, "cpus_pernode") > sizeof (prop)) || |
|
2599 (BOP_GETPROP(bootops, "cpus_pernode", prop) < 0) || |
|
2600 (kobj_getvalue(prop, &cpus_pernode_ll) == -1))) { |
|
2601 |
|
2602 system_hardware.hd_nodes = 1; |
|
2603 system_hardware.hd_cpus_per_node = 0; |
|
2604 } else { |
|
2605 system_hardware.hd_nodes = (int)nodes_ll; |
|
2606 system_hardware.hd_cpus_per_node = (int)cpus_pernode_ll; |
|
2607 } |
|
2608 if ((BOP_GETPROPLEN(bootops, "kernelbase") > sizeof (prop)) || |
|
2609 (BOP_GETPROP(bootops, "kernelbase", prop) < 0) || |
|
2610 (kobj_getvalue(prop, &lvalue) == -1)) |
|
2611 eprom_kernelbase = NULL; |
|
2612 else |
|
2613 eprom_kernelbase = (uintptr_t)lvalue; |
|
2614 |
|
2615 if ((BOP_GETPROPLEN(bootops, "segmapsize") > sizeof (prop)) || |
|
2616 (BOP_GETPROP(bootops, "segmapsize", prop) < 0) || |
|
2617 (kobj_getvalue(prop, &lvalue) == -1)) { |
|
2618 segmapsize = SEGMAPDEFAULT; |
|
2619 } else { |
|
2620 segmapsize = (uintptr_t)lvalue; |
|
2621 } |
|
2622 |
|
2623 if ((BOP_GETPROPLEN(bootops, "segmapfreelists") > sizeof (prop)) || |
|
2624 (BOP_GETPROP(bootops, "segmapfreelists", prop) < 0) || |
|
2625 (kobj_getvalue(prop, &lvalue) == -1)) { |
|
2626 segmapfreelists = 0; /* use segmap driver default */ |
|
2627 } else { |
|
2628 segmapfreelists = (int)lvalue; |
|
2629 } |
|
2630 } |
|
2631 |
|
2632 /* |
|
2633 * Add to a memory list. |
|
2634 * start = start of new memory segment |
|
2635 * len = length of new memory segment in bytes |
|
2636 * new = pointer to a new struct memlist |
|
2637 * memlistp = memory list to which to add segment. |
|
2638 */ |
|
2639 static void |
|
2640 memlist_add( |
|
2641 uint64_t start, |
|
2642 uint64_t len, |
|
2643 struct memlist *new, |
|
2644 struct memlist **memlistp) |
|
2645 { |
|
2646 struct memlist *cur; |
|
2647 uint64_t end = start + len; |
|
2648 |
|
2649 new->address = start; |
|
2650 new->size = len; |
|
2651 |
|
2652 cur = *memlistp; |
|
2653 |
|
2654 while (cur) { |
|
2655 if (cur->address >= end) { |
|
2656 new->next = cur; |
|
2657 *memlistp = new; |
|
2658 new->prev = cur->prev; |
|
2659 cur->prev = new; |
|
2660 return; |
|
2661 } |
|
2662 ASSERT(cur->address + cur->size <= start); |
|
2663 if (cur->next == NULL) { |
|
2664 cur->next = new; |
|
2665 new->prev = cur; |
|
2666 new->next = NULL; |
|
2667 return; |
|
2668 } |
|
2669 memlistp = &cur->next; |
|
2670 cur = cur->next; |
|
2671 } |
|
2672 } |
|
2673 |
|
/*
 * Create the vmem arenas used for loadable module text and data,
 * presumably on behalf of the kernel linker (kobj) -- the arenas are
 * seeded from the reserved modtext/moddata regions when those are
 * non-empty, and import from heaptext_arena / heap32_arena otherwise.
 */
void
kobj_vmem_init(vmem_t **text_arena, vmem_t **data_arena)
{
	size_t tsize = e_modtext - modtext;
	size_t dsize = e_moddata - moddata;

	*text_arena = vmem_create("module_text", tsize ? modtext : NULL, tsize,
	    1, segkmem_alloc, segkmem_free, heaptext_arena, 0, VM_SLEEP);
	*data_arena = vmem_create("module_data", dsize ? moddata : NULL, dsize,
	    1, segkmem_alloc, segkmem_free, heap32_arena, 0, VM_SLEEP);
}
|
2685 |
|
2686 caddr_t |
|
2687 kobj_text_alloc(vmem_t *arena, size_t size) |
|
2688 { |
|
2689 return (vmem_alloc(arena, size, VM_SLEEP | VM_BESTFIT)); |
|
2690 } |
|
2691 |
|
/*ARGSUSED*/
caddr_t
kobj_texthole_alloc(caddr_t addr, size_t size)
{
	/* This platform never allocates text holes; any call is a bug. */
	panic("unexpected call to kobj_texthole_alloc()");
	/*NOTREACHED*/
	return (0);
}
|
2700 |
|
/*ARGSUSED*/
void
kobj_texthole_free(caddr_t addr, size_t size)
{
	/* This platform never allocates text holes; any call is a bug. */
	panic("unexpected call to kobj_texthole_free()");
}
|
2707 |
|
2708 /* |
|
2709 * This is called just after configure() in startup(). |
|
2710 * |
|
2711 * The ISALIST concept is a bit hopeless on Intel, because |
|
2712 * there's no guarantee of an ever-more-capable processor |
|
2713 * given that various parts of the instruction set may appear |
|
2714 * and disappear between different implementations. |
|
2715 * |
|
2716 * While it would be possible to correct it and even enhance |
|
2717 * it somewhat, the explicit hardware capability bitmask allows |
|
2718 * more flexibility. |
|
2719 * |
|
2720 * So, we just leave this alone. |
|
2721 */ |
|
2722 void |
|
2723 setx86isalist(void) |
|
2724 { |
|
2725 char *tp; |
|
2726 size_t len; |
|
2727 extern char *isa_list; |
|
2728 |
|
2729 #define TBUFSIZE 1024 |
|
2730 |
|
2731 tp = kmem_alloc(TBUFSIZE, KM_SLEEP); |
|
2732 *tp = '\0'; |
|
2733 |
|
2734 #if defined(__amd64) |
|
2735 (void) strcpy(tp, "amd64 "); |
|
2736 #endif |
|
2737 |
|
2738 switch (x86_vendor) { |
|
2739 case X86_VENDOR_Intel: |
|
2740 case X86_VENDOR_AMD: |
|
2741 case X86_VENDOR_TM: |
|
2742 if (x86_feature & X86_CMOV) { |
|
2743 /* |
|
2744 * Pentium Pro or later |
|
2745 */ |
|
2746 (void) strcat(tp, "pentium_pro"); |
|
2747 (void) strcat(tp, x86_feature & X86_MMX ? |
|
2748 "+mmx pentium_pro " : " "); |
|
2749 } |
|
2750 /*FALLTHROUGH*/ |
|
2751 case X86_VENDOR_Cyrix: |
|
2752 /* |
|
2753 * The Cyrix 6x86 does not have any Pentium features |
|
2754 * accessible while not at privilege level 0. |
|
2755 */ |
|
2756 if (x86_feature & X86_CPUID) { |
|
2757 (void) strcat(tp, "pentium"); |
|
2758 (void) strcat(tp, x86_feature & X86_MMX ? |
|
2759 "+mmx pentium " : " "); |
|
2760 } |
|
2761 break; |
|
2762 default: |
|
2763 break; |
|
2764 } |
|
2765 (void) strcat(tp, "i486 i386 i86"); |
|
2766 len = strlen(tp) + 1; /* account for NULL at end of string */ |
|
2767 isa_list = strcpy(kmem_alloc(len, KM_SLEEP), tp); |
|
2768 kmem_free(tp, TBUFSIZE); |
|
2769 |
|
2770 #undef TBUFSIZE |
|
2771 } |
|
2772 |
|
2773 |
|
2774 #ifdef __amd64 |
|
2775 |
|
2776 void * |
|
2777 device_arena_alloc(size_t size, int vm_flag) |
|
2778 { |
|
2779 return (vmem_alloc(device_arena, size, vm_flag)); |
|
2780 } |
|
2781 |
|
2782 void |
|
2783 device_arena_free(void *vaddr, size_t size) |
|
2784 { |
|
2785 vmem_free(device_arena, vaddr, size); |
|
2786 } |
|
2787 |
|
2788 #else |
|
2789 |
|
2790 void * |
|
2791 device_arena_alloc(size_t size, int vm_flag) |
|
2792 { |
|
2793 caddr_t vaddr; |
|
2794 uintptr_t v; |
|
2795 size_t start; |
|
2796 size_t end; |
|
2797 |
|
2798 vaddr = vmem_alloc(heap_arena, size, vm_flag); |
|
2799 if (vaddr == NULL) |
|
2800 return (NULL); |
|
2801 |
|
2802 v = (uintptr_t)vaddr; |
|
2803 ASSERT(v >= kernelbase); |
|
2804 ASSERT(v + size <= ptable_va); |
|
2805 |
|
2806 start = btop(v - kernelbase); |
|
2807 end = btop(v + size - 1 - kernelbase); |
|
2808 ASSERT(start < toxic_bit_map_len); |
|
2809 ASSERT(end < toxic_bit_map_len); |
|
2810 |
|
2811 while (start <= end) { |
|
2812 BT_ATOMIC_SET(toxic_bit_map, start); |
|
2813 ++start; |
|
2814 } |
|
2815 return (vaddr); |
|
2816 } |
|
2817 |
|
2818 void |
|
2819 device_arena_free(void *vaddr, size_t size) |
|
2820 { |
|
2821 uintptr_t v = (uintptr_t)vaddr; |
|
2822 size_t start; |
|
2823 size_t end; |
|
2824 |
|
2825 ASSERT(v >= kernelbase); |
|
2826 ASSERT(v + size <= ptable_va); |
|
2827 |
|
2828 start = btop(v - kernelbase); |
|
2829 end = btop(v + size - 1 - kernelbase); |
|
2830 ASSERT(start < toxic_bit_map_len); |
|
2831 ASSERT(end < toxic_bit_map_len); |
|
2832 |
|
2833 while (start <= end) { |
|
2834 ASSERT(BT_TEST(toxic_bit_map, start) != 0); |
|
2835 BT_ATOMIC_CLEAR(toxic_bit_map, start); |
|
2836 ++start; |
|
2837 } |
|
2838 vmem_free(heap_arena, vaddr, size); |
|
2839 } |
|
2840 |
|
/*
 * returns 1st address in range that is in device arena, or NULL
 * if len is not NULL it returns the length of the toxic range
 */
void *
device_arena_contains(void *vaddr, size_t size, size_t *len)
{
	uintptr_t v = (uintptr_t)vaddr;
	uintptr_t eaddr = v + size;
	size_t start;
	size_t end;

	/*
	 * if called very early by kmdb, just return NULL
	 */
	if (toxic_bit_map == NULL)
		return (NULL);

	/*
	 * First check if we're completely outside the bitmap range.
	 */
	if (v >= ptable_va || eaddr < kernelbase)
		return (NULL);

	/*
	 * Trim ends of search to look at only what the bitmap covers.
	 */
	if (v < kernelbase)
		v = kernelbase;
	start = btop(v - kernelbase);
	/*
	 * NOTE(review): no "- 1" here, unlike device_arena_alloc/free,
	 * so 'end' appears to be treated as an exclusive bound by
	 * bt_range() below -- confirm against bt_range()'s contract.
	 */
	end = btop(eaddr - kernelbase);
	if (end >= toxic_bit_map_len)
		end = toxic_bit_map_len;

	/* find the first run of set (toxic) bits within [start, end) */
	if (bt_range(toxic_bit_map, &start, &end, end) == 0)
		return (NULL);

	v = kernelbase + ptob(start);
	if (len != NULL)
		*len = ptob(end - start);
	return ((void *)v);
}
|
2883 |
|
2884 #endif |