0
|
1 |
/*
|
|
2 |
* CDDL HEADER START
|
|
3 |
*
|
|
4 |
* The contents of this file are subject to the terms of the
|
|
5 |
* Common Development and Distribution License, Version 1.0 only
|
|
6 |
* (the "License"). You may not use this file except in compliance
|
|
7 |
* with the License.
|
|
8 |
*
|
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
10 |
* or http://www.opensolaris.org/os/licensing.
|
|
11 |
* See the License for the specific language governing permissions
|
|
12 |
* and limitations under the License.
|
|
13 |
*
|
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each
|
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
16 |
* If applicable, add the following below this CDDL HEADER, with the
|
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
19 |
*
|
|
20 |
* CDDL HEADER END
|
|
21 |
*/
|
|
22 |
/*
|
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
|
24 |
* Use is subject to license terms.
|
|
25 |
*/
|
|
26 |
|
|
27 |
#pragma ident "%Z%%M% %I% %E% SMI"
|
|
28 |
|
|
29 |
#include <sys/types.h>
|
|
30 |
#include <sys/t_lock.h>
|
|
31 |
#include <sys/param.h>
|
|
32 |
#include <sys/sysmacros.h>
|
|
33 |
#include <sys/signal.h>
|
|
34 |
#include <sys/systm.h>
|
|
35 |
#include <sys/user.h>
|
|
36 |
#include <sys/mman.h>
|
|
37 |
#include <sys/vm.h>
|
|
38 |
#include <sys/conf.h>
|
|
39 |
#include <sys/avintr.h>
|
|
40 |
#include <sys/autoconf.h>
|
|
41 |
#include <sys/disp.h>
|
|
42 |
#include <sys/class.h>
|
|
43 |
#include <sys/bitmap.h>
|
|
44 |
|
|
45 |
#include <sys/privregs.h>
|
|
46 |
|
|
47 |
#include <sys/proc.h>
|
|
48 |
#include <sys/buf.h>
|
|
49 |
#include <sys/kmem.h>
|
|
50 |
#include <sys/kstat.h>
|
|
51 |
|
|
52 |
#include <sys/reboot.h>
|
|
53 |
#include <sys/uadmin.h>
|
|
54 |
|
|
55 |
#include <sys/cred.h>
|
|
56 |
#include <sys/vnode.h>
|
|
57 |
#include <sys/file.h>
|
|
58 |
|
|
59 |
#include <sys/procfs.h>
|
|
60 |
#include <sys/acct.h>
|
|
61 |
|
|
62 |
#include <sys/vfs.h>
|
|
63 |
#include <sys/dnlc.h>
|
|
64 |
#include <sys/var.h>
|
|
65 |
#include <sys/cmn_err.h>
|
|
66 |
#include <sys/utsname.h>
|
|
67 |
#include <sys/debug.h>
|
|
68 |
#include <sys/kdi.h>
|
|
69 |
|
|
70 |
#include <sys/dumphdr.h>
|
|
71 |
#include <sys/bootconf.h>
|
|
72 |
#include <sys/varargs.h>
|
|
73 |
#include <sys/promif.h>
|
|
74 |
#include <sys/prom_emul.h> /* for create_prom_prop */
|
|
75 |
#include <sys/modctl.h> /* for "procfs" hack */
|
|
76 |
|
|
77 |
#include <sys/consdev.h>
|
|
78 |
#include <sys/frame.h>
|
|
79 |
|
|
80 |
#include <sys/sunddi.h>
|
|
81 |
#include <sys/sunndi.h>
|
|
82 |
#include <sys/ndi_impldefs.h>
|
|
83 |
#include <sys/ddidmareq.h>
|
|
84 |
#include <sys/psw.h>
|
|
85 |
#include <sys/regset.h>
|
|
86 |
#include <sys/clock.h>
|
|
87 |
#include <sys/pte.h>
|
|
88 |
#include <sys/mmu.h>
|
|
89 |
#include <sys/tss.h>
|
|
90 |
#include <sys/stack.h>
|
|
91 |
#include <sys/trap.h>
|
|
92 |
#include <sys/pic.h>
|
|
93 |
#include <sys/fp.h>
|
|
94 |
#include <vm/anon.h>
|
|
95 |
#include <vm/as.h>
|
|
96 |
#include <vm/page.h>
|
|
97 |
#include <vm/seg.h>
|
|
98 |
#include <vm/seg_dev.h>
|
|
99 |
#include <vm/seg_kmem.h>
|
|
100 |
#include <vm/seg_kpm.h>
|
|
101 |
#include <vm/seg_map.h>
|
|
102 |
#include <vm/seg_vn.h>
|
|
103 |
#include <vm/seg_kp.h>
|
|
104 |
#include <sys/memnode.h>
|
|
105 |
#include <vm/vm_dep.h>
|
|
106 |
#include <sys/swap.h>
|
|
107 |
#include <sys/thread.h>
|
|
108 |
#include <sys/sysconf.h>
|
|
109 |
#include <sys/vm_machparam.h>
|
|
110 |
#include <sys/archsystm.h>
|
|
111 |
#include <sys/machsystm.h>
|
|
112 |
#include <vm/hat.h>
|
|
113 |
#include <vm/hat_i86.h>
|
|
114 |
#include <sys/pmem.h>
|
|
115 |
#include <sys/instance.h>
|
|
116 |
#include <sys/smp_impldefs.h>
|
|
117 |
#include <sys/x86_archext.h>
|
|
118 |
#include <sys/segments.h>
|
|
119 |
#include <sys/clconf.h>
|
|
120 |
#include <sys/kobj.h>
|
|
121 |
#include <sys/kobj_lex.h>
|
|
122 |
#include <sys/prom_emul.h>
|
|
123 |
#include <sys/cpc_impl.h>
|
|
124 |
#include <sys/chip.h>
|
|
125 |
#include <sys/x86_archext.h>
|
|
126 |
|
|
127 |
extern void debug_enter(char *);
|
|
128 |
extern void progressbar_init(void);
|
|
129 |
extern void progressbar_start(void);
|
|
130 |
|
|
131 |
/*
|
|
132 |
* XXX make declaration below "static" when drivers no longer use this
|
|
133 |
* interface.
|
|
134 |
*/
|
|
135 |
extern caddr_t p0_va; /* Virtual address for accessing physical page 0 */
|
|
136 |
|
|
137 |
/*
|
|
138 |
* segkp
|
|
139 |
*/
|
|
140 |
extern int segkp_fromheap;
|
|
141 |
|
|
142 |
static void kvm_init(void);
|
|
143 |
static void startup_init(void);
|
|
144 |
static void startup_memlist(void);
|
|
145 |
static void startup_modules(void);
|
|
146 |
static void startup_bop_gone(void);
|
|
147 |
static void startup_vm(void);
|
|
148 |
static void startup_end(void);
|
|
149 |
|
|
150 |
/*
|
|
151 |
* Declare these as initialized data so we can patch them.
|
|
152 |
*/
|
|
153 |
pgcnt_t physmem = 0; /* memory size in pages, patch if you want less */
|
|
154 |
pgcnt_t obp_pages; /* Memory used by PROM for its text and data */
|
|
155 |
|
|
156 |
char *kobj_file_buf;
|
|
157 |
int kobj_file_bufsize; /* set in /etc/system */
|
|
158 |
|
|
159 |
/* Global variables for MP support. Used in mp_startup */
|
|
160 |
caddr_t rm_platter_va;
|
|
161 |
uint32_t rm_platter_pa;
|
|
162 |
|
|
163 |
/*
|
|
164 |
* Some CPUs have holes in the middle of the 64-bit virtual address range.
|
|
165 |
*/
|
|
166 |
uintptr_t hole_start, hole_end;
|
|
167 |
|
|
168 |
/*
|
|
169 |
* kpm mapping window
|
|
170 |
*/
|
|
171 |
caddr_t kpm_vbase;
|
|
172 |
size_t kpm_size;
|
|
173 |
static int kpm_desired = 0; /* Do we want to try to use segkpm? */
|
|
174 |
|
|
175 |
/*
|
|
176 |
* VA range that must be preserved for boot until we release all of its
|
|
177 |
* mappings.
|
|
178 |
*/
|
|
179 |
#if defined(__amd64)
|
|
180 |
static void *kmem_setaside;
|
|
181 |
#endif
|
|
182 |
|
|
183 |
/*
|
|
184 |
* Configuration parameters set at boot time.
|
|
185 |
*/
|
|
186 |
|
|
187 |
caddr_t econtig; /* end of first block of contiguous kernel */
|
|
188 |
|
|
189 |
struct bootops *bootops = 0; /* passed in from boot */
|
|
190 |
struct bootops **bootopsp;
|
|
191 |
struct boot_syscalls *sysp; /* passed in from boot */
|
|
192 |
|
|
193 |
char bootblock_fstype[16];
|
|
194 |
|
|
195 |
char kern_bootargs[OBP_MAXPATHLEN];
|
|
196 |
|
|
197 |
/*
|
|
198 |
* new memory fragmentations are possible in startup() due to BOP_ALLOCs. this
|
|
199 |
* depends on number of BOP_ALLOC calls made and requested size, memory size
|
|
200 |
* combination and whether boot.bin memory needs to be freed.
|
|
201 |
*/
|
|
202 |
#define POSS_NEW_FRAGMENTS 12
|
|
203 |
|
|
204 |
/*
|
|
205 |
* VM data structures
|
|
206 |
*/
|
|
207 |
long page_hashsz; /* Size of page hash table (power of two) */
|
|
208 |
struct page *pp_base; /* Base of initial system page struct array */
|
|
209 |
struct page **page_hash; /* Page hash table */
|
|
210 |
struct seg ktextseg; /* Segment used for kernel executable image */
|
|
211 |
struct seg kvalloc; /* Segment used for "valloc" mapping */
|
|
212 |
struct seg kpseg; /* Segment used for pageable kernel virt mem */
|
|
213 |
struct seg kmapseg; /* Segment used for generic kernel mappings */
|
|
214 |
struct seg kdebugseg; /* Segment used for the kernel debugger */
|
|
215 |
|
|
216 |
struct seg *segkmap = &kmapseg; /* Kernel generic mapping segment */
|
|
217 |
struct seg *segkp = &kpseg; /* Pageable kernel virtual memory segment */
|
|
218 |
|
|
219 |
#if defined(__amd64)
|
|
220 |
struct seg kvseg_core; /* Segment used for the core heap */
|
|
221 |
struct seg kpmseg; /* Segment used for physical mapping */
|
|
222 |
struct seg *segkpm = &kpmseg; /* 64bit kernel physical mapping segment */
|
|
223 |
#else
|
|
224 |
struct seg *segkpm = NULL; /* Unused on IA32 */
|
|
225 |
#endif
|
|
226 |
|
|
227 |
caddr_t segkp_base; /* Base address of segkp */
|
|
228 |
#if defined(__amd64)
|
|
229 |
pgcnt_t segkpsize = btop(SEGKPDEFSIZE); /* size of segkp segment in pages */
|
|
230 |
#else
|
|
231 |
pgcnt_t segkpsize = 0;
|
|
232 |
#endif
|
|
233 |
|
|
234 |
struct memseg *memseg_base;
|
|
235 |
struct vnode unused_pages_vp;
|
|
236 |
|
|
237 |
#define FOURGB 0x100000000LL
|
|
238 |
|
|
239 |
struct memlist *memlist;
|
|
240 |
|
|
241 |
caddr_t s_text; /* start of kernel text segment */
|
|
242 |
caddr_t e_text; /* end of kernel text segment */
|
|
243 |
caddr_t s_data; /* start of kernel data segment */
|
|
244 |
caddr_t e_data; /* end of kernel data segment */
|
|
245 |
caddr_t modtext; /* start of loadable module text reserved */
|
|
246 |
caddr_t e_modtext; /* end of loadable module text reserved */
|
|
247 |
caddr_t moddata; /* start of loadable module data reserved */
|
|
248 |
caddr_t e_moddata; /* end of loadable module data reserved */
|
|
249 |
|
|
250 |
struct memlist *phys_install; /* Total installed physical memory */
|
|
251 |
struct memlist *phys_avail; /* Total available physical memory */
|
|
252 |
|
|
253 |
static void memlist_add(uint64_t, uint64_t, struct memlist *,
|
|
254 |
struct memlist **);
|
|
255 |
|
|
256 |
/*
|
|
257 |
* kphysm_init returns the number of pages that were processed
|
|
258 |
*/
|
|
259 |
static pgcnt_t kphysm_init(page_t *, struct memseg *, pgcnt_t, pgcnt_t);
|
|
260 |
|
|
261 |
#define IO_PROP_SIZE 64 /* device property size */
|
|
262 |
|
|
263 |
/*
|
|
264 |
* a couple useful roundup macros
|
|
265 |
*/
|
|
266 |
#define ROUND_UP_PAGE(x) \
|
|
267 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)MMU_PAGESIZE))
|
|
268 |
#define ROUND_UP_LPAGE(x) \
|
|
269 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[1]))
|
|
270 |
#define ROUND_UP_4MEG(x) \
|
|
271 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)FOURMB_PAGESIZE))
|
|
272 |
#define ROUND_UP_TOPLEVEL(x) \
|
|
273 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[mmu.max_level]))
|
|
274 |
|
|
275 |
/*
|
|
276 |
* 32-bit Kernel's Virtual memory layout.
|
|
277 |
* +-----------------------+
|
|
278 |
* | psm 1-1 map |
|
|
279 |
* | exec args area |
|
|
280 |
* 0xFFC00000 -|-----------------------|- ARGSBASE
|
|
281 |
* | debugger |
|
|
282 |
* 0xFF800000 -|-----------------------|- SEGDEBUGBASE
|
|
283 |
* | Kernel Data |
|
|
284 |
* 0xFEC00000 -|-----------------------|
|
|
285 |
* | Kernel Text |
|
|
286 |
* 0xFE800000 -|-----------------------|- KERNEL_TEXT
|
|
287 |
* | LUFS sinkhole |
|
|
288 |
* 0xFE000000 -|-----------------------|- lufs_addr
|
|
289 |
* --- -|-----------------------|- valloc_base + valloc_sz
|
|
290 |
* | early pp structures |
|
|
291 |
* | memsegs, memlists, |
|
|
292 |
* | page hash, etc. |
|
|
293 |
* --- -|-----------------------|- valloc_base (floating)
|
|
294 |
* | ptable_va |
|
|
295 |
* 0xFDFFE000 -|-----------------------|- ekernelheap, ptable_va
|
|
296 |
* | | (segkp is an arena under the heap)
|
|
297 |
* | |
|
|
298 |
* | kvseg |
|
|
299 |
* | |
|
|
300 |
* | |
|
|
301 |
* --- -|-----------------------|- kernelheap (floating)
|
|
302 |
* | Segkmap |
|
|
303 |
* 0xC3002000 -|-----------------------|- segkmap_start (floating)
|
|
304 |
* | Red Zone |
|
|
305 |
* 0xC3000000 -|-----------------------|- kernelbase / userlimit (floating)
|
|
306 |
* | | ||
|
|
307 |
* | Shared objects | \/
|
|
308 |
* | |
|
|
309 |
* : :
|
|
310 |
* | user data |
|
|
311 |
* |-----------------------|
|
|
312 |
* | user text |
|
|
313 |
* 0x08048000 -|-----------------------|
|
|
314 |
* | user stack |
|
|
315 |
* : :
|
|
316 |
* | invalid |
|
|
317 |
* 0x00000000 +-----------------------+
|
|
318 |
*
|
|
319 |
*
|
|
320 |
* 64-bit Kernel's Virtual memory layout. (assuming 64 bit app)
|
|
321 |
* +-----------------------+
|
|
322 |
* | psm 1-1 map |
|
|
323 |
* | exec args area |
|
|
324 |
* 0xFFFFFFFF.FFC00000 |-----------------------|- ARGSBASE
|
|
325 |
* | debugger (?) |
|
|
326 |
* 0xFFFFFFFF.FF800000 |-----------------------|- SEGDEBUGBASE
|
|
327 |
* | unused |
|
|
328 |
* +-----------------------+
|
|
329 |
* | Kernel Data |
|
|
330 |
* 0xFFFFFFFF.FBC00000 |-----------------------|
|
|
331 |
* | Kernel Text |
|
|
332 |
* 0xFFFFFFFF.FB800000 |-----------------------|- KERNEL_TEXT
|
|
333 |
* | LUFS sinkhole |
|
|
334 |
* 0xFFFFFFFF.FB000000 -|-----------------------|- lufs_addr
|
|
335 |
* --- |-----------------------|- valloc_base + valloc_sz
|
|
336 |
* | early pp structures |
|
|
337 |
* | memsegs, memlists, |
|
|
338 |
* | page hash, etc. |
|
|
339 |
* --- |-----------------------|- valloc_base
|
|
340 |
* | ptable_va |
|
|
341 |
* --- |-----------------------|- ptable_va
|
|
342 |
* | Core heap | (used for loadable modules)
|
|
343 |
* 0xFFFFFFFF.C0000000 |-----------------------|- core_base / ekernelheap
|
|
344 |
* | Kernel |
|
|
345 |
* | heap |
|
|
346 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- kernelheap (floating)
|
|
347 |
* | segkmap |
|
|
348 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- segkmap_start (floating)
|
|
349 |
* | device mappings |
|
|
350 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- toxic_addr (floating)
|
|
351 |
* | segkp |
|
|
352 |
* --- |-----------------------|- segkp_base
|
|
353 |
* | segkpm |
|
|
354 |
* 0xFFFFFE00.00000000 |-----------------------|
|
|
355 |
* | Red Zone |
|
|
356 |
* 0xFFFFFD80.00000000 |-----------------------|- KERNELBASE
|
|
357 |
* | User stack |- User space memory
|
|
358 |
* | |
|
|
359 |
* | shared objects, etc | (grows downwards)
|
|
360 |
* : :
|
|
361 |
* | |
|
|
362 |
* 0xFFFF8000.00000000 |-----------------------|
|
|
363 |
* | |
|
|
364 |
* | VA Hole / unused |
|
|
365 |
* | |
|
|
366 |
* 0x00008000.00000000 |-----------------------|
|
|
367 |
* | |
|
|
368 |
* | |
|
|
369 |
* : :
|
|
370 |
* | user heap | (grows upwards)
|
|
371 |
* | |
|
|
372 |
* | user data |
|
|
373 |
* |-----------------------|
|
|
374 |
* | user text |
|
|
375 |
* 0x00000000.04000000 |-----------------------|
|
|
376 |
* | invalid |
|
|
377 |
* 0x00000000.00000000 +-----------------------+
|
|
378 |
*
|
|
379 |
* A 32 bit app on the 64 bit kernel sees the same layout as on the 32 bit
|
|
380 |
* kernel, except that userlimit is raised to 0xfe000000
|
|
381 |
*
|
|
382 |
* Floating values:
|
|
383 |
*
|
|
384 |
* valloc_base: start of the kernel's memory management/tracking data
|
|
385 |
* structures. This region contains page_t structures for the lowest 4GB
|
|
386 |
* of physical memory, memsegs, memlists, and the page hash.
|
|
387 |
*
|
|
388 |
* core_base: start of the kernel's "core" heap area on 64-bit systems.
|
|
389 |
* This area is intended to be used for global data as well as for module
|
|
390 |
* text/data that does not fit into the nucleus pages. The core heap is
|
|
391 |
* restricted to a 2GB range, allowing every address within it to be
|
|
392 |
* accessed using rip-relative addressing
|
|
393 |
*
|
|
394 |
* ekernelheap: end of kernelheap and start of segmap.
|
|
395 |
*
|
|
396 |
* kernelheap: start of kernel heap. On 32-bit systems, this starts right
|
|
397 |
* above a red zone that separates the user's address space from the
|
|
398 |
* kernel's. On 64-bit systems, it sits above segkp and segkpm.
|
|
399 |
*
|
|
400 |
* segkmap_start: start of segmap. The length of segmap can be modified
|
|
401 |
* by changing segmapsize in /etc/system (preferred) or eeprom (deprecated).
|
|
402 |
* The default length is 16MB on 32-bit systems and 64MB on 64-bit systems.
|
|
403 |
*
|
|
404 |
* kernelbase: On a 32-bit kernel the default value of 0xd4000000 will be
|
|
405 |
* decreased by 2X the size required for page_t. This allows the kernel
|
|
406 |
* heap to grow in size with physical memory. With sizeof(page_t) == 80
|
|
407 |
* bytes, the following shows the values of kernelbase and kernel heap
|
|
408 |
* sizes for different memory configurations (assuming default segmap and
|
|
409 |
* segkp sizes).
|
|
410 |
*
|
|
411 |
* mem size for kernelbase kernel heap
|
|
412 |
* size page_t's size
|
|
413 |
* ---- --------- ---------- -----------
|
|
414 |
* 1gb 0x01400000 0xd1800000 684MB
|
|
415 |
* 2gb 0x02800000 0xcf000000 704MB
|
|
416 |
* 4gb 0x05000000 0xca000000 744MB
|
|
417 |
* 6gb 0x07800000 0xc5000000 784MB
|
|
418 |
* 8gb 0x0a000000 0xc0000000 824MB
|
|
419 |
* 16gb 0x14000000 0xac000000 984MB
|
|
420 |
* 32gb 0x28000000 0x84000000 1304MB
|
|
421 |
* 64gb 0x50000000 0x34000000 1944MB (*)
|
|
422 |
*
|
|
423 |
* kernelbase is less than the abi minimum of 0xc0000000 for memory
|
|
424 |
* configurations above 8gb.
|
|
425 |
*
|
|
426 |
* (*) support for memory configurations above 32gb will require manual tuning
|
|
427 |
* of kernelbase to balance out the need of user applications.
|
|
428 |
*/
|
|
429 |
|
|
430 |
void init_intr_threads(struct cpu *);
|
|
431 |
|
|
432 |
/*
|
|
433 |
* Dummy spl priority masks
|
|
434 |
*/
|
|
435 |
static unsigned char dummy_cpu_pri[MAXIPL + 1] = {
|
|
436 |
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
|
|
437 |
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf
|
|
438 |
};
|
|
439 |
|
|
440 |
/* real-time-clock initialization parameters */
|
|
441 |
long gmt_lag; /* offset in seconds of gmt to local time */
|
|
442 |
extern long process_rtc_config_file(void);
|
|
443 |
|
|
444 |
char *final_kernelheap;
|
|
445 |
char *boot_kernelheap;
|
|
446 |
uintptr_t kernelbase;
|
|
447 |
uintptr_t eprom_kernelbase;
|
|
448 |
size_t segmapsize;
|
|
449 |
static uintptr_t segmap_reserved;
|
|
450 |
uintptr_t segkmap_start;
|
|
451 |
int segmapfreelists;
|
|
452 |
pgcnt_t boot_npages;
|
|
453 |
pgcnt_t npages;
|
|
454 |
size_t core_size; /* size of "core" heap */
|
|
455 |
uintptr_t core_base; /* base address of "core" heap */
|
|
456 |
|
|
457 |
/*
|
|
458 |
* List of bootstrap pages. We mark these as allocated in startup.
|
|
459 |
* release_bootstrap() will free them when we're completely done with
|
|
460 |
* the bootstrap.
|
|
461 |
*/
|
|
462 |
static page_t *bootpages, *rd_pages;
|
|
463 |
|
|
464 |
struct system_hardware system_hardware;
|
|
465 |
|
|
466 |
/*
|
|
467 |
* Enable some debugging messages concerning memory usage...
|
|
468 |
*
|
|
469 |
* XX64 There should only be one print routine once memlist usage between
|
|
470 |
* vmx and the kernel is cleaned up and there is a single memlist structure
|
|
471 |
* shared between kernel and boot.
|
|
472 |
*/
|
|
473 |
static void
|
|
474 |
print_boot_memlist(char *title, struct memlist *mp)
|
|
475 |
{
|
|
476 |
prom_printf("MEMLIST: %s:\n", title);
|
|
477 |
while (mp != NULL) {
|
|
478 |
prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n",
|
|
479 |
mp->address, mp->size);
|
|
480 |
mp = mp->next;
|
|
481 |
}
|
|
482 |
}
|
|
483 |
|
|
484 |
static void
|
|
485 |
print_kernel_memlist(char *title, struct memlist *mp)
|
|
486 |
{
|
|
487 |
prom_printf("MEMLIST: %s:\n", title);
|
|
488 |
while (mp != NULL) {
|
|
489 |
prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n",
|
|
490 |
mp->address, mp->size);
|
|
491 |
mp = mp->next;
|
|
492 |
}
|
|
493 |
}
|
|
494 |
|
|
495 |
/*
|
|
496 |
* XX64 need a comment here.. are these just default values, surely
|
|
497 |
* we read the "cpuid" type information to figure this out.
|
|
498 |
*/
|
|
499 |
int l2cache_sz = 0x80000;
|
|
500 |
int l2cache_linesz = 0x40;
|
|
501 |
int l2cache_assoc = 1;
|
|
502 |
|
|
503 |
/*
|
|
504 |
* on 64 bit we use a predefined VA range for mapping devices in the kernel
|
|
505 |
* on 32 bit the mappings are intermixed in the heap, so we use a bit map
|
|
506 |
*/
|
|
507 |
#ifdef __amd64
|
|
508 |
|
|
509 |
vmem_t *device_arena;
|
|
510 |
uintptr_t toxic_addr = (uintptr_t)NULL;
|
|
511 |
size_t toxic_size = 1 * 1024 * 1024 * 1024; /* Sparc uses 1 gig too */
|
|
512 |
|
|
513 |
#else /* __i386 */
|
|
514 |
|
|
515 |
ulong_t *toxic_bit_map; /* one bit for each 4k of VA in heap_arena */
|
|
516 |
size_t toxic_bit_map_len = 0; /* in bits */
|
|
517 |
|
|
518 |
#endif /* __i386 */
|
|
519 |
|
|
520 |
/*
|
|
521 |
* Simple boot time debug facilities
|
|
522 |
*/
|
|
523 |
static char *prm_dbg_str[] = {
|
|
524 |
"%s:%d: '%s' is 0x%x\n",
|
|
525 |
"%s:%d: '%s' is 0x%llx\n"
|
|
526 |
};
|
|
527 |
|
|
528 |
int prom_debug;
|
|
529 |
|
|
530 |
#define PRM_DEBUG(q) if (prom_debug) \
|
|
531 |
prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q);
|
|
532 |
#define PRM_POINT(q) if (prom_debug) \
|
|
533 |
prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q);
|
|
534 |
|
|
535 |
/*
|
|
536 |
* This structure is used to keep track of the initial allocations
|
|
537 |
* done in startup_memlist(). The value of NUM_ALLOCATIONS needs to
|
|
538 |
* be >= the number of ADD_TO_ALLOCATIONS() executed in the code.
|
|
539 |
*/
|
|
540 |
#define NUM_ALLOCATIONS 7
|
|
541 |
int num_allocations = 0;
|
|
542 |
struct {
|
|
543 |
void **al_ptr;
|
|
544 |
size_t al_size;
|
|
545 |
} allocations[NUM_ALLOCATIONS];
|
|
546 |
size_t valloc_sz = 0;
|
|
547 |
uintptr_t valloc_base;
|
|
548 |
extern uintptr_t ptable_va;
|
|
549 |
extern size_t ptable_sz;
|
|
550 |
|
|
551 |
#define ADD_TO_ALLOCATIONS(ptr, size) { \
|
|
552 |
size = ROUND_UP_PAGE(size); \
|
|
553 |
if (num_allocations == NUM_ALLOCATIONS) \
|
|
554 |
panic("too many ADD_TO_ALLOCATIONS()"); \
|
|
555 |
allocations[num_allocations].al_ptr = (void**)&ptr; \
|
|
556 |
allocations[num_allocations].al_size = size; \
|
|
557 |
valloc_sz += size; \
|
|
558 |
++num_allocations; \
|
|
559 |
}
|
|
560 |
|
|
561 |
static void
|
|
562 |
perform_allocations(void)
|
|
563 |
{
|
|
564 |
caddr_t mem;
|
|
565 |
int i;
|
|
566 |
|
|
567 |
mem = BOP_ALLOC(bootops, (caddr_t)valloc_base, valloc_sz, BO_NO_ALIGN);
|
|
568 |
if (mem != (caddr_t)valloc_base)
|
|
569 |
panic("BOP_ALLOC() failed");
|
|
570 |
bzero(mem, valloc_sz);
|
|
571 |
for (i = 0; i < num_allocations; ++i) {
|
|
572 |
*allocations[i].al_ptr = (void *)mem;
|
|
573 |
mem += allocations[i].al_size;
|
|
574 |
}
|
|
575 |
}
|
|
576 |
|
|
577 |
/*
|
|
578 |
* Our world looks like this at startup time.
|
|
579 |
*
|
|
580 |
* In a 32-bit OS, boot loads the kernel text at 0xfe800000 and kernel data
|
|
581 |
* at 0xfec00000. On a 64-bit OS, kernel text and data are loaded at
|
|
582 |
* 0xffffffff.fe800000 and 0xffffffff.fec00000 respectively. Those
|
|
583 |
* addresses are fixed in the binary at link time.
|
|
584 |
*
|
|
585 |
* On the text page:
|
|
586 |
* unix/genunix/krtld/module text loads.
|
|
587 |
*
|
|
588 |
* On the data page:
|
|
589 |
* unix/genunix/krtld/module data loads and space for page_t's.
|
|
590 |
*/
|
|
591 |
/*
|
|
592 |
* Machine-dependent startup code
|
|
593 |
*/
|
|
594 |
/*
 * Machine-dependent startup code.
 *
 * Drives the boot-time initialization phases in their required order;
 * each startup_*() phase depends on state established by the previous
 * one, so the sequence below must not be reordered.
 */
void
startup(void)
{
	extern void startup_bios_disk();
	/*
	 * Make sure that nobody tries to use segkpm until we have
	 * initialized it properly.  kpm_desired records whether segkpm
	 * was wanted (kpm_enable at entry); kpm_enable itself is forced
	 * off here and presumably re-enabled later once segkpm is set
	 * up — confirm in startup_vm().
	 */
#if defined(__amd64)
	kpm_desired = kpm_enable;
#endif
	kpm_enable = 0;

	progressbar_init();
	startup_init();		/* cpuid pass 2, boot options, system config */
	startup_memlist();	/* take over memory accounting from boot */
	startup_modules();
	startup_bios_disk();
	startup_bop_gone();
	startup_vm();
	startup_end();
	progressbar_start();
}
|
|
617 |
|
|
618 |
static void
|
|
619 |
startup_init()
|
|
620 |
{
|
|
621 |
PRM_POINT("startup_init() starting...");
|
|
622 |
|
|
623 |
/*
|
|
624 |
* Complete the extraction of cpuid data
|
|
625 |
*/
|
|
626 |
cpuid_pass2(CPU);
|
|
627 |
|
|
628 |
(void) check_boot_version(BOP_GETVERSION(bootops));
|
|
629 |
|
|
630 |
/*
|
|
631 |
* Check for prom_debug in boot environment
|
|
632 |
*/
|
|
633 |
if (BOP_GETPROPLEN(bootops, "prom_debug") >= 0) {
|
|
634 |
++prom_debug;
|
|
635 |
PRM_POINT("prom_debug found in boot enviroment");
|
|
636 |
}
|
|
637 |
|
|
638 |
/*
|
|
639 |
* Collect node, cpu and memory configuration information.
|
|
640 |
*/
|
|
641 |
get_system_configuration();
|
|
642 |
|
|
643 |
/*
|
|
644 |
* Halt if this is an unsupported processor.
|
|
645 |
*/
|
|
646 |
if (x86_type == X86_TYPE_486 || x86_type == X86_TYPE_CYRIX_486) {
|
|
647 |
printf("\n486 processor (\"%s\") detected.\n",
|
|
648 |
CPU->cpu_brandstr);
|
|
649 |
halt("This processor is not supported by this release "
|
|
650 |
"of Solaris.");
|
|
651 |
}
|
|
652 |
|
|
653 |
/*
|
|
654 |
* Set up dummy values till psm spl code installed
|
|
655 |
*/
|
|
656 |
CPU->cpu_pri_data = dummy_cpu_pri;
|
|
657 |
|
|
658 |
PRM_POINT("startup_init() done");
|
|
659 |
}
|
|
660 |
|
|
661 |
/*
|
|
662 |
* Callback for copy_memlist_filter() to filter nucleus, kadb/kmdb, (ie.
|
|
663 |
* everything mapped above KERNEL_TEXT) pages from phys_avail. Note it
|
|
664 |
* also filters out physical page zero. There is some reliance on the
|
|
665 |
* boot loader allocating only a few contiguous physical memory chunks.
|
|
666 |
*/
|
|
667 |
/*
 * Filter callback: shrinks the physical range [*addr, *addr + *size)
 * so it excludes page zero and any physical pages that boot has mapped
 * at or above KERNEL_TEXT.  May reduce *size to zero.
 */
static void
avail_filter(uint64_t *addr, uint64_t *size)
{
	uintptr_t va;		/* current probe virtual address */
	uintptr_t next_va;	/* VA to resume probing from */
	pfn_t pfn;		/* pfn backing the probed mapping */
	uint64_t pfn_addr;	/* physical start of probed mapping */
	uint64_t pfn_eaddr;	/* physical end of probed mapping */
	uint_t prot;
	size_t len;		/* byte length of the probed mapping */
	uint_t change;		/* did this pass trim anything? */

	if (prom_debug)
		prom_printf("\tFilter: in: a=%" PRIx64 ", s=%" PRIx64 "\n",
		    *addr, *size);

	/*
	 * page zero is required for BIOS.. never make it available
	 */
	if (*addr == 0) {
		*addr += MMU_PAGESIZE;
		*size -= MMU_PAGESIZE;
	}

	/*
	 * First we trim from the front of the range.  Since
	 * hat_boot_probe() walks ranges in virtual order, but addr/size
	 * are physical, we need to walk the list until no changes are
	 * seen.  This deals with the case where page "p" is mapped at v,
	 * page "p + PAGESIZE" is mapped at w but w < v.
	 */
	do {
		change = 0;
		for (va = KERNEL_TEXT;
		    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
		    va = next_va) {

			next_va = va + len;
			pfn_addr = ptob((uint64_t)pfn);
			pfn_eaddr = pfn_addr + len;

			/*
			 * Mapping overlaps the front of the range: advance
			 * *addr page by page past the overlapping region.
			 */
			if (pfn_addr <= *addr && pfn_eaddr > *addr) {
				change = 1;
				while (*size > 0 && len > 0) {
					*addr += MMU_PAGESIZE;
					*size -= MMU_PAGESIZE;
					len -= MMU_PAGESIZE;
				}
			}
		}
		if (change && prom_debug)
			prom_printf("\t\ttrim: a=%" PRIx64 ", s=%" PRIx64 "\n",
			    *addr, *size);
	} while (change);

	/*
	 * Trim pages from the end of the range: any mapping that starts
	 * inside the range truncates it at that physical address.
	 */
	for (va = KERNEL_TEXT;
	    *size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0;
	    va = next_va) {

		next_va = va + len;
		pfn_addr = ptob((uint64_t)pfn);

		if (pfn_addr >= *addr && pfn_addr < *addr + *size)
			*size = pfn_addr - *addr;
	}

	if (prom_debug)
		prom_printf("\tFilter out: a=%" PRIx64 ", s=%" PRIx64 "\n",
		    *addr, *size);
}
|
|
740 |
|
|
741 |
/*
 * Create the segkpm segment (kernel physical mapping window at
 * kpm_vbase/kpm_size) and load mappings for all installed physical
 * memory into it.  Panics if the segment cannot be attached or created.
 */
static void
kpm_init()
{
	struct segkpm_crargs b;
	uintptr_t start, end;	/* physical bounds of the run being built */
	struct memlist *pmem;

	/*
	 * These variables were all designed for sfmmu in which segkpm is
	 * mapped using a single pagesize - either 8KB or 4MB.  On x86, we
	 * might use 2+ page sizes on a single machine, so none of these
	 * variables have a single correct value.  They are set up as if we
	 * always use a 4KB pagesize, which should do no harm.  In the long
	 * run, we should get rid of KPM's assumption that only a single
	 * pagesize is used.
	 */
	kpm_pgshft = MMU_PAGESHIFT;
	kpm_pgsz = MMU_PAGESIZE;
	kpm_pgoff = MMU_PAGEOFFSET;
	kpmp2pshft = 0;
	kpmpnpgs = 1;
	ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0);

	PRM_POINT("about to create segkpm");
	rw_enter(&kas.a_lock, RW_WRITER);

	if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0)
		panic("cannot attach segkpm");

	b.prot = PROT_READ | PROT_WRITE;
	b.nvcolors = 1;

	if (segkpm_create(segkpm, (caddr_t)&b) != 0)
		panic("segkpm_create segkpm");

	rw_exit(&kas.a_lock);

	/*
	 * Map each of the memsegs into the kpm segment, coalescing
	 * adjacent memsegs to allow mapping with the largest possible
	 * pages.  [start, end) accumulates a run of physically adjacent
	 * entries; a gap (or the end of the list) flushes the run via
	 * hat_devload() before starting the next one.
	 */
	pmem = phys_install;
	start = pmem->address;
	end = start + pmem->size;
	for (;;) {
		if (pmem == NULL || pmem->address > end) {
			/* gap or list end: load the accumulated run */
			hat_devload(kas.a_hat, kpm_vbase + start,
			    end - start, mmu_btop(start),
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
			if (pmem == NULL)
				break;
			start = pmem->address;
		}
		end = pmem->address + pmem->size;
		pmem = pmem->next;
	}
}
|
|
799 |
|
|
800 |
/*
|
|
801 |
* The purpose of startup memlist is to get the system to the
|
|
802 |
* point where it can use kmem_alloc()'s that operate correctly
|
|
803 |
* relying on BOP_ALLOC(). This includes allocating page_ts,
|
|
804 |
* page hash table, vmem initialized, etc.
|
|
805 |
*
|
|
806 |
* Boot's versions of physinstalled and physavail are insufficient for
|
|
807 |
* the kernel's purposes. Specifically we don't know which pages that
|
|
808 |
* are not in physavail can be reclaimed after boot is gone.
|
|
809 |
*
|
|
810 |
* This code solves the problem by dividing the address space
|
|
811 |
* into 3 regions as it takes over the MMU from the booter.
|
|
812 |
*
|
|
813 |
* 1) Any (non-nucleus) pages that are mapped at addresses above KERNEL_TEXT
|
|
814 |
* can not be used by the kernel.
|
|
815 |
*
|
|
816 |
* 2) Any free page that happens to be mapped below kernelbase
|
|
817 |
* is protected until the boot loader is released, but will then be reclaimed.
|
|
818 |
*
|
|
819 |
* 3) Boot shouldn't use any address in the remaining area between kernelbase
|
|
820 |
* and KERNEL_TEXT.
|
|
821 |
*
|
|
822 |
* In the case of multiple mappings to the same page, region 1 has precedence
|
|
823 |
* over region 2.
|
|
824 |
*/
|
|
825 |
static void
|
|
826 |
startup_memlist(void)
|
|
827 |
{
|
|
828 |
size_t memlist_sz;
|
|
829 |
size_t memseg_sz;
|
|
830 |
size_t pagehash_sz;
|
|
831 |
size_t pp_sz;
|
|
832 |
uintptr_t va;
|
|
833 |
size_t len;
|
|
834 |
uint_t prot;
|
|
835 |
pfn_t pfn;
|
|
836 |
int memblocks;
|
|
837 |
caddr_t pagecolor_mem;
|
|
838 |
size_t pagecolor_memsz;
|
|
839 |
caddr_t page_ctrs_mem;
|
|
840 |
size_t page_ctrs_size;
|
|
841 |
struct memlist *current;
|
|
842 |
extern void startup_build_mem_nodes(struct memlist *);
|
|
843 |
|
|
844 |
/* XX64 fix these - they should be in include files */
|
|
845 |
extern ulong_t cr4_value;
|
|
846 |
extern size_t page_coloring_init(uint_t, int, int);
|
|
847 |
extern void page_coloring_setup(caddr_t);
|
|
848 |
|
|
849 |
PRM_POINT("startup_memlist() starting...");
|
|
850 |
|
|
851 |
/*
|
|
852 |
* Take the most current snapshot we can by calling mem-update.
|
|
853 |
* For this to work properly, we first have to ask boot for its
|
|
854 |
* end address.
|
|
855 |
*/
|
|
856 |
if (BOP_GETPROPLEN(bootops, "memory-update") == 0)
|
|
857 |
(void) BOP_GETPROP(bootops, "memory-update", NULL);
|
|
858 |
|
|
859 |
/*
|
|
860 |
* find if the kernel is mapped on a large page
|
|
861 |
*/
|
|
862 |
va = KERNEL_TEXT;
|
|
863 |
if (hat_boot_probe(&va, &len, &pfn, &prot) == 0)
|
|
864 |
panic("Couldn't find kernel text boot mapping");
|
|
865 |
|
|
866 |
/*
|
|
867 |
* Use leftover large page nucleus text/data space for loadable modules.
|
|
868 |
* Use at most MODTEXT/MODDATA.
|
|
869 |
*/
|
|
870 |
if (len > MMU_PAGESIZE) {
|
|
871 |
|
|
872 |
moddata = (caddr_t)ROUND_UP_PAGE(e_data);
|
|
873 |
e_moddata = (caddr_t)ROUND_UP_4MEG(e_data);
|
|
874 |
if (e_moddata - moddata > MODDATA)
|
|
875 |
e_moddata = moddata + MODDATA;
|
|
876 |
|
|
877 |
modtext = (caddr_t)ROUND_UP_PAGE(e_text);
|
|
878 |
e_modtext = (caddr_t)ROUND_UP_4MEG(e_text);
|
|
879 |
if (e_modtext - modtext > MODTEXT)
|
|
880 |
e_modtext = modtext + MODTEXT;
|
|
881 |
|
|
882 |
|
|
883 |
} else {
|
|
884 |
|
|
885 |
PRM_POINT("Kernel NOT loaded on Large Page!");
|
|
886 |
e_moddata = moddata = (caddr_t)ROUND_UP_PAGE(e_data);
|
|
887 |
e_modtext = modtext = (caddr_t)ROUND_UP_PAGE(e_text);
|
|
888 |
|
|
889 |
}
|
|
890 |
econtig = e_moddata;
|
|
891 |
|
|
892 |
PRM_DEBUG(modtext);
|
|
893 |
PRM_DEBUG(e_modtext);
|
|
894 |
PRM_DEBUG(moddata);
|
|
895 |
PRM_DEBUG(e_moddata);
|
|
896 |
PRM_DEBUG(econtig);
|
|
897 |
|
|
898 |
/*
|
|
899 |
* For MP machines cr4_value must be set or the non-boot
|
|
900 |
* CPUs will not be able to start.
|
|
901 |
*/
|
|
902 |
if (x86_feature & X86_LARGEPAGE)
|
|
903 |
cr4_value = getcr4();
|
|
904 |
PRM_DEBUG(cr4_value);
|
|
905 |
|
|
906 |
/*
|
|
907 |
* Examine the boot loaders physical memory map to find out:
|
|
908 |
* - total memory in system - physinstalled
|
|
909 |
* - the max physical address - physmax
|
|
910 |
* - the number of segments the intsalled memory comes in
|
|
911 |
*/
|
|
912 |
if (prom_debug)
|
|
913 |
print_boot_memlist("boot physinstalled",
|
|
914 |
bootops->boot_mem->physinstalled);
|
|
915 |
installed_top_size(bootops->boot_mem->physinstalled, &physmax,
|
|
916 |
&physinstalled, &memblocks);
|
|
917 |
PRM_DEBUG(physmax);
|
|
918 |
PRM_DEBUG(physinstalled);
|
|
919 |
PRM_DEBUG(memblocks);
|
|
920 |
|
|
921 |
if (prom_debug)
|
|
922 |
print_boot_memlist("boot physavail",
|
|
923 |
bootops->boot_mem->physavail);
|
|
924 |
|
|
925 |
/*
|
|
926 |
* Initialize hat's mmu parameters.
|
|
927 |
* Check for enforce-prot-exec in boot environment. It's used to
|
|
928 |
* enable/disable support for the page table entry NX bit.
|
|
929 |
* The default is to enforce PROT_EXEC on processors that support NX.
|
|
930 |
* Boot seems to round up the "len", but 8 seems to be big enough.
|
|
931 |
*/
|
|
932 |
mmu_init();
|
|
933 |
|
|
934 |
#ifdef __i386
|
|
935 |
/*
|
|
936 |
* physmax is lowered if there is more memory than can be
|
|
937 |
* physically addressed in 32 bit (PAE/non-PAE) modes.
|
|
938 |
*/
|
|
939 |
if (mmu.pae_hat) {
|
|
940 |
if (PFN_ABOVE64G(physmax)) {
|
|
941 |
physinstalled -= (physmax - (PFN_64G - 1));
|
|
942 |
physmax = PFN_64G - 1;
|
|
943 |
}
|
|
944 |
} else {
|
|
945 |
if (PFN_ABOVE4G(physmax)) {
|
|
946 |
physinstalled -= (physmax - (PFN_4G - 1));
|
|
947 |
physmax = PFN_4G - 1;
|
|
948 |
}
|
|
949 |
}
|
|
950 |
#endif
|
|
951 |
|
|
952 |
startup_build_mem_nodes(bootops->boot_mem->physinstalled);
|
|
953 |
|
|
954 |
if (BOP_GETPROPLEN(bootops, "enforce-prot-exec") >= 0) {
|
|
955 |
int len = BOP_GETPROPLEN(bootops, "enforce-prot-exec");
|
|
956 |
char value[8];
|
|
957 |
|
|
958 |
if (len < 8)
|
|
959 |
(void) BOP_GETPROP(bootops, "enforce-prot-exec", value);
|
|
960 |
else
|
|
961 |
(void) strcpy(value, "");
|
|
962 |
if (strcmp(value, "off") == 0)
|
|
963 |
mmu.pt_nx = 0;
|
|
964 |
}
|
|
965 |
PRM_DEBUG(mmu.pt_nx);
|
|
966 |
|
|
967 |
/*
|
|
968 |
* We will need page_t's for every page in the system, except for
|
|
969 |
* memory mapped at or above above the start of the kernel text segment.
|
|
970 |
*
|
|
971 |
* pages above e_modtext are attributed to kernel debugger (obp_pages)
|
|
972 |
*/
|
|
973 |
npages = physinstalled - 1; /* avail_filter() skips page 0, so "- 1" */
|
|
974 |
obp_pages = 0;
|
|
975 |
va = KERNEL_TEXT;
|
|
976 |
while (hat_boot_probe(&va, &len, &pfn, &prot) != 0) {
|
|
977 |
npages -= len >> MMU_PAGESHIFT;
|
|
978 |
if (va >= (uintptr_t)e_moddata)
|
|
979 |
obp_pages += len >> MMU_PAGESHIFT;
|
|
980 |
va += len;
|
|
981 |
}
|
|
982 |
PRM_DEBUG(npages);
|
|
983 |
PRM_DEBUG(obp_pages);
|
|
984 |
|
|
985 |
/*
|
|
986 |
* If physmem is patched to be non-zero, use it instead of
|
|
987 |
* the computed value unless it is larger than the real
|
|
988 |
* amount of memory on hand.
|
|
989 |
*/
|
|
990 |
if (physmem == 0 || physmem > npages)
|
|
991 |
physmem = npages;
|
|
992 |
else
|
|
993 |
npages = physmem;
|
|
994 |
PRM_DEBUG(physmem);
|
|
995 |
|
|
996 |
/*
|
|
997 |
* We now compute the sizes of all the initial allocations for
|
|
998 |
* structures the kernel needs in order do kmem_alloc(). These
|
|
999 |
* include:
|
|
1000 |
* memsegs
|
|
1001 |
* memlists
|
|
1002 |
* page hash table
|
|
1003 |
* page_t's
|
|
1004 |
* page coloring data structs
|
|
1005 |
*/
|
|
1006 |
memseg_sz = sizeof (struct memseg) * (memblocks + POSS_NEW_FRAGMENTS);
|
|
1007 |
ADD_TO_ALLOCATIONS(memseg_base, memseg_sz);
|
|
1008 |
PRM_DEBUG(memseg_sz);
|
|
1009 |
|
|
1010 |
/*
|
|
1011 |
* Reserve space for phys_avail/phys_install memlists.
|
|
1012 |
* There's no real good way to know exactly how much room we'll need,
|
|
1013 |
* but this should be a good upper bound.
|
|
1014 |
*/
|
|
1015 |
memlist_sz = ROUND_UP_PAGE(2 * sizeof (struct memlist) *
|
|
1016 |
(memblocks + POSS_NEW_FRAGMENTS));
|
|
1017 |
ADD_TO_ALLOCATIONS(memlist, memlist_sz);
|
|
1018 |
PRM_DEBUG(memlist_sz);
|
|
1019 |
|
|
1020 |
/*
|
|
1021 |
* The page structure hash table size is a power of 2
|
|
1022 |
* such that the average hash chain length is PAGE_HASHAVELEN.
|
|
1023 |
*/
|
|
1024 |
page_hashsz = npages / PAGE_HASHAVELEN;
|
|
1025 |
page_hashsz = 1 << highbit(page_hashsz);
|
|
1026 |
pagehash_sz = sizeof (struct page *) * page_hashsz;
|
|
1027 |
ADD_TO_ALLOCATIONS(page_hash, pagehash_sz);
|
|
1028 |
PRM_DEBUG(pagehash_sz);
|
|
1029 |
|
|
1030 |
/*
|
|
1031 |
* Set aside room for the page structures themselves. Note: on
|
|
1032 |
* 64-bit systems we don't allocate page_t's for every page here.
|
|
1033 |
* We just allocate enough to map the lowest 4GB of physical
|
|
1034 |
* memory, minus those pages that are used for the "nucleus" kernel
|
|
1035 |
* text and data. The remaining pages are allocated once we can
|
|
1036 |
* map around boot.
|
|
1037 |
*
|
|
1038 |
* boot_npages is used to allocate an area big enough for our
|
|
1039 |
* initial page_t's. kphym_init may use less than that.
|
|
1040 |
*/
|
|
1041 |
boot_npages = npages;
|
|
1042 |
#if defined(__amd64)
|
|
1043 |
if (npages > mmu_btop(FOURGB - (econtig - s_text)))
|
|
1044 |
boot_npages = mmu_btop(FOURGB - (econtig - s_text));
|
|
1045 |
#endif
|
|
1046 |
PRM_DEBUG(boot_npages);
|
|
1047 |
pp_sz = sizeof (struct page) * boot_npages;
|
|
1048 |
ADD_TO_ALLOCATIONS(pp_base, pp_sz);
|
|
1049 |
PRM_DEBUG(pp_sz);
|
|
1050 |
|
|
1051 |
/*
|
|
1052 |
* determine l2 cache info and memory size for page coloring
|
|
1053 |
*/
|
|
1054 |
(void) getl2cacheinfo(CPU,
|
|
1055 |
&l2cache_sz, &l2cache_linesz, &l2cache_assoc);
|
|
1056 |
pagecolor_memsz =
|
|
1057 |
page_coloring_init(l2cache_sz, l2cache_linesz, l2cache_assoc);
|
|
1058 |
ADD_TO_ALLOCATIONS(pagecolor_mem, pagecolor_memsz);
|
|
1059 |
PRM_DEBUG(pagecolor_memsz);
|
|
1060 |
|
|
1061 |
page_ctrs_size = page_ctrs_sz();
|
|
1062 |
ADD_TO_ALLOCATIONS(page_ctrs_mem, page_ctrs_size);
|
|
1063 |
PRM_DEBUG(page_ctrs_size);
|
|
1064 |
|
|
1065 |
/*
|
|
1066 |
* valloc_base will be below kernel text
|
|
1067 |
* The extra pages are for the HAT and kmdb to map page tables.
|
|
1068 |
*/
|
|
1069 |
valloc_sz = ROUND_UP_LPAGE(valloc_sz);
|
|
1070 |
valloc_base = KERNEL_TEXT - valloc_sz;
|
|
1071 |
PRM_DEBUG(valloc_base);
|
|
1072 |
ptable_va = valloc_base - ptable_sz;
|
|
1073 |
|
|
1074 |
#if defined(__amd64)
|
|
1075 |
if (eprom_kernelbase && eprom_kernelbase != KERNELBASE)
|
|
1076 |
cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit "
|
|
1077 |
"systems.");
|
|
1078 |
kernelbase = (uintptr_t)KERNELBASE;
|
|
1079 |
core_base = (uintptr_t)COREHEAP_BASE;
|
|
1080 |
core_size = ptable_va - core_base;
|
|
1081 |
#else /* __i386 */
|
|
1082 |
/*
|
|
1083 |
* We configure kernelbase based on:
|
|
1084 |
*
|
|
1085 |
* 1. user specified kernelbase via eeprom command. Value cannot exceed
|
|
1086 |
* KERNELBASE_MAX. we large page align eprom_kernelbase
|
|
1087 |
*
|
|
1088 |
* 2. Default to KERNELBASE and adjust to 2X less the size for page_t.
|
|
1089 |
* On large memory systems we must lower kernelbase to allow
|
|
1090 |
* enough room for page_t's for all of memory.
|
|
1091 |
*
|
|
1092 |
* The value set here, might be changed a little later.
|
|
1093 |
*/
|
|
1094 |
if (eprom_kernelbase) {
|
|
1095 |
kernelbase = eprom_kernelbase & mmu.level_mask[1];
|
|
1096 |
if (kernelbase > KERNELBASE_MAX)
|
|
1097 |
kernelbase = KERNELBASE_MAX;
|
|
1098 |
} else {
|
|
1099 |
kernelbase = (uintptr_t)KERNELBASE;
|
|
1100 |
kernelbase -= ROUND_UP_4MEG(2 * valloc_sz);
|
|
1101 |
}
|
|
1102 |
ASSERT((kernelbase & mmu.level_offset[1]) == 0);
|
|
1103 |
core_base = ptable_va;
|
|
1104 |
core_size = 0;
|
|
1105 |
#endif
|
|
1106 |
|
|
1107 |
PRM_DEBUG(kernelbase);
|
|
1108 |
PRM_DEBUG(core_base);
|
|
1109 |
PRM_DEBUG(core_size);
|
|
1110 |
|
|
1111 |
/*
|
|
1112 |
* At this point, we can only use a portion of the kernelheap that
|
|
1113 |
* will be available after we boot. Both 32-bit and 64-bit systems
|
|
1114 |
* have this limitation, although the reasons are completely
|
|
1115 |
* different.
|
|
1116 |
*
|
|
1117 |
* On 64-bit systems, the booter only supports allocations in the
|
|
1118 |
* upper 4GB of memory, so we have to work with a reduced kernel
|
|
1119 |
* heap until we take over all allocations. The booter also sits
|
|
1120 |
* in the lower portion of that 4GB range, so we have to raise the
|
|
1121 |
* bottom of the heap even further.
|
|
1122 |
*
|
|
1123 |
* On 32-bit systems we have to leave room to place segmap below
|
|
1124 |
* the heap. We don't yet know how large segmap will be, so we
|
|
1125 |
* have to be very conservative.
|
|
1126 |
*/
|
|
1127 |
#if defined(__amd64)
|
|
1128 |
/*
|
|
1129 |
* XX64: For now, we let boot have the lower 2GB of the top 4GB
|
|
1130 |
* address range. In the long run, that should be fixed. It's
|
|
1131 |
* insane for a booter to need 2 2GB address ranges.
|
|
1132 |
*/
|
|
1133 |
boot_kernelheap = (caddr_t)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE);
|
|
1134 |
segmap_reserved = 0;
|
|
1135 |
|
|
1136 |
#else /* __i386 */
|
|
1137 |
segkp_fromheap = 1;
|
|
1138 |
segmap_reserved = ROUND_UP_LPAGE(MAX(segmapsize, SEGMAPMAX));
|
|
1139 |
boot_kernelheap = (caddr_t)(ROUND_UP_LPAGE(kernelbase) +
|
|
1140 |
segmap_reserved);
|
|
1141 |
#endif
|
|
1142 |
PRM_DEBUG(boot_kernelheap);
|
|
1143 |
kernelheap = boot_kernelheap;
|
|
1144 |
ekernelheap = (char *)core_base;
|
|
1145 |
|
|
1146 |
/*
|
|
1147 |
* If segmap is too large we can push the bottom of the kernel heap
|
|
1148 |
* higher than the base. Or worse, it could exceed the top of the
|
|
1149 |
* VA space entirely, causing it to wrap around.
|
|
1150 |
*/
|
|
1151 |
if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase)
|
|
1152 |
panic("too little memory available for kernelheap,"
|
|
1153 |
" use a different kernelbase");
|
|
1154 |
|
|
1155 |
/*
|
|
1156 |
* Now that we know the real value of kernelbase,
|
|
1157 |
* update variables that were initialized with a value of
|
|
1158 |
* KERNELBASE (in common/conf/param.c).
|
|
1159 |
*
|
|
1160 |
* XXX The problem with this sort of hackery is that the
|
|
1161 |
* compiler just may feel like putting the const declarations
|
|
1162 |
* (in param.c) into the .text section. Perhaps they should
|
|
1163 |
* just be declared as variables there?
|
|
1164 |
*/
|
|
1165 |
|
|
1166 |
#if defined(__amd64)
|
|
1167 |
ASSERT(_kernelbase == KERNELBASE);
|
|
1168 |
ASSERT(_userlimit == USERLIMIT);
|
|
1169 |
/*
|
|
1170 |
* As one final sanity check, verify that the "red zone" between
|
|
1171 |
* kernel and userspace is exactly the size we expected.
|
|
1172 |
*/
|
|
1173 |
ASSERT(_kernelbase == (_userlimit + (2 * 1024 * 1024)));
|
|
1174 |
#else
|
|
1175 |
*(uintptr_t *)&_kernelbase = kernelbase;
|
|
1176 |
*(uintptr_t *)&_userlimit = kernelbase;
|
|
1177 |
*(uintptr_t *)&_userlimit32 = _userlimit;
|
|
1178 |
#endif
|
|
1179 |
PRM_DEBUG(_kernelbase);
|
|
1180 |
PRM_DEBUG(_userlimit);
|
|
1181 |
PRM_DEBUG(_userlimit32);
|
|
1182 |
|
|
1183 |
/*
|
|
1184 |
* do all the initial allocations
|
|
1185 |
*/
|
|
1186 |
perform_allocations();
|
|
1187 |
|
|
1188 |
/*
|
|
1189 |
* Initialize the kernel heap. Note 3rd argument must be > 1st.
|
|
1190 |
*/
|
|
1191 |
kernelheap_init(kernelheap, ekernelheap, kernelheap + MMU_PAGESIZE,
|
|
1192 |
(void *)core_base, (void *)ptable_va);
|
|
1193 |
|
|
1194 |
/*
|
|
1195 |
* Build phys_install and phys_avail in kernel memspace.
|
|
1196 |
* - phys_install should be all memory in the system.
|
|
1197 |
* - phys_avail is phys_install minus any memory mapped before this
|
|
1198 |
* point above KERNEL_TEXT.
|
|
1199 |
*/
|
|
1200 |
current = phys_install = memlist;
|
|
1201 |
copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t, NULL);
|
|
1202 |
if ((caddr_t)current > (caddr_t)memlist + memlist_sz)
|
|
1203 |
panic("physinstalled was too big!");
|
|
1204 |
if (prom_debug)
|
|
1205 |
print_kernel_memlist("phys_install", phys_install);
|
|
1206 |
|
|
1207 |
phys_avail = current;
|
|
1208 |
PRM_POINT("Building phys_avail:\n");
|
|
1209 |
copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t,
|
|
1210 |
avail_filter);
|
|
1211 |
if ((caddr_t)current > (caddr_t)memlist + memlist_sz)
|
|
1212 |
panic("physavail was too big!");
|
|
1213 |
if (prom_debug)
|
|
1214 |
print_kernel_memlist("phys_avail", phys_avail);
|
|
1215 |
|
|
1216 |
/*
|
|
1217 |
* setup page coloring
|
|
1218 |
*/
|
|
1219 |
page_coloring_setup(pagecolor_mem);
|
|
1220 |
page_lock_init(); /* currently a no-op */
|
|
1221 |
|
|
1222 |
/*
|
|
1223 |
* free page list counters
|
|
1224 |
*/
|
|
1225 |
(void) page_ctrs_alloc(page_ctrs_mem);
|
|
1226 |
|
|
1227 |
/*
|
|
1228 |
* Initialize the page structures from the memory lists.
|
|
1229 |
*/
|
|
1230 |
availrmem_initial = availrmem = freemem = 0;
|
|
1231 |
PRM_POINT("Calling kphysm_init()...");
|
|
1232 |
boot_npages = kphysm_init(pp_base, memseg_base, 0, boot_npages);
|
|
1233 |
PRM_POINT("kphysm_init() done");
|
|
1234 |
PRM_DEBUG(boot_npages);
|
|
1235 |
|
|
1236 |
/*
|
|
1237 |
* Now that page_t's have been initialized, remove all the
|
|
1238 |
* initial allocation pages from the kernel free page lists.
|
|
1239 |
*/
|
|
1240 |
boot_mapin((caddr_t)valloc_base, valloc_sz);
|
|
1241 |
|
|
1242 |
/*
|
|
1243 |
* Initialize kernel memory allocator.
|
|
1244 |
*/
|
|
1245 |
kmem_init();
|
|
1246 |
|
|
1247 |
/*
|
|
1248 |
* print this out early so that we know what's going on
|
|
1249 |
*/
|
|
1250 |
cmn_err(CE_CONT, "?features: %b\n", x86_feature, FMT_X86_FEATURE);
|
|
1251 |
|
|
1252 |
/*
|
|
1253 |
* Initialize bp_mapin().
|
|
1254 |
*/
|
|
1255 |
bp_init(MMU_PAGESIZE, HAT_STORECACHING_OK);
|
|
1256 |
|
|
1257 |
#if defined(__i386)
|
|
1258 |
if (eprom_kernelbase && (eprom_kernelbase != kernelbase))
|
|
1259 |
cmn_err(CE_WARN, "kernelbase value, User specified 0x%lx, "
|
|
1260 |
"System using 0x%lx",
|
|
1261 |
(uintptr_t)eprom_kernelbase, (uintptr_t)kernelbase);
|
|
1262 |
#endif
|
|
1263 |
|
|
1264 |
#ifdef KERNELBASE_ABI_MIN
|
|
1265 |
if (kernelbase < (uintptr_t)KERNELBASE_ABI_MIN) {
|
|
1266 |
cmn_err(CE_NOTE, "!kernelbase set to 0x%lx, system is not "
|
|
1267 |
"i386 ABI compliant.", (uintptr_t)kernelbase);
|
|
1268 |
}
|
|
1269 |
#endif
|
|
1270 |
|
|
1271 |
PRM_POINT("startup_memlist() done");
|
|
1272 |
}
|
|
1273 |
|
|
1274 |
/*
 * Second phase of startup: calibrate timing, compute tunables, bring up
 * the HAT and segment layers, and load the core kernel modules needed
 * before the VM system can be completed.  Order of calls matters here.
 */
static void
startup_modules(void)
{
	unsigned int i;		/* module id returned by modload() */
	extern void impl_setup_ddi(void);
	extern void prom_setup(void);

	PRM_POINT("startup_modules() starting...");
	/*
	 * Initialize ten-micro second timer so that drivers will
	 * not get short changed in their init phase. This was
	 * not getting called until clkinit which, on fast cpu's
	 * caused the drv_usecwait to be way too short.
	 */
	microfind();

	/*
	 * Read the GMT lag from /etc/rtc_config.
	 */
	gmt_lag = process_rtc_config_file();

	/*
	 * Calculate default settings of system parameters based upon
	 * maxusers, yet allow to be overridden via the /etc/system file.
	 */
	param_calc(0);

	mod_setup();

	/*
	 * Setup machine check architecture on P6
	 */
	setup_mca();

	/*
	 * Initialize system parameters.
	 */
	param_init();

	/*
	 * maxmem is the amount of physical memory we're playing with.
	 */
	maxmem = physmem;

	/*
	 * Initialize the hat layer.
	 */
	hat_init();

	/*
	 * Initialize segment management stuff.
	 */
	seg_init();

	/* specfs and devfs are required; modload() returns -1 on failure */
	if (modload("fs", "specfs") == -1)
		halt("Can't load specfs");

	if (modload("fs", "devfs") == -1)
		halt("Can't load devfs");

	dispinit();

	/*
	 * This is needed here to initialize hw_serial[] for cluster booting.
	 * sysinit is loaded only for its side effects, then unloaded again.
	 */
	if ((i = modload("misc", "sysinit")) != (unsigned int)-1)
		(void) modunload(i);
	else
		cmn_err(CE_CONT, "sysinit load failed");

	/* Read cluster configuration data. */
	clconf_init();

	/*
	 * Create a kernel device tree. First, create rootnex and
	 * then invoke bus specific code to probe devices.
	 */
	setup_ddi();
	impl_setup_ddi();
	/*
	 * Fake a prom tree such that /dev/openprom continues to work
	 */
	prom_setup();

	/*
	 * Load all platform specific modules
	 */
	psm_modload();

	PRM_POINT("startup_modules() done");
}
|
|
1365 |
|
|
1366 |
/*
 * Startup phase run just before the boot loader is quiesced: perform the
 * last allocations that still require boot's services, then program the
 * MTRRs.  After this, BOP_ALLOC() may no longer be used.
 */
static void
startup_bop_gone(void)
{
	PRM_POINT("startup_bop_gone() starting...");

	/*
	 * Do final allocations of HAT data structures that need to
	 * be allocated before quiescing the boot loader.
	 */
	PRM_POINT("Calling hat_kern_alloc()...");
	hat_kern_alloc();
	PRM_POINT("hat_kern_alloc() done");

	/*
	 * Setup MTRR (Memory type range registers)
	 */
	setup_mtrr();
	PRM_POINT("startup_bop_gone() done");
}
|
|
1385 |
|
|
1386 |
/*
|
|
1387 |
* Walk through the pagetables looking for pages mapped in by boot. If the
|
|
1388 |
* setaside flag is set the pages are expected to be returned to the
|
|
1389 |
* kernel later in boot, so we add them to the bootpages list.
|
|
1390 |
*/
|
|
1391 |
static void
|
|
1392 |
protect_boot_range(uintptr_t low, uintptr_t high, int setaside)
|
|
1393 |
{
|
|
1394 |
uintptr_t va = low;
|
|
1395 |
size_t len;
|
|
1396 |
uint_t prot;
|
|
1397 |
pfn_t pfn;
|
|
1398 |
page_t *pp;
|
|
1399 |
pgcnt_t boot_protect_cnt = 0;
|
|
1400 |
|
|
1401 |
while (hat_boot_probe(&va, &len, &pfn, &prot) != 0 && va < high) {
|
|
1402 |
if (va + len >= high)
|
|
1403 |
panic("0x%lx byte mapping at 0x%p exceeds boot's "
|
|
1404 |
"legal range.", len, (void *)va);
|
|
1405 |
|
|
1406 |
while (len > 0) {
|
|
1407 |
pp = page_numtopp_alloc(pfn);
|
|
1408 |
if (pp != NULL) {
|
|
1409 |
if (setaside == 0)
|
|
1410 |
panic("Unexpected mapping by boot. "
|
|
1411 |
"addr=%p pfn=%lx\n",
|
|
1412 |
(void *)va, pfn);
|
|
1413 |
|
|
1414 |
pp->p_next = bootpages;
|
|
1415 |
bootpages = pp;
|
|
1416 |
++boot_protect_cnt;
|
|
1417 |
}
|
|
1418 |
|
|
1419 |
++pfn;
|
|
1420 |
len -= MMU_PAGESIZE;
|
|
1421 |
va += MMU_PAGESIZE;
|
|
1422 |
}
|
|
1423 |
}
|
|
1424 |
PRM_DEBUG(boot_protect_cnt);
|
|
1425 |
}
|
|
1426 |
|
|
1427 |
static void
|
|
1428 |
startup_vm(void)
|
|
1429 |
{
|
|
1430 |
struct segmap_crargs a;
|
|
1431 |
extern void hat_kern_setup(void);
|
|
1432 |
pgcnt_t pages_left;
|
|
1433 |
|
|
1434 |
PRM_POINT("startup_vm() starting...");
|
|
1435 |
|
|
1436 |
/*
|
|
1437 |
* The next two loops are done in distinct steps in order
|
|
1438 |
* to be sure that any page that is doubly mapped (both above
|
|
1439 |
* KERNEL_TEXT and below kernelbase) is dealt with correctly.
|
|
1440 |
* Note this may never happen, but it might someday.
|
|
1441 |
*/
|
|
1442 |
|
|
1443 |
bootpages = NULL;
|
|
1444 |
PRM_POINT("Protecting boot pages");
|
|
1445 |
/*
|
|
1446 |
* Protect any pages mapped above KERNEL_TEXT that somehow have
|
|
1447 |
* page_t's. This can only happen if something weird allocated
|
|
1448 |
* in this range (like kadb/kmdb).
|
|
1449 |
*/
|
|
1450 |
protect_boot_range(KERNEL_TEXT, (uintptr_t)-1, 0);
|
|
1451 |
|
|
1452 |
/*
|
|
1453 |
* Before we can take over memory allocation/mapping from the boot
|
|
1454 |
* loader we must remove from our free page lists any boot pages that
|
|
1455 |
* will stay mapped until release_bootstrap().
|
|
1456 |
*/
|
|
1457 |
protect_boot_range(0, kernelbase, 1);
|
|
1458 |
#if defined(__amd64)
|
|
1459 |
protect_boot_range(BOOT_DOUBLEMAP_BASE,
|
|
1460 |
BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE, 0);
|
|
1461 |
#endif
|
|
1462 |
|
|
1463 |
/*
|
|
1464 |
* Copy in boot's page tables, set up extra page tables for the kernel,
|
|
1465 |
* and switch to the kernel's context.
|
|
1466 |
*/
|
|
1467 |
PRM_POINT("Calling hat_kern_setup()...");
|
|
1468 |
hat_kern_setup();
|
|
1469 |
|
|
1470 |
/*
|
|
1471 |
* It is no longer safe to call BOP_ALLOC(), so make sure we don't.
|
|
1472 |
*/
|
|
1473 |
bootops->bsys_alloc = NULL;
|
|
1474 |
PRM_POINT("hat_kern_setup() done");
|
|
1475 |
|
|
1476 |
hat_cpu_online(CPU);
|
|
1477 |
|
|
1478 |
/*
|
|
1479 |
* Before we call kvm_init(), we need to establish the final size
|
|
1480 |
* of the kernel's heap. So, we need to figure out how much space
|
|
1481 |
* to set aside for segkp, segkpm, and segmap.
|
|
1482 |
*/
|
|
1483 |
final_kernelheap = (caddr_t)ROUND_UP_LPAGE(kernelbase);
|
|
1484 |
#if defined(__amd64)
|
|
1485 |
if (kpm_desired) {
|
|
1486 |
/*
|
|
1487 |
* Segkpm appears at the bottom of the kernel's address
|
|
1488 |
* range. To detect accidental overruns of the user
|
|
1489 |
* address space, we leave a "red zone" of unmapped memory
|
|
1490 |
* between kernelbase and the beginning of segkpm.
|
|
1491 |
*/
|
|
1492 |
kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE;
|
|
1493 |
kpm_size = mmu_ptob(physmax);
|
|
1494 |
PRM_DEBUG(kpm_vbase);
|
|
1495 |
PRM_DEBUG(kpm_size);
|
|
1496 |
final_kernelheap =
|
|
1497 |
(caddr_t)ROUND_UP_TOPLEVEL(kpm_vbase + kpm_size);
|
|
1498 |
}
|
|
1499 |
|
|
1500 |
if (!segkp_fromheap) {
|
|
1501 |
size_t sz = mmu_ptob(segkpsize);
|
|
1502 |
|
|
1503 |
/*
|
|
1504 |
* determine size of segkp and adjust the bottom of the
|
|
1505 |
* kernel's heap.
|
|
1506 |
*/
|
|
1507 |
if (sz < SEGKPMINSIZE || sz > SEGKPMAXSIZE) {
|
|
1508 |
sz = SEGKPDEFSIZE;
|
|
1509 |
cmn_err(CE_WARN, "!Illegal value for segkpsize. "
|
|
1510 |
"segkpsize has been reset to %ld pages",
|
|
1511 |
mmu_btop(sz));
|
|
1512 |
}
|
|
1513 |
sz = MIN(sz, MAX(SEGKPMINSIZE, mmu_ptob(physmem)));
|
|
1514 |
|
|
1515 |
segkpsize = mmu_btop(ROUND_UP_LPAGE(sz));
|
|
1516 |
segkp_base = final_kernelheap;
|
|
1517 |
PRM_DEBUG(segkpsize);
|
|
1518 |
PRM_DEBUG(segkp_base);
|
|
1519 |
final_kernelheap = segkp_base + mmu_ptob(segkpsize);
|
|
1520 |
PRM_DEBUG(final_kernelheap);
|
|
1521 |
}
|
|
1522 |
|
|
1523 |
/*
|
|
1524 |
* put the range of VA for device mappings next
|
|
1525 |
*/
|
|
1526 |
toxic_addr = (uintptr_t)final_kernelheap;
|
|
1527 |
PRM_DEBUG(toxic_addr);
|
|
1528 |
final_kernelheap = (char *)toxic_addr + toxic_size;
|
|
1529 |
#endif
|
|
1530 |
PRM_DEBUG(final_kernelheap);
|
|
1531 |
ASSERT(final_kernelheap < boot_kernelheap);
|
|
1532 |
|
|
1533 |
/*
|
|
1534 |
* Users can change segmapsize through eeprom or /etc/system.
|
|
1535 |
* If the variable is tuned through eeprom, there is no upper
|
|
1536 |
* bound on the size of segmap. If it is tuned through
|
|
1537 |
* /etc/system on 32-bit systems, it must be no larger than we
|
|
1538 |
* planned for in startup_memlist().
|
|
1539 |
*/
|
|
1540 |
segmapsize = MAX(ROUND_UP_LPAGE(segmapsize), SEGMAPDEFAULT);
|
|
1541 |
segkmap_start = ROUND_UP_LPAGE((uintptr_t)final_kernelheap);
|
|
1542 |
|
|
1543 |
#if defined(__i386)
|
|
1544 |
if (segmapsize > segmap_reserved) {
|
|
1545 |
cmn_err(CE_NOTE, "!segmapsize may not be set > 0x%lx in "
|
|
1546 |
"/etc/system. Use eeprom.", (long)SEGMAPMAX);
|
|
1547 |
segmapsize = segmap_reserved;
|
|
1548 |
}
|
|
1549 |
/*
|
|
1550 |
* 32-bit systems don't have segkpm or segkp, so segmap appears at
|
|
1551 |
* the bottom of the kernel's address range. Set aside space for a
|
|
1552 |
* red zone just below the start of segmap.
|
|
1553 |
*/
|
|
1554 |
segkmap_start += KERNEL_REDZONE_SIZE;
|
|
1555 |
segmapsize -= KERNEL_REDZONE_SIZE;
|
|
1556 |
#endif
|
|
1557 |
final_kernelheap = (char *)(segkmap_start + segmapsize);
|
|
1558 |
|
|
1559 |
PRM_DEBUG(segkmap_start);
|
|
1560 |
PRM_DEBUG(segmapsize);
|
|
1561 |
PRM_DEBUG(final_kernelheap);
|
|
1562 |
|
|
1563 |
/*
|
|
1564 |
* Initialize VM system
|
|
1565 |
*/
|
|
1566 |
PRM_POINT("Calling kvm_init()...");
|
|
1567 |
kvm_init();
|
|
1568 |
PRM_POINT("kvm_init() done");
|
|
1569 |
|
|
1570 |
/*
|
|
1571 |
* Tell kmdb that the VM system is now working
|
|
1572 |
*/
|
|
1573 |
if (boothowto & RB_DEBUG)
|
|
1574 |
kdi_dvec_vmready();
|
|
1575 |
|
|
1576 |
/*
|
|
1577 |
* Mangle the brand string etc.
|
|
1578 |
*/
|
|
1579 |
cpuid_pass3(CPU);
|
|
1580 |
|
|
1581 |
PRM_DEBUG(final_kernelheap);
|
|
1582 |
|
|
1583 |
/*
|
|
1584 |
* Now that we can use memory outside the top 4GB (on 64-bit
|
|
1585 |
* systems) and we know the size of segmap, we can set the final
|
|
1586 |
* size of the kernel's heap. Note: on 64-bit systems we still
|
|
1587 |
* can't touch anything in the bottom half of the top 4GB range
|
|
1588 |
* because boot still has pages mapped there.
|
|
1589 |
*/
|
|
1590 |
if (final_kernelheap < boot_kernelheap) {
|
|
1591 |
kernelheap_extend(final_kernelheap, boot_kernelheap);
|
|
1592 |
#if defined(__amd64)
|
|
1593 |
kmem_setaside = vmem_xalloc(heap_arena, BOOT_DOUBLEMAP_SIZE,
|
|
1594 |
MMU_PAGESIZE, 0, 0, (void *)(BOOT_DOUBLEMAP_BASE),
|
|
1595 |
(void *)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE),
|
|
1596 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
|
|
1597 |
PRM_DEBUG(kmem_setaside);
|
|
1598 |
if (kmem_setaside == NULL)
|
|
1599 |
panic("Could not protect boot's memory");
|
|
1600 |
#endif
|
|
1601 |
}
|
|
1602 |
/*
|
|
1603 |
* Now that the kernel heap may have grown significantly, we need
|
|
1604 |
* to make all the remaining page_t's available to back that memory.
|
|
1605 |
*
|
|
1606 |
* XX64 this should probably wait till after release boot-strap too.
|
|
1607 |
*/
|
|
1608 |
pages_left = npages - boot_npages;
|
|
1609 |
if (pages_left > 0) {
|
|
1610 |
PRM_DEBUG(pages_left);
|
|
1611 |
(void) kphysm_init(NULL, memseg_base, boot_npages, pages_left);
|
|
1612 |
}
|
|
1613 |
|
|
1614 |
#if defined(__amd64)
|
|
1615 |
|
|
1616 |
/*
|
|
1617 |
* Create the device arena for toxic (to dtrace/kmdb) mappings.
|
|
1618 |
*/
|
|
1619 |
device_arena = vmem_create("device", (void *)toxic_addr,
|
|
1620 |
toxic_size, MMU_PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
|
|
1621 |
|
|
1622 |
#else /* __i386 */
|
|
1623 |
|
|
1624 |
/*
|
|
1625 |
* allocate the bit map that tracks toxic pages
|
|
1626 |
*/
|
|
1627 |
toxic_bit_map_len = btop((ulong_t)(ptable_va - kernelbase));
|
|
1628 |
PRM_DEBUG(toxic_bit_map_len);
|
|
1629 |
toxic_bit_map =
|
|
1630 |
kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP);
|
|
1631 |
ASSERT(toxic_bit_map != NULL);
|
|
1632 |
PRM_DEBUG(toxic_bit_map);
|
|
1633 |
|
|
1634 |
#endif /* __i386 */
|
|
1635 |
|
|
1636 |
|
|
1637 |
/*
|
|
1638 |
* Now that we've got more VA, as well as the ability to allocate from
|
|
1639 |
* it, tell the debugger.
|
|
1640 |
*/
|
|
1641 |
if (boothowto & RB_DEBUG)
|
|
1642 |
kdi_dvec_memavail();
|
|
1643 |
|
|
1644 |
/*
|
|
1645 |
* The following code installs a special page fault handler (#pf)
|
|
1646 |
* to work around a pentium bug.
|
|
1647 |
*/
|
|
1648 |
#if !defined(__amd64)
|
|
1649 |
if (x86_type == X86_TYPE_P5) {
|
|
1650 |
gate_desc_t *newidt;
|
|
1651 |
desctbr_t newidt_r;
|
|
1652 |
|
|
1653 |
if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL)
|
|
1654 |
panic("failed to install pentium_pftrap");
|
|
1655 |
|
|
1656 |
bcopy(idt0, newidt, sizeof (idt0));
|
|
1657 |
set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap,
|
|
1658 |
KCS_SEL, 0, SDT_SYSIGT, SEL_KPL);
|
|
1659 |
|
|
1660 |
(void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE,
|
|
1661 |
PROT_READ|PROT_EXEC);
|
|
1662 |
|
|
1663 |
newidt_r.dtr_limit = sizeof (idt0) - 1;
|
|
1664 |
newidt_r.dtr_base = (uintptr_t)newidt;
|
|
1665 |
CPU->cpu_idt = newidt;
|
|
1666 |
wr_idtr(&newidt_r);
|
|
1667 |
}
|
|
1668 |
#endif /* !__amd64 */
|
|
1669 |
|
|
1670 |
/*
|
|
1671 |
* Map page pfn=0 for drivers, such as kd, that need to pick up
|
|
1672 |
* parameters left there by controllers/BIOS.
|
|
1673 |
*/
|
|
1674 |
PRM_POINT("setup up p0_va");
|
|
1675 |
p0_va = i86devmap(0, 1, PROT_READ);
|
|
1676 |
PRM_DEBUG(p0_va);
|
|
1677 |
|
|
1678 |
/*
|
|
1679 |
* If the following is true, someone has patched phsymem to be less
|
|
1680 |
* than the number of pages that the system actually has. Remove
|
|
1681 |
* pages until system memory is limited to the requested amount.
|
|
1682 |
* Since we have allocated page structures for all pages, we
|
|
1683 |
* correct the amount of memory we want to remove by the size of
|
|
1684 |
* the memory used to hold page structures for the non-used pages.
|
|
1685 |
*/
|
|
1686 |
if (physmem < npages) {
|
|
1687 |
uint_t diff;
|
|
1688 |
offset_t off;
|
|
1689 |
struct page *pp;
|
|
1690 |
caddr_t rand_vaddr;
|
|
1691 |
struct seg kseg;
|
|
1692 |
|
|
1693 |
cmn_err(CE_WARN, "limiting physmem to %lu pages", physmem);
|
|
1694 |
|
|
1695 |
off = 0;
|
|
1696 |
diff = npages - physmem;
|
|
1697 |
diff -= mmu_btopr(diff * sizeof (struct page));
|
|
1698 |
kseg.s_as = &kas;
|
|
1699 |
while (diff--) {
|
|
1700 |
rand_vaddr = (caddr_t)
|
|
1701 |
(((uintptr_t)&unused_pages_vp >> 7) ^
|
|
1702 |
(uintptr_t)((u_offset_t)off >> MMU_PAGESHIFT));
|
|
1703 |
pp = page_create_va(&unused_pages_vp, off, MMU_PAGESIZE,
|
|
1704 |
PG_WAIT | PG_EXCL, &kseg, rand_vaddr);
|
|
1705 |
if (pp == NULL) {
|
|
1706 |
panic("limited physmem too much!");
|
|
1707 |
/*NOTREACHED*/
|
|
1708 |
}
|
|
1709 |
page_io_unlock(pp);
|
|
1710 |
page_downgrade(pp);
|
|
1711 |
availrmem--;
|
|
1712 |
off += MMU_PAGESIZE;
|
|
1713 |
}
|
|
1714 |
}
|
|
1715 |
|
|
1716 |
cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n",
|
|
1717 |
physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled));
|
|
1718 |
|
|
1719 |
PRM_POINT("Calling hat_init_finish()...");
|
|
1720 |
hat_init_finish();
|
|
1721 |
PRM_POINT("hat_init_finish() done");
|
|
1722 |
|
|
1723 |
/*
|
|
1724 |
* Initialize the segkp segment type.
|
|
1725 |
*/
|
|
1726 |
rw_enter(&kas.a_lock, RW_WRITER);
|
|
1727 |
if (!segkp_fromheap) {
|
|
1728 |
if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize),
|
|
1729 |
segkp) < 0) {
|
|
1730 |
panic("startup: cannot attach segkp");
|
|
1731 |
/*NOTREACHED*/
|
|
1732 |
}
|
|
1733 |
} else {
|
|
1734 |
/*
|
|
1735 |
* For 32 bit x86 systems, we will have segkp under the heap.
|
|
1736 |
* There will not be a segkp segment. We do, however, need
|
|
1737 |
* to fill in the seg structure.
|
|
1738 |
*/
|
|
1739 |
segkp->s_as = &kas;
|
|
1740 |
}
|
|
1741 |
if (segkp_create(segkp) != 0) {
|
|
1742 |
panic("startup: segkp_create failed");
|
|
1743 |
/*NOTREACHED*/
|
|
1744 |
}
|
|
1745 |
PRM_DEBUG(segkp);
|
|
1746 |
rw_exit(&kas.a_lock);
|
|
1747 |
|
|
1748 |
/*
|
|
1749 |
* kpm segment
|
|
1750 |
*/
|
|
1751 |
segmap_kpm = 0;
|
|
1752 |
if (kpm_desired) {
|
|
1753 |
kpm_init();
|
|
1754 |
kpm_enable = 1;
|
|
1755 |
}
|
|
1756 |
|
|
1757 |
/*
|
|
1758 |
* Now create segmap segment.
|
|
1759 |
*/
|
|
1760 |
rw_enter(&kas.a_lock, RW_WRITER);
|
|
1761 |
if (seg_attach(&kas, (caddr_t)segkmap_start, segmapsize, segkmap) < 0) {
|
|
1762 |
panic("cannot attach segkmap");
|
|
1763 |
/*NOTREACHED*/
|
|
1764 |
}
|
|
1765 |
PRM_DEBUG(segkmap);
|
|
1766 |
|
|
1767 |
/*
|
|
1768 |
* The 64 bit HAT permanently maps only segmap's page tables.
|
|
1769 |
* The 32 bit HAT maps the heap's page tables too.
|
|
1770 |
*/
|
|
1771 |
#if defined(__amd64)
|
|
1772 |
hat_kmap_init(segkmap_start, segmapsize);
|
|
1773 |
#else /* __i386 */
|
|
1774 |
ASSERT(segkmap_start + segmapsize == (uintptr_t)final_kernelheap);
|
|
1775 |
hat_kmap_init(segkmap_start, (uintptr_t)ekernelheap - segkmap_start);
|
|
1776 |
#endif /* __i386 */
|
|
1777 |
|
|
1778 |
a.prot = PROT_READ | PROT_WRITE;
|
|
1779 |
a.shmsize = 0;
|
|
1780 |
a.nfreelist = segmapfreelists;
|
|
1781 |
|
|
1782 |
if (segmap_create(segkmap, (caddr_t)&a) != 0)
|
|
1783 |
panic("segmap_create segkmap");
|
|
1784 |
rw_exit(&kas.a_lock);
|
|
1785 |
|
|
1786 |
setup_vaddr_for_ppcopy(CPU);
|
|
1787 |
|
|
1788 |
segdev_init();
|
|
1789 |
pmem_init();
|
|
1790 |
PRM_POINT("startup_vm() done");
|
|
1791 |
}
|
|
1792 |
|
|
1793 |
/*
 * Final phase of startup: runs after VM initialization is complete but
 * before the clock and other devices are started.  Configures devices,
 * publishes the ISA list, cuts the kernel loose from the boot loader's
 * bootops vector, and enables interrupts for the first time.
 */
static void
startup_end(void)
{
	extern void setx86isalist(void);

	PRM_POINT("startup_end() starting...");

	/*
	 * Perform tasks that get done after most of the VM
	 * initialization has been done but before the clock
	 * and other devices get started.
	 */
	kern_setup1();

	/*
	 * Perform CPC initialization for this CPU.
	 */
	kcpc_hw_init(CPU);

#if defined(__amd64)
	/*
	 * Validate support for syscall/sysret
	 * XX64 -- include SSE, SSE2, etc. here too?
	 */
	if ((x86_feature & X86_ASYSC) == 0) {
		cmn_err(CE_WARN,
		    "cpu%d does not support syscall/sysret", CPU->cpu_id);
	}
#endif
	/*
	 * Configure the system.
	 */
	PRM_POINT("Calling configure()...");
	configure();		/* set up devices */
	PRM_POINT("configure() done");

	/*
	 * Set the isa_list string to the defined instruction sets we
	 * support.
	 */
	setx86isalist();
	init_intr_threads(CPU);
	psm_install();

	/*
	 * We're done with bootops.  We don't unmap the bootstrap yet because
	 * we're still using bootsvcs.
	 */
	PRM_POINT("zeroing out bootops");
	/* Clear both the global and the debugger-visible copy of bootops. */
	*bootopsp = (struct bootops *)0;
	bootops = (struct bootops *)NULL;

	PRM_POINT("Enabling interrupts");
	(*picinitf)();
	sti();

	/* NOTE(review): soft interrupt registration flagged for relocation. */
	(void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1,
		"softlevel1", NULL, NULL); /* XXX to be moved later */

	PRM_POINT("startup_end() done");
}
|
|
1854 |
|
|
1855 |
/*
 * Obfuscated aliases around the machine serial number (hw_serial).
 * NOTE(review): presumably hostid/licensing scaffolding referenced by
 * name elsewhere in the system -- confirm before renaming or removing.
 */
extern char hw_serial[];
char *_hs1107 = hw_serial;
ulong_t _bdhs34;
|
|
1858 |
|
|
1859 |
/*
 * Post-startup work: export hardware capability flags to userland,
 * start the memory scrubber, process /etc/system forceloads, finish
 * MMU initialization, and register the boot CPU with its chip in the
 * device tree.
 */
void
post_startup(void)
{
	extern void memscrub_init(void);

	/*
	 * Set the system wide, processor-specific flags to be passed
	 * to userland via the aux vector for performance hints and
	 * instruction set extensions.
	 */
	bind_hwcap();

	/*
	 * Startup memory scrubber.
	 */
	(void) memscrub_init();

	/*
	 * Perform forceloading tasks for /etc/system.
	 */
	(void) mod_sysctl(SYS_FORCELOAD, NULL);

	/*
	 * complete mmu initialization, now that kernel and critical
	 * modules have been loaded.
	 */
	(void) post_startup_mmu_initialization();

	/*
	 * ON4.0: Force /proc module in until clock interrupt handle fixed
	 * ON4.0: This must be fixed or restated in /etc/systems.
	 */
	(void) modload("fs", "procfs");

#if defined(__i386)
	/*
	 * Check for required functional Floating Point hardware,
	 * unless FP hardware explicitly disabled.
	 */
	if (fpu_exists && (fpu_pentium_fdivbug || fp_kind == FP_NO))
		halt("No working FP hardware found");
#endif

	/* Record the post-boot free-memory high-water mark. */
	maxmem = freemem;

	add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi);

	/*
	 * Perform the formal initialization of the boot chip,
	 * and associate the boot cpu with it.
	 * This must be done after the cpu node for CPU has been
	 * added to the device tree, when the necessary probing to
	 * know the chip type and chip "id" is performed.
	 */
	chip_cpu_init(CPU);
	chip_cpu_assign(CPU);
}
|
|
1916 |
|
|
1917 |
static int
|
|
1918 |
pp_in_ramdisk(page_t *pp)
|
|
1919 |
{
|
|
1920 |
extern uint64_t ramdisk_start, ramdisk_end;
|
|
1921 |
|
|
1922 |
return ((pp->p_pagenum >= btop(ramdisk_start)) &&
|
|
1923 |
(pp->p_pagenum < btopr(ramdisk_end)));
|
|
1924 |
}
|
|
1925 |
|
|
1926 |
/*
 * Release the resources the boot loader was using: unmount the boot
 * ramdisk, tear down boot mappings, free (or preserve, if root lives on
 * the ramdisk) the boot pages, and reserve one real-mode page below 1 MB
 * for starting the other processors.
 */
void
release_bootstrap(void)
{
	int root_is_ramdisk;
	pfn_t pfn;
	page_t *pp;
	extern void kobj_boot_unmountroot(void);
	extern dev_t rootdev;

	/* unmount boot ramdisk and release kmem usage */
	kobj_boot_unmountroot();

	/*
	 * We're finished using the boot loader so free its pages.
	 */
	PRM_POINT("Unmapping lower boot pages");
	clear_boot_mappings(0, kernelbase);
#if defined(__amd64)
	PRM_POINT("Unmapping upper boot pages");
	clear_boot_mappings(BOOT_DOUBLEMAP_BASE,
	    BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE);
#endif

	/*
	 * If root isn't on ramdisk, destroy the hardcoded
	 * ramdisk node now and release the memory. Else,
	 * ramdisk memory is kept in rd_pages.
	 */
	root_is_ramdisk = (getmajor(rootdev) == ddi_name_to_major("ramdisk"));
	if (!root_is_ramdisk) {
		dev_info_t *dip = ddi_find_devinfo("ramdisk", -1, 0);
		ASSERT(dip && ddi_get_parent(dip) == ddi_root_node());
		ndi_rele_devi(dip);	/* held from ddi_find_devinfo */
		(void) ddi_remove_child(dip, 0);
	}

	PRM_POINT("Releasing boot pages");
	while (bootpages) {
		pp = bootpages;
		bootpages = pp->p_next;
		if (root_is_ramdisk && pp_in_ramdisk(pp)) {
			/* ramdisk page: move it to rd_pages instead of freeing */
			pp->p_next = rd_pages;
			rd_pages = pp;
			continue;
		}
		pp->p_next = (struct page *)0;
		page_free(pp, 1);
	}

	/*
	 * Find 1 page below 1 MB so that other processors can boot up.
	 * Make sure it has a kernel VA as well as a 1:1 mapping.
	 * We should have just free'd one up.
	 */
	if (use_mp) {
		for (pfn = 1; pfn < btop(1*1024*1024); pfn++) {
			if (page_numtopp_alloc(pfn) == NULL)
				continue;
			rm_platter_va = i86devmap(pfn, 1,
			    PROT_READ | PROT_WRITE | PROT_EXEC);
			rm_platter_pa = ptob(pfn);
			/* identity (1:1) mapping for real-mode startup code */
			hat_devload(kas.a_hat,
			    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
			    pfn, PROT_READ | PROT_WRITE | PROT_EXEC,
			    HAT_LOAD_NOCONSIST);
			break;
		}
		/* loop ran off the end without finding a page */
		if (pfn == btop(1*1024*1024))
			panic("No page available for starting "
			    "other processors");
	}

#if defined(__amd64)
	PRM_POINT("Returning boot's VA space to kernel heap");
	if (kmem_setaside != NULL)
		vmem_free(heap_arena, kmem_setaside, BOOT_DOUBLEMAP_SIZE);
#endif
}
|
|
2004 |
|
|
2005 |
/*
|
|
2006 |
* Initialize the platform-specific parts of a page_t.
|
|
2007 |
*/
|
|
2008 |
void
|
|
2009 |
add_physmem_cb(page_t *pp, pfn_t pnum)
|
|
2010 |
{
|
|
2011 |
pp->p_pagenum = pnum;
|
|
2012 |
pp->p_mapping = NULL;
|
|
2013 |
pp->p_embed = 0;
|
|
2014 |
pp->p_share = 0;
|
|
2015 |
pp->p_mlentry = 0;
|
|
2016 |
}
|
|
2017 |
|
|
2018 |
/*
|
|
2019 |
* kphysm_init() initializes physical memory.
|
|
2020 |
*/
|
|
2021 |
/*
 * Walk phys_avail and turn each usable physical range into a memseg:
 * allocate (or consume caller-provided) page_t structures, link the
 * memseg into the global memsegs list in decreasing pfn order (merging
 * with pfn-adjacent neighbors where the page_t arrays are also
 * contiguous), and hand the pages to add_physmem().
 *
 * inpp    - caller-provided page_t array, or NULL to carve page_ts out
 *           of each range itself
 * memsegp - array of memseg structures to fill in
 * start   - number of leading pages to skip (already initialized)
 * npages  - maximum number of pages to add
 *
 * Returns the number of pages actually added.
 */
static pgcnt_t
kphysm_init(
	page_t *inpp,
	struct memseg *memsegp,
	pgcnt_t start,
	pgcnt_t npages)
{
	struct memlist *pmem;
	struct memseg *cur_memseg;
	struct memseg **memsegpp;
	pfn_t base_pfn;
	pgcnt_t num;
	pgcnt_t total_skipped = 0;
	pgcnt_t skipping = 0;
	pgcnt_t pages_done = 0;
	pgcnt_t largepgcnt;
	uint64_t addr;
	uint64_t size;
	page_t *pp = inpp;
	int dobreak = 0;
	extern pfn_t ddiphysmin;

	ASSERT(page_hash != NULL && page_hashsz != 0);

	/* advance to the first unused memseg slot */
	for (cur_memseg = memsegp; cur_memseg->pages != NULL; cur_memseg++);
	ASSERT(cur_memseg == memsegp || start > 0);

	for (pmem = phys_avail; pmem && npages; pmem = pmem->next) {
		/*
		 * In a 32 bit kernel can't use higher memory if we're
		 * not booting in PAE mode. This check takes care of that.
		 */
		addr = pmem->address;
		size = pmem->size;
		if (btop(addr) > physmax)
			continue;

		/*
		 * align addr and size - they may not be at page boundaries
		 */
		if ((addr & MMU_PAGEOFFSET) != 0) {
			addr += MMU_PAGEOFFSET;
			addr &= ~(uint64_t)MMU_PAGEOFFSET;
			size -= addr - pmem->address;
		}

		/* only process pages below physmax */
		if (btop(addr + size) > physmax)
			size = ptob(physmax - btop(addr));

		num = btop(size);
		if (num == 0)
			continue;

		/* honor the caller's request to skip the first 'start' pages */
		if (total_skipped < start) {
			if (start - total_skipped > num) {
				total_skipped += num;
				continue;
			}
			skipping = start - total_skipped;
			num -= skipping;
			addr += (MMU_PAGESIZE * skipping);
			total_skipped = start;
		}
		if (num == 0)
			continue;

		if (num > npages)
			num = npages;

		npages -= num;
		pages_done += num;
		base_pfn = btop(addr);

		/*
		 * If the caller didn't provide space for the page
		 * structures, carve them out of the memseg they will
		 * represent.
		 */
		if (pp == NULL) {
			pgcnt_t pp_pgs;

			if (num <= 1)
				continue;

			/*
			 * Compute how many of the pages we need to use for
			 * page_ts
			 */
			pp_pgs = (num * sizeof (page_t)) / MMU_PAGESIZE + 1;
			/* shrink the estimate while it still covers the rest */
			while (mmu_ptob(pp_pgs - 1) / sizeof (page_t) >=
			    num - pp_pgs + 1)
				--pp_pgs;
			PRM_DEBUG(pp_pgs);

			pp = vmem_alloc(heap_arena, mmu_ptob(pp_pgs),
			    VM_NOSLEEP);
			if (pp == NULL) {
				cmn_err(CE_WARN, "Unable to add %ld pages to "
				    "the system.", num);
				continue;
			}

			/* map the carved-out pfns to back the page_t array */
			hat_devload(kas.a_hat, (void *)pp, mmu_ptob(pp_pgs),
			    base_pfn, PROT_READ | PROT_WRITE | HAT_UNORDERED_OK,
			    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
			bzero(pp, mmu_ptob(pp_pgs));
			num -= pp_pgs;
			base_pfn += pp_pgs;
		}

		if (prom_debug)
			prom_printf("MEMSEG addr=0x%" PRIx64
			    " pgs=0x%lx pfn 0x%lx-0x%lx\n",
			    addr, num, base_pfn, base_pfn + num);

		/*
		 * drop pages below ddiphysmin to simplify ddi memory
		 * allocation with non-zero addr_lo requests.
		 */
		if (base_pfn < ddiphysmin) {
			if (base_pfn + num <= ddiphysmin) {
				/* drop entire range below ddiphysmin */
				continue;
			}
			/* adjust range to ddiphysmin */
			pp += (ddiphysmin - base_pfn);
			num -= (ddiphysmin - base_pfn);
			base_pfn = ddiphysmin;
		}
		/*
		 * Build the memsegs entry
		 */
		cur_memseg->pages = pp;
		cur_memseg->epages = pp + num;
		cur_memseg->pages_base = base_pfn;
		cur_memseg->pages_end = base_pfn + num;

		/*
		 * insert in memseg list in decreasing pfn range order.
		 * Low memory is typically more fragmented such that this
		 * ordering keeps the larger ranges at the front of the list
		 * for code that searches memseg.
		 */
		memsegpp = &memsegs;
		for (;;) {
			if (*memsegpp == NULL) {
				/* empty memsegs */
				memsegs = cur_memseg;
				break;
			}
			/* check for continuity with start of memsegpp */
			if (cur_memseg->pages_end == (*memsegpp)->pages_base) {
				if (cur_memseg->epages == (*memsegpp)->pages) {
					/*
					 * contiguous pfn and page_t's. Merge
					 * cur_memseg into *memsegpp. Drop
					 * cur_memseg
					 */
					(*memsegpp)->pages_base =
					    cur_memseg->pages_base;
					(*memsegpp)->pages =
					    cur_memseg->pages;
					/*
					 * check if contiguous with the end of
					 * the next memseg.
					 */
					if ((*memsegpp)->next &&
					    ((*memsegpp)->pages_base ==
					    (*memsegpp)->next->pages_end)) {
						/*
						 * also merges with its
						 * successor; one more pass
						 * through the end-continuity
						 * check below, then stop.
						 */
						cur_memseg = *memsegpp;
						memsegpp = &((*memsegpp)->next);
						dobreak = 1;
					} else {
						break;
					}
				} else {
					/*
					 * contiguous pfn but not page_t's.
					 * drop last pfn/page_t in cur_memseg
					 * to prevent creation of large pages
					 * with noncontiguous page_t's if not
					 * aligned to largest page boundary.
					 */
					largepgcnt = page_get_pagecnt(
					    page_num_pagesizes() - 1);

					if (cur_memseg->pages_end &
					    (largepgcnt - 1)) {
						num--;
						cur_memseg->epages--;
						cur_memseg->pages_end--;
					}
				}
			}

			/* check for continuity with end of memsegpp */
			if (cur_memseg->pages_base == (*memsegpp)->pages_end) {
				if (cur_memseg->pages == (*memsegpp)->epages) {
					/*
					 * contiguous pfn and page_t's. Merge
					 * cur_memseg into *memsegpp. Drop
					 * cur_memseg.
					 */
					if (dobreak) {
						/* merge previously done */
						cur_memseg->pages =
						    (*memsegpp)->pages;
						cur_memseg->pages_base =
						    (*memsegpp)->pages_base;
						cur_memseg->next =
						    (*memsegpp)->next;
					} else {
						(*memsegpp)->pages_end =
						    cur_memseg->pages_end;
						(*memsegpp)->epages =
						    cur_memseg->epages;
					}
					break;
				}
				/*
				 * contiguous pfn but not page_t's.
				 * drop first pfn/page_t in cur_memseg
				 * to prevent creation of large pages
				 * with noncontiguous page_t's if not
				 * aligned to largest page boundary.
				 */
				largepgcnt = page_get_pagecnt(
				    page_num_pagesizes() - 1);
				if (base_pfn & (largepgcnt - 1)) {
					num--;
					base_pfn++;
					cur_memseg->pages++;
					cur_memseg->pages_base++;
					pp = cur_memseg->pages;
				}
				if (dobreak)
					break;
			}

			/* no merge: insert here to keep decreasing pfn order */
			if (cur_memseg->pages_base >=
			    (*memsegpp)->pages_end) {
				cur_memseg->next = *memsegpp;
				*memsegpp = cur_memseg;
				break;
			}
			if ((*memsegpp)->next == NULL) {
				cur_memseg->next = NULL;
				(*memsegpp)->next = cur_memseg;
				break;
			}
			memsegpp = &((*memsegpp)->next);
			ASSERT(*memsegpp != NULL);
		}

		/*
		 * add_physmem() initializes the PSM part of the page
		 * struct by calling the PSM back with add_physmem_cb().
		 * In addition it coalesces pages into larger pages as
		 * it initializes them.
		 */
		add_physmem(pp, num, base_pfn);
		cur_memseg++;
		availrmem_initial += num;
		availrmem += num;

		/*
		 * If the caller provided the page frames to us, then
		 * advance in that list.  Otherwise, prepare to allocate
		 * our own page frames for the next memseg.
		 */
		pp = (inpp == NULL) ? NULL : pp + num;
	}

	PRM_DEBUG(availrmem_initial);
	PRM_DEBUG(availrmem);
	PRM_DEBUG(freemem);
	build_pfn_hash();
	return (pages_done);
}
|
|
2301 |
|
|
2302 |
/*
|
|
2303 |
* Kernel VM initialization.
|
|
2304 |
*/
|
|
2305 |
/*
 * Kernel VM initialization: attach the kernel's segments (text/data,
 * valloc, heap, 64-bit core, debugger) to kas, protect the redzone at
 * kernelbase, and set the final protections on kernel text and data.
 */
static void
kvm_init(void)
{
#ifdef DEBUG
	extern void _start();

	ASSERT((caddr_t)_start == s_text);
#endif
	/* kernel text must start on a page boundary */
	ASSERT((((uintptr_t)s_text) & MMU_PAGEOFFSET) == 0);

	/*
	 * Put the kernel segments in kernel address space.
	 */
	rw_enter(&kas.a_lock, RW_WRITER);
	as_avlinit(&kas);

	(void) seg_attach(&kas, s_text, e_moddata - s_text, &ktextseg);
	(void) segkmem_create(&ktextseg);

	(void) seg_attach(&kas, (caddr_t)valloc_base, valloc_sz, &kvalloc);
	(void) segkmem_create(&kvalloc);

	/*
	 * We're about to map out /boot.  This is the beginning of the
	 * system resource management transition. We can no longer
	 * call into /boot for I/O or memory allocations.
	 *
	 * XX64 - Is this still correct with kernelheap_extend() being called
	 * later than this????
	 */
	(void) seg_attach(&kas, final_kernelheap,
	    ekernelheap - final_kernelheap, &kvseg);
	(void) segkmem_create(&kvseg);

#if defined(__amd64)
	(void) seg_attach(&kas, (caddr_t)core_base, core_size, &kvseg_core);
	(void) segkmem_create(&kvseg_core);
#endif

	(void) seg_attach(&kas, (caddr_t)SEGDEBUGBASE, (size_t)SEGDEBUGSIZE,
	    &kdebugseg);
	(void) segkmem_create(&kdebugseg);

	rw_exit(&kas.a_lock);

	/*
	 * Ensure that the red zone at kernelbase is never accessible.
	 */
	(void) as_setprot(&kas, (caddr_t)kernelbase, KERNEL_REDZONE_SIZE, 0);

	/*
	 * Make the text writable so that it can be hot patched by DTrace.
	 */
	(void) as_setprot(&kas, s_text, e_modtext - s_text,
	    PROT_READ | PROT_WRITE | PROT_EXEC);

	/*
	 * Make data writable until end.
	 */
	(void) as_setprot(&kas, s_data, e_moddata - s_data,
	    PROT_READ | PROT_WRITE | PROT_EXEC);
}
|
|
2367 |
|
|
2368 |
/*
 * These are MTRR registers supported by P6 -- in-core copies of the
 * fixed-range and variable-range MTRR MSRs, captured by setup_mtrr()
 * and replayed onto each CPU by mtrr_sync().
 */
static struct	mtrrvar	mtrrphys_arr[MAX_MTRRVAR];
static uint64_t mtrr64k, mtrr16k1, mtrr16k2;
static uint64_t mtrr4k1, mtrr4k2, mtrr4k3;
static uint64_t mtrr4k4, mtrr4k5, mtrr4k6;
static uint64_t mtrr4k7, mtrr4k8, mtrrcap;
uint64_t mtrrdef, pat_attr_reg;

/*
 * Disable reprogramming of MTRRs by default.
 */
int	enable_relaxed_mtrr = 0;

/*
 * These must serve for Pentium, Pentium Pro (P6/Pentium II/Pentium III)
 * and Pentium 4, and yes, they are named 0, 1, 2, 4, 3 in ascending
 * address order (starting from 0x400). The Pentium 4 only implements
 * 4 sets, and while they are named 0-3 in the doc, the corresponding
 * names for P6 are 0,1,2,4. So define these arrays in address order
 * so that they work for both pre-Pentium4 and Pentium 4 processors.
 */

static uint_t mci_ctl[] = {REG_MC0_CTL, REG_MC1_CTL, REG_MC2_CTL,
		REG_MC4_CTL, REG_MC3_CTL};
static uint_t mci_status[] = {REG_MC0_STATUS, REG_MC1_STATUS, REG_MC2_STATUS,
		REG_MC4_STATUS, REG_MC3_STATUS};
static uint_t mci_addr[] = {REG_MC0_ADDR, REG_MC1_ADDR, REG_MC2_ADDR,
		REG_MC4_ADDR, REG_MC3_ADDR};
/* number of machine-check banks in use; set by setup_mca() */
static int mca_cnt;
|
|
2399 |
|
|
2400 |
|
|
2401 |
/*
 * Enable the Machine Check Architecture on this CPU: enable all error
 * reporting banks, clear any stale status, and set CR4.MCE.
 */
void
setup_mca()
{
	int 		i;
	uint64_t	allzeros;
	uint64_t	allones;
	uint64_t	mca_cap;

	if (!(x86_feature & X86_MCA))
		return;
	(void) rdmsr(REG_MCG_CAP, &mca_cap);
	allones = 0xffffffffffffffffULL;
	if (mca_cap & MCG_CAP_CTL_P)
		(void) wrmsr(REG_MCG_CTL, &allones);
	mca_cnt = mca_cap & MCG_CAP_COUNT_MASK;
	if (mca_cnt > P6_MCG_CAP_COUNT)
		mca_cnt = P6_MCG_CAP_COUNT;
	/*
	 * Starts at bank 1: bank 0's control register is deliberately
	 * left alone here -- NOTE(review): presumably because MC0_CTL is
	 * platform/BIOS-owned on P6-family parts; confirm against the
	 * Intel SDM machine-check guidelines.
	 */
	for (i = 1; i < mca_cnt; i++)
		(void) wrmsr(mci_ctl[i], &allones);
	allzeros = 0;
	/* clear any status left over from before the kernel took control */
	for (i = 0; i < mca_cnt; i++)
		(void) wrmsr(mci_status[i], &allzeros);
	setcr4(getcr4() | CR4_MCE);

}
|
|
2426 |
|
|
2427 |
/*
 * Machine-check exception handler.  Reports and clears the status of
 * every valid machine-check bank.  Returns 0 if the global status says
 * the interrupted instruction can be restarted (RIPV), 1 otherwise
 * (i.e. non-zero means the error is not recoverable at rp->r_pc).
 */
int
mca_exception(struct regs *rp)
{
	uint64_t status, addr;
	uint64_t allzeros;
	uint64_t buf;
	int i, ret = 1, errcode, mserrcode;

	allzeros = 0;
	(void) rdmsr(REG_MCG_STATUS, &buf);
	status = buf;
	if (status & MCG_STATUS_RIPV)
		ret = 0;
	if (status & MCG_STATUS_EIPV)
		cmn_err(CE_WARN, "MCE at 0x%lx", rp->r_pc);
	/* acknowledge the global status before scanning the banks */
	(void) wrmsr(REG_MCG_STATUS, &allzeros);
	for (i = 0; i < mca_cnt; i++) {
		(void) rdmsr(mci_status[i], &buf);
		status = buf;
		/*
		 * If status register not valid skip this bank
		 */
		if (!(status & MCI_STATUS_VAL))
			continue;
		errcode = status & MCI_STATUS_ERRCODE;
		mserrcode = (status >> MSERRCODE_SHFT) & MCI_STATUS_ERRCODE;
		if (status & MCI_STATUS_ADDRV) {
			/*
			 * If mci_addr contains the address where
			 * error occurred, display the address
			 */
			(void) rdmsr(mci_addr[i], &buf);
			addr = buf;
			cmn_err(CE_WARN, "MCE: Bank %d: error code 0x%x:"\
			    "addr = 0x%" PRIx64 ", model errcode = 0x%x", i,
			    errcode, addr, mserrcode);
		} else {
			cmn_err(CE_WARN,
			    "MCE: Bank %d: error code 0x%x, mserrcode = 0x%x",
			    i, errcode, mserrcode);
		}
		/* clear this bank so the next error can be latched */
		(void) wrmsr(mci_status[i], &allzeros);
	}
	return (ret);
}
|
|
2472 |
|
|
2473 |
/*
 * Capture the boot CPU's MTRR configuration (fixed and variable ranges)
 * into the in-core copies above, optionally relax the variable ranges
 * when PAT is available, then push the configuration back to the
 * hardware via mtrr_sync().
 */
void
setup_mtrr()
{
	int i, ecx;
	int vcnt;
	struct mtrrvar	*mtrrphys;

	if (!(x86_feature & X86_MTRR))
		return;

	(void) rdmsr(REG_MTRRCAP, &mtrrcap);
	(void) rdmsr(REG_MTRRDEF, &mtrrdef);
	if (mtrrcap & MTRRCAP_FIX) {
		(void) rdmsr(REG_MTRR64K, &mtrr64k);
		(void) rdmsr(REG_MTRR16K1, &mtrr16k1);
		(void) rdmsr(REG_MTRR16K2, &mtrr16k2);
		(void) rdmsr(REG_MTRR4K1, &mtrr4k1);
		(void) rdmsr(REG_MTRR4K2, &mtrr4k2);
		(void) rdmsr(REG_MTRR4K3, &mtrr4k3);
		(void) rdmsr(REG_MTRR4K4, &mtrr4k4);
		(void) rdmsr(REG_MTRR4K5, &mtrr4k5);
		(void) rdmsr(REG_MTRR4K6, &mtrr4k6);
		(void) rdmsr(REG_MTRR4K7, &mtrr4k7);
		(void) rdmsr(REG_MTRR4K8, &mtrr4k8);
	}
	if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR)
		vcnt = MAX_MTRRVAR;

	/*
	 * NOTE(review): the loop bound is vcnt - 1, so the last
	 * variable-range base/mask MSR pair reported by MTRRCAP is never
	 * read (mtrr_sync() uses the same bound when writing).  Verify
	 * whether this is intentional or an off-by-one.
	 */
	for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr;
		i < vcnt - 1; i++, ecx += 2, mtrrphys++) {
		(void) rdmsr(ecx, &mtrrphys->mtrrphys_base);
		(void) rdmsr(ecx + 1, &mtrrphys->mtrrphys_mask);
		if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) {
			mtrrphys->mtrrphys_mask &= ~MTRRPHYSMASK_V;
		}
	}
	if (x86_feature & X86_PAT) {
		if (enable_relaxed_mtrr)
			mtrrdef = MTRR_TYPE_WB|MTRRDEF_FE|MTRRDEF_E;
		pat_attr_reg = PAT_DEFAULT_ATTRIBUTE;
	}

	mtrr_sync();
}
|
|
2517 |
|
|
2518 |
/*
|
|
2519 |
* Sync current cpu mtrr with the incore copy of mtrr.
|
|
2520 |
* This function has to be invoked with interrupts disabled
|
|
2521 |
* Currently we do not capture other cpu's. This is invoked on cpu0
|
|
2522 |
* just after reading /etc/system.
|
|
2523 |
* On other cpu's its invoked from mp_startup().
|
|
2524 |
*/
|
|
2525 |
/*
 * Sync current cpu mtrr with the incore copy of mtrr.
 * This function has to be invoked with interrupts disabled
 * Currently we do not capture other cpu's. This is invoked on cpu0
 * just after reading /etc/system.
 * On other cpu's its invoked from mp_startup().
 *
 * The sequence (disable caches, flush, disable MTRRs, rewrite, re-enable,
 * flush again, restore CR0) is order-critical; do not reorder statements.
 */
void
mtrr_sync()
{
	uint64_t my_mtrrdef;
	uint_t	crvalue, cr0_orig;
	int	vcnt, i, ecx;
	struct	mtrrvar	*mtrrphys;

	/* disable caching (CD=1, NW=0) while MTRRs are being rewritten */
	cr0_orig = crvalue = getcr0();
	crvalue |= CR0_CD;
	crvalue &= ~CR0_NW;
	setcr0(crvalue);
	invalidate_cache();
	/* reload CR3 to flush the TLB */
	setcr3(getcr3());

	if (x86_feature & X86_PAT) {
		(void) wrmsr(REG_MTRRPAT, &pat_attr_reg);
	}
	/* disable MTRRs (clear the enable bit) before reprogramming them */
	(void) rdmsr(REG_MTRRDEF, &my_mtrrdef);
	my_mtrrdef &= ~MTRRDEF_E;
	(void) wrmsr(REG_MTRRDEF, &my_mtrrdef);
	if (mtrrcap & MTRRCAP_FIX) {
		(void) wrmsr(REG_MTRR64K, &mtrr64k);
		(void) wrmsr(REG_MTRR16K1, &mtrr16k1);
		(void) wrmsr(REG_MTRR16K2, &mtrr16k2);
		(void) wrmsr(REG_MTRR4K1, &mtrr4k1);
		(void) wrmsr(REG_MTRR4K2, &mtrr4k2);
		(void) wrmsr(REG_MTRR4K3, &mtrr4k3);
		(void) wrmsr(REG_MTRR4K4, &mtrr4k4);
		(void) wrmsr(REG_MTRR4K5, &mtrr4k5);
		(void) wrmsr(REG_MTRR4K6, &mtrr4k6);
		(void) wrmsr(REG_MTRR4K7, &mtrr4k7);
		(void) wrmsr(REG_MTRR4K8, &mtrr4k8);
	}
	if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR)
		vcnt = MAX_MTRRVAR;
	/* same vcnt - 1 bound as setup_mtrr() -- see note there */
	for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr;
	    i < vcnt - 1; i++, ecx += 2, mtrrphys++) {
		(void) wrmsr(ecx, &mtrrphys->mtrrphys_base);
		(void) wrmsr(ecx + 1, &mtrrphys->mtrrphys_mask);
	}
	/* restore the default-type register (re-enables MTRRs) */
	(void) wrmsr(REG_MTRRDEF, &mtrrdef);
	setcr3(getcr3());
	invalidate_cache();
	setcr0(cr0_orig);
}
|
|
2571 |
|
|
2572 |
/*
|
|
2573 |
* resync mtrr so that BIOS is happy. Called from mdboot
|
|
2574 |
*/
|
|
2575 |
void
|
|
2576 |
mtrr_resync()
|
|
2577 |
{
|
|
2578 |
if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) {
|
|
2579 |
/*
|
|
2580 |
* We could have changed the default mtrr definition.
|
|
2581 |
* Put it back to uncached which is what it is at power on
|
|
2582 |
*/
|
|
2583 |
mtrrdef = MTRR_TYPE_UC|MTRRDEF_FE|MTRRDEF_E;
|
|
2584 |
mtrr_sync();
|
|
2585 |
}
|
|
2586 |
}
|
|
2587 |
|
|
2588 |
/*
 * Read boot-time configuration properties via bootops and fill in
 * system_hardware, eprom_kernelbase, segmapsize and segmapfreelists.
 * Every property is optional; each falls back to a built-in default
 * when the property is missing, too long for the buffer, or unparsable.
 */
void
get_system_configuration()
{
	char	prop[32];
	u_longlong_t nodes_ll, cpus_pernode_ll, lvalue;

	/* "nodes" and "cpus_pernode" are accepted only as a pair */
	if (((BOP_GETPROPLEN(bootops, "nodes") > sizeof (prop)) ||
	    (BOP_GETPROP(bootops, "nodes", prop) < 0) ||
	    (kobj_getvalue(prop, &nodes_ll) == -1) ||
	    (nodes_ll > MAXNODES)) ||
	    ((BOP_GETPROPLEN(bootops, "cpus_pernode") > sizeof (prop)) ||
	    (BOP_GETPROP(bootops, "cpus_pernode", prop) < 0) ||
	    (kobj_getvalue(prop, &cpus_pernode_ll) == -1))) {

		system_hardware.hd_nodes = 1;
		system_hardware.hd_cpus_per_node = 0;
	} else {
		system_hardware.hd_nodes = (int)nodes_ll;
		system_hardware.hd_cpus_per_node = (int)cpus_pernode_ll;
	}
	if ((BOP_GETPROPLEN(bootops, "kernelbase") > sizeof (prop)) ||
	    (BOP_GETPROP(bootops, "kernelbase", prop) < 0) ||
	    (kobj_getvalue(prop, &lvalue) == -1))
		eprom_kernelbase = NULL;
	else
		eprom_kernelbase = (uintptr_t)lvalue;

	if ((BOP_GETPROPLEN(bootops, "segmapsize") > sizeof (prop)) ||
	    (BOP_GETPROP(bootops, "segmapsize", prop) < 0) ||
	    (kobj_getvalue(prop, &lvalue) == -1)) {
		segmapsize = SEGMAPDEFAULT;
	} else {
		segmapsize = (uintptr_t)lvalue;
	}

	if ((BOP_GETPROPLEN(bootops, "segmapfreelists") > sizeof (prop)) ||
	    (BOP_GETPROP(bootops, "segmapfreelists", prop) < 0) ||
	    (kobj_getvalue(prop, &lvalue) == -1)) {
		segmapfreelists = 0;	/* use segmap driver default */
	} else {
		segmapfreelists = (int)lvalue;
	}
}
|
|
2631 |
|
|
2632 |
/*
|
|
2633 |
* Add to a memory list.
|
|
2634 |
* start = start of new memory segment
|
|
2635 |
* len = length of new memory segment in bytes
|
|
2636 |
* new = pointer to a new struct memlist
|
|
2637 |
* memlistp = memory list to which to add segment.
|
|
2638 |
*/
|
|
2639 |
static void
|
|
2640 |
memlist_add(
|
|
2641 |
uint64_t start,
|
|
2642 |
uint64_t len,
|
|
2643 |
struct memlist *new,
|
|
2644 |
struct memlist **memlistp)
|
|
2645 |
{
|
|
2646 |
struct memlist *cur;
|
|
2647 |
uint64_t end = start + len;
|
|
2648 |
|
|
2649 |
new->address = start;
|
|
2650 |
new->size = len;
|
|
2651 |
|
|
2652 |
cur = *memlistp;
|
|
2653 |
|
|
2654 |
while (cur) {
|
|
2655 |
if (cur->address >= end) {
|
|
2656 |
new->next = cur;
|
|
2657 |
*memlistp = new;
|
|
2658 |
new->prev = cur->prev;
|
|
2659 |
cur->prev = new;
|
|
2660 |
return;
|
|
2661 |
}
|
|
2662 |
ASSERT(cur->address + cur->size <= start);
|
|
2663 |
if (cur->next == NULL) {
|
|
2664 |
cur->next = new;
|
|
2665 |
new->prev = cur;
|
|
2666 |
new->next = NULL;
|
|
2667 |
return;
|
|
2668 |
}
|
|
2669 |
memlistp = &cur->next;
|
|
2670 |
cur = cur->next;
|
|
2671 |
}
|
|
2672 |
}
|
|
2673 |
|
|
2674 |
void
|
|
2675 |
kobj_vmem_init(vmem_t **text_arena, vmem_t **data_arena)
|
|
2676 |
{
|
|
2677 |
size_t tsize = e_modtext - modtext;
|
|
2678 |
size_t dsize = e_moddata - moddata;
|
|
2679 |
|
|
2680 |
*text_arena = vmem_create("module_text", tsize ? modtext : NULL, tsize,
|
|
2681 |
1, segkmem_alloc, segkmem_free, heaptext_arena, 0, VM_SLEEP);
|
|
2682 |
*data_arena = vmem_create("module_data", dsize ? moddata : NULL, dsize,
|
|
2683 |
1, segkmem_alloc, segkmem_free, heap32_arena, 0, VM_SLEEP);
|
|
2684 |
}
|
|
2685 |
|
|
2686 |
caddr_t
|
|
2687 |
kobj_text_alloc(vmem_t *arena, size_t size)
|
|
2688 |
{
|
|
2689 |
return (vmem_alloc(arena, size, VM_SLEEP | VM_BESTFIT));
|
|
2690 |
}
|
|
2691 |
|
|
2692 |
/*ARGSUSED*/
|
|
2693 |
caddr_t
|
|
2694 |
kobj_texthole_alloc(caddr_t addr, size_t size)
|
|
2695 |
{
|
|
2696 |
panic("unexpected call to kobj_texthole_alloc()");
|
|
2697 |
/*NOTREACHED*/
|
|
2698 |
return (0);
|
|
2699 |
}
|
|
2700 |
|
|
2701 |
/*ARGSUSED*/
|
|
2702 |
void
|
|
2703 |
kobj_texthole_free(caddr_t addr, size_t size)
|
|
2704 |
{
|
|
2705 |
panic("unexpected call to kobj_texthole_free()");
|
|
2706 |
}
|
|
2707 |
|
|
2708 |
/*
|
|
2709 |
* This is called just after configure() in startup().
|
|
2710 |
*
|
|
2711 |
* The ISALIST concept is a bit hopeless on Intel, because
|
|
2712 |
* there's no guarantee of an ever-more-capable processor
|
|
2713 |
* given that various parts of the instruction set may appear
|
|
2714 |
* and disappear between different implementations.
|
|
2715 |
*
|
|
2716 |
* While it would be possible to correct it and even enhance
|
|
2717 |
* it somewhat, the explicit hardware capability bitmask allows
|
|
2718 |
* more flexibility.
|
|
2719 |
*
|
|
2720 |
* So, we just leave this alone.
|
|
2721 |
*/
|
|
2722 |
void
|
|
2723 |
setx86isalist(void)
|
|
2724 |
{
|
|
2725 |
char *tp;
|
|
2726 |
size_t len;
|
|
2727 |
extern char *isa_list;
|
|
2728 |
|
|
2729 |
#define TBUFSIZE 1024
|
|
2730 |
|
|
2731 |
tp = kmem_alloc(TBUFSIZE, KM_SLEEP);
|
|
2732 |
*tp = '\0';
|
|
2733 |
|
|
2734 |
#if defined(__amd64)
|
|
2735 |
(void) strcpy(tp, "amd64 ");
|
|
2736 |
#endif
|
|
2737 |
|
|
2738 |
switch (x86_vendor) {
|
|
2739 |
case X86_VENDOR_Intel:
|
|
2740 |
case X86_VENDOR_AMD:
|
|
2741 |
case X86_VENDOR_TM:
|
|
2742 |
if (x86_feature & X86_CMOV) {
|
|
2743 |
/*
|
|
2744 |
* Pentium Pro or later
|
|
2745 |
*/
|
|
2746 |
(void) strcat(tp, "pentium_pro");
|
|
2747 |
(void) strcat(tp, x86_feature & X86_MMX ?
|
|
2748 |
"+mmx pentium_pro " : " ");
|
|
2749 |
}
|
|
2750 |
/*FALLTHROUGH*/
|
|
2751 |
case X86_VENDOR_Cyrix:
|
|
2752 |
/*
|
|
2753 |
* The Cyrix 6x86 does not have any Pentium features
|
|
2754 |
* accessible while not at privilege level 0.
|
|
2755 |
*/
|
|
2756 |
if (x86_feature & X86_CPUID) {
|
|
2757 |
(void) strcat(tp, "pentium");
|
|
2758 |
(void) strcat(tp, x86_feature & X86_MMX ?
|
|
2759 |
"+mmx pentium " : " ");
|
|
2760 |
}
|
|
2761 |
break;
|
|
2762 |
default:
|
|
2763 |
break;
|
|
2764 |
}
|
|
2765 |
(void) strcat(tp, "i486 i386 i86");
|
|
2766 |
len = strlen(tp) + 1; /* account for NULL at end of string */
|
|
2767 |
isa_list = strcpy(kmem_alloc(len, KM_SLEEP), tp);
|
|
2768 |
kmem_free(tp, TBUFSIZE);
|
|
2769 |
|
|
2770 |
#undef TBUFSIZE
|
|
2771 |
}
|
|
2772 |
|
|
2773 |
|
|
2774 |
#ifdef __amd64
|
|
2775 |
|
|
2776 |
void *
|
|
2777 |
device_arena_alloc(size_t size, int vm_flag)
|
|
2778 |
{
|
|
2779 |
return (vmem_alloc(device_arena, size, vm_flag));
|
|
2780 |
}
|
|
2781 |
|
|
2782 |
void
|
|
2783 |
device_arena_free(void *vaddr, size_t size)
|
|
2784 |
{
|
|
2785 |
vmem_free(device_arena, vaddr, size);
|
|
2786 |
}
|
|
2787 |
|
|
2788 |
#else
|
|
2789 |
|
|
2790 |
void *
|
|
2791 |
device_arena_alloc(size_t size, int vm_flag)
|
|
2792 |
{
|
|
2793 |
caddr_t vaddr;
|
|
2794 |
uintptr_t v;
|
|
2795 |
size_t start;
|
|
2796 |
size_t end;
|
|
2797 |
|
|
2798 |
vaddr = vmem_alloc(heap_arena, size, vm_flag);
|
|
2799 |
if (vaddr == NULL)
|
|
2800 |
return (NULL);
|
|
2801 |
|
|
2802 |
v = (uintptr_t)vaddr;
|
|
2803 |
ASSERT(v >= kernelbase);
|
|
2804 |
ASSERT(v + size <= ptable_va);
|
|
2805 |
|
|
2806 |
start = btop(v - kernelbase);
|
|
2807 |
end = btop(v + size - 1 - kernelbase);
|
|
2808 |
ASSERT(start < toxic_bit_map_len);
|
|
2809 |
ASSERT(end < toxic_bit_map_len);
|
|
2810 |
|
|
2811 |
while (start <= end) {
|
|
2812 |
BT_ATOMIC_SET(toxic_bit_map, start);
|
|
2813 |
++start;
|
|
2814 |
}
|
|
2815 |
return (vaddr);
|
|
2816 |
}
|
|
2817 |
|
|
2818 |
void
|
|
2819 |
device_arena_free(void *vaddr, size_t size)
|
|
2820 |
{
|
|
2821 |
uintptr_t v = (uintptr_t)vaddr;
|
|
2822 |
size_t start;
|
|
2823 |
size_t end;
|
|
2824 |
|
|
2825 |
ASSERT(v >= kernelbase);
|
|
2826 |
ASSERT(v + size <= ptable_va);
|
|
2827 |
|
|
2828 |
start = btop(v - kernelbase);
|
|
2829 |
end = btop(v + size - 1 - kernelbase);
|
|
2830 |
ASSERT(start < toxic_bit_map_len);
|
|
2831 |
ASSERT(end < toxic_bit_map_len);
|
|
2832 |
|
|
2833 |
while (start <= end) {
|
|
2834 |
ASSERT(BT_TEST(toxic_bit_map, start) != 0);
|
|
2835 |
BT_ATOMIC_CLEAR(toxic_bit_map, start);
|
|
2836 |
++start;
|
|
2837 |
}
|
|
2838 |
vmem_free(heap_arena, vaddr, size);
|
|
2839 |
}
|
|
2840 |
|
|
2841 |
/*
|
|
2842 |
* returns 1st address in range that is in device arena, or NULL
|
|
2843 |
* if len is not NULL it returns the length of the toxic range
|
|
2844 |
*/
|
|
2845 |
void *
|
|
2846 |
device_arena_contains(void *vaddr, size_t size, size_t *len)
|
|
2847 |
{
|
|
2848 |
uintptr_t v = (uintptr_t)vaddr;
|
|
2849 |
uintptr_t eaddr = v + size;
|
|
2850 |
size_t start;
|
|
2851 |
size_t end;
|
|
2852 |
|
|
2853 |
/*
|
|
2854 |
* if called very early by kmdb, just return NULL
|
|
2855 |
*/
|
|
2856 |
if (toxic_bit_map == NULL)
|
|
2857 |
return (NULL);
|
|
2858 |
|
|
2859 |
/*
|
|
2860 |
* First check if we're completely outside the bitmap range.
|
|
2861 |
*/
|
|
2862 |
if (v >= ptable_va || eaddr < kernelbase)
|
|
2863 |
return (NULL);
|
|
2864 |
|
|
2865 |
/*
|
|
2866 |
* Trim ends of search to look at only what the bitmap covers.
|
|
2867 |
*/
|
|
2868 |
if (v < kernelbase)
|
|
2869 |
v = kernelbase;
|
|
2870 |
start = btop(v - kernelbase);
|
|
2871 |
end = btop(eaddr - kernelbase);
|
|
2872 |
if (end >= toxic_bit_map_len)
|
|
2873 |
end = toxic_bit_map_len;
|
|
2874 |
|
|
2875 |
if (bt_range(toxic_bit_map, &start, &end, end) == 0)
|
|
2876 |
return (NULL);
|
|
2877 |
|
|
2878 |
v = kernelbase + ptob(start);
|
|
2879 |
if (len != NULL)
|
|
2880 |
*len = ptob(end - start);
|
|
2881 |
return ((void *)v);
|
|
2882 |
}
|
|
2883 |
|
|
2884 |
#endif
|