author | jg |
Mon, 27 Feb 2006 10:13:14 -0800 | |
changeset 1494 | 2327b3ecb7ad |
parent 1479 | 33ca002df1d5 |
child 1582 | eb879d43ab47 |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1455
b43f098fa50c
6378953 allocation of interrupt threads could be more common
andrei
parents:
1417
diff
changeset
|
5 |
* Common Development and Distribution License (the "License"). |
b43f098fa50c
6378953 allocation of interrupt threads could be more common
andrei
parents:
1417
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
1414
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
22 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
0 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
#include <sys/types.h> |
|
29 |
#include <sys/t_lock.h> |
|
30 |
#include <sys/param.h> |
|
31 |
#include <sys/sysmacros.h> |
|
32 |
#include <sys/signal.h> |
|
33 |
#include <sys/systm.h> |
|
34 |
#include <sys/user.h> |
|
35 |
#include <sys/mman.h> |
|
36 |
#include <sys/vm.h> |
|
37 |
#include <sys/conf.h> |
|
38 |
#include <sys/avintr.h> |
|
39 |
#include <sys/autoconf.h> |
|
40 |
#include <sys/disp.h> |
|
41 |
#include <sys/class.h> |
|
42 |
#include <sys/bitmap.h> |
|
43 |
||
44 |
#include <sys/privregs.h> |
|
45 |
||
46 |
#include <sys/proc.h> |
|
47 |
#include <sys/buf.h> |
|
48 |
#include <sys/kmem.h> |
|
49 |
#include <sys/kstat.h> |
|
50 |
||
51 |
#include <sys/reboot.h> |
|
52 |
#include <sys/uadmin.h> |
|
53 |
||
54 |
#include <sys/cred.h> |
|
55 |
#include <sys/vnode.h> |
|
56 |
#include <sys/file.h> |
|
57 |
||
58 |
#include <sys/procfs.h> |
|
59 |
#include <sys/acct.h> |
|
60 |
||
61 |
#include <sys/vfs.h> |
|
62 |
#include <sys/dnlc.h> |
|
63 |
#include <sys/var.h> |
|
64 |
#include <sys/cmn_err.h> |
|
65 |
#include <sys/utsname.h> |
|
66 |
#include <sys/debug.h> |
|
67 |
#include <sys/kdi.h> |
|
68 |
||
69 |
#include <sys/dumphdr.h> |
|
70 |
#include <sys/bootconf.h> |
|
71 |
#include <sys/varargs.h> |
|
72 |
#include <sys/promif.h> |
|
73 |
#include <sys/prom_emul.h> /* for create_prom_prop */ |
|
74 |
#include <sys/modctl.h> /* for "procfs" hack */ |
|
75 |
||
76 |
#include <sys/consdev.h> |
|
77 |
#include <sys/frame.h> |
|
78 |
||
79 |
#include <sys/sunddi.h> |
|
80 |
#include <sys/sunndi.h> |
|
81 |
#include <sys/ndi_impldefs.h> |
|
82 |
#include <sys/ddidmareq.h> |
|
83 |
#include <sys/psw.h> |
|
84 |
#include <sys/regset.h> |
|
85 |
#include <sys/clock.h> |
|
86 |
#include <sys/pte.h> |
|
87 |
#include <sys/mmu.h> |
|
88 |
#include <sys/tss.h> |
|
89 |
#include <sys/stack.h> |
|
90 |
#include <sys/trap.h> |
|
91 |
#include <sys/pic.h> |
|
92 |
#include <sys/fp.h> |
|
93 |
#include <vm/anon.h> |
|
94 |
#include <vm/as.h> |
|
95 |
#include <vm/page.h> |
|
96 |
#include <vm/seg.h> |
|
97 |
#include <vm/seg_dev.h> |
|
98 |
#include <vm/seg_kmem.h> |
|
99 |
#include <vm/seg_kpm.h> |
|
100 |
#include <vm/seg_map.h> |
|
101 |
#include <vm/seg_vn.h> |
|
102 |
#include <vm/seg_kp.h> |
|
103 |
#include <sys/memnode.h> |
|
104 |
#include <vm/vm_dep.h> |
|
105 |
#include <sys/swap.h> |
|
106 |
#include <sys/thread.h> |
|
107 |
#include <sys/sysconf.h> |
|
108 |
#include <sys/vm_machparam.h> |
|
109 |
#include <sys/archsystm.h> |
|
110 |
#include <sys/machsystm.h> |
|
111 |
#include <vm/hat.h> |
|
112 |
#include <vm/hat_i86.h> |
|
113 |
#include <sys/pmem.h> |
|
114 |
#include <sys/instance.h> |
|
115 |
#include <sys/smp_impldefs.h> |
|
116 |
#include <sys/x86_archext.h> |
|
117 |
#include <sys/segments.h> |
|
118 |
#include <sys/clconf.h> |
|
119 |
#include <sys/kobj.h> |
|
120 |
#include <sys/kobj_lex.h> |
|
121 |
#include <sys/prom_emul.h> |
|
122 |
#include <sys/cpc_impl.h> |
|
123 |
#include <sys/chip.h> |
|
124 |
#include <sys/x86_archext.h> |
|
1414
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
125 |
#include <sys/cpu_module.h> |
437 | 126 |
#include <sys/smbios.h> |
0 | 127 |
|
128 |
extern void progressbar_init(void); |
|
129 |
extern void progressbar_start(void); |
|
130 |
||
131 |
/* |
|
132 |
* XXX make declaration below "static" when drivers no longer use this |
|
133 |
* interface. |
|
134 |
*/ |
|
135 |
extern caddr_t p0_va; /* Virtual address for accessing physical page 0 */ |
|
136 |
||
137 |
/* |
|
138 |
* segkp |
|
139 |
*/ |
|
140 |
extern int segkp_fromheap; |
|
141 |
||
142 |
static void kvm_init(void); |
|
143 |
static void startup_init(void); |
|
144 |
static void startup_memlist(void); |
|
145 |
static void startup_modules(void); |
|
146 |
static void startup_bop_gone(void); |
|
147 |
static void startup_vm(void); |
|
148 |
static void startup_end(void); |
|
149 |
||
150 |
/* |
|
151 |
* Declare these as initialized data so we can patch them. |
|
152 |
*/ |
|
1479
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
153 |
#ifdef __i386 |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
154 |
/* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
155 |
* Due to virtual address space limitations running in 32 bit mode, restrict |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
156 |
* the amount of physical memory configured to a max of PHYSMEM32 pages (16g). |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
157 |
* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
158 |
* If the physical max memory size of 64g were allowed to be configured, the |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
159 |
* size of user virtual address space will be less than 1g. A limited user |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
160 |
* address space greatly reduces the range of applications that can run. |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
161 |
* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
162 |
* If more physical memory than PHYSMEM32 is required, users should preferably |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
163 |
* run in 64 bit mode which has no virtual address space limitation issues. |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
164 |
* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
165 |
* If 64 bit mode is not available (as in IA32) and/or more physical memory |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
166 |
* than PHYSMEM32 is required in 32 bit mode, physmem can be set to the desired |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
167 |
* value or to 0 (to configure all available memory) via eeprom(1M). kernelbase |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
168 |
* should also be carefully tuned to balance out the need of the user |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
169 |
* application while minimizing the risk of kernel heap exhaustion due to |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
170 |
* kernelbase being set too high. |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
171 |
*/ |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
172 |
#define PHYSMEM32 0x400000 |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
173 |
|
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
174 |
pgcnt_t physmem = PHYSMEM32; |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
175 |
#else |
0 | 176 |
pgcnt_t physmem = 0; /* memory size in pages, patch if you want less */ |
1479
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
177 |
#endif |
0 | 178 |
pgcnt_t obp_pages; /* Memory used by PROM for its text and data */ |
179 |
||
180 |
char *kobj_file_buf; |
|
181 |
int kobj_file_bufsize; /* set in /etc/system */ |
|
182 |
||
183 |
/* Global variables for MP support. Used in mp_startup */ |
|
184 |
caddr_t rm_platter_va; |
|
185 |
uint32_t rm_platter_pa; |
|
186 |
||
841
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
187 |
int auto_lpg_disable = 1; |
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
188 |
|
0 | 189 |
/* |
190 |
* Some CPUs have holes in the middle of the 64-bit virtual address range. |
|
191 |
*/ |
|
192 |
uintptr_t hole_start, hole_end; |
|
193 |
||
194 |
/* |
|
195 |
* kpm mapping window |
|
196 |
*/ |
|
197 |
caddr_t kpm_vbase; |
|
198 |
size_t kpm_size; |
|
199 |
static int kpm_desired = 0; /* Do we want to try to use segkpm? */ |
|
200 |
||
201 |
/* |
|
202 |
* VA range that must be preserved for boot until we release all of its |
|
203 |
* mappings. |
|
204 |
*/ |
|
205 |
#if defined(__amd64) |
|
206 |
static void *kmem_setaside; |
|
207 |
#endif |
|
208 |
||
209 |
/* |
|
210 |
* Configuration parameters set at boot time. |
|
211 |
*/ |
|
212 |
||
213 |
caddr_t econtig; /* end of first block of contiguous kernel */ |
|
214 |
||
215 |
struct bootops *bootops = 0; /* passed in from boot */ |
|
216 |
struct bootops **bootopsp; |
|
217 |
struct boot_syscalls *sysp; /* passed in from boot */ |
|
218 |
||
219 |
char bootblock_fstype[16]; |
|
220 |
||
221 |
char kern_bootargs[OBP_MAXPATHLEN]; |
|
222 |
||
223 |
/* |
|
224 |
* new memory fragmentations are possible in startup() due to BOP_ALLOCs. this |
|
225 |
* depends on number of BOP_ALLOC calls made and requested size, memory size |
|
226 |
* combination and whether boot.bin memory needs to be freed. |
|
227 |
*/ |
|
228 |
#define POSS_NEW_FRAGMENTS 12 |
|
229 |
||
230 |
/* |
|
231 |
* VM data structures |
|
232 |
*/ |
|
233 |
long page_hashsz; /* Size of page hash table (power of two) */ |
|
234 |
struct page *pp_base; /* Base of initial system page struct array */ |
|
235 |
struct page **page_hash; /* Page hash table */ |
|
236 |
struct seg ktextseg; /* Segment used for kernel executable image */ |
|
237 |
struct seg kvalloc; /* Segment used for "valloc" mapping */ |
|
238 |
struct seg kpseg; /* Segment used for pageable kernel virt mem */ |
|
239 |
struct seg kmapseg; /* Segment used for generic kernel mappings */ |
|
240 |
struct seg kdebugseg; /* Segment used for the kernel debugger */ |
|
241 |
||
242 |
struct seg *segkmap = &kmapseg; /* Kernel generic mapping segment */ |
|
243 |
struct seg *segkp = &kpseg; /* Pageable kernel virtual memory segment */ |
|
244 |
||
245 |
#if defined(__amd64) |
|
246 |
struct seg kvseg_core; /* Segment used for the core heap */ |
|
247 |
struct seg kpmseg; /* Segment used for physical mapping */ |
|
248 |
struct seg *segkpm = &kpmseg; /* 64bit kernel physical mapping segment */ |
|
249 |
#else |
|
250 |
struct seg *segkpm = NULL; /* Unused on IA32 */ |
|
251 |
#endif |
|
252 |
||
253 |
caddr_t segkp_base; /* Base address of segkp */ |
|
254 |
#if defined(__amd64) |
|
255 |
pgcnt_t segkpsize = btop(SEGKPDEFSIZE); /* size of segkp segment in pages */ |
|
256 |
#else |
|
257 |
pgcnt_t segkpsize = 0; |
|
258 |
#endif |
|
259 |
||
260 |
struct memseg *memseg_base; |
|
261 |
struct vnode unused_pages_vp; |
|
262 |
||
263 |
#define FOURGB 0x100000000LL |
|
264 |
||
265 |
struct memlist *memlist; |
|
266 |
||
267 |
caddr_t s_text; /* start of kernel text segment */ |
|
268 |
caddr_t e_text; /* end of kernel text segment */ |
|
269 |
caddr_t s_data; /* start of kernel data segment */ |
|
270 |
caddr_t e_data; /* end of kernel data segment */ |
|
271 |
caddr_t modtext; /* start of loadable module text reserved */ |
|
272 |
caddr_t e_modtext; /* end of loadable module text reserved */ |
|
273 |
caddr_t moddata; /* start of loadable module data reserved */ |
|
274 |
caddr_t e_moddata; /* end of loadable module data reserved */ |
|
275 |
||
276 |
struct memlist *phys_install; /* Total installed physical memory */ |
|
277 |
struct memlist *phys_avail; /* Total available physical memory */ |
|
278 |
||
279 |
static void memlist_add(uint64_t, uint64_t, struct memlist *, |
|
280 |
struct memlist **); |
|
281 |
||
282 |
/* |
|
283 |
* kphysm_init returns the number of pages that were processed |
|
284 |
*/ |
|
285 |
static pgcnt_t kphysm_init(page_t *, struct memseg *, pgcnt_t, pgcnt_t); |
|
286 |
||
287 |
#define IO_PROP_SIZE 64 /* device property size */ |
|
288 |
||
289 |
/* |
|
290 |
* a couple useful roundup macros |
|
291 |
*/ |
|
292 |
#define ROUND_UP_PAGE(x) \ |
|
293 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)MMU_PAGESIZE)) |
|
294 |
#define ROUND_UP_LPAGE(x) \ |
|
295 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[1])) |
|
296 |
#define ROUND_UP_4MEG(x) \ |
|
297 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), (uintptr_t)FOURMB_PAGESIZE)) |
|
298 |
#define ROUND_UP_TOPLEVEL(x) \ |
|
299 |
((uintptr_t)P2ROUNDUP((uintptr_t)(x), mmu.level_size[mmu.max_level])) |
|
300 |
||
301 |
/* |
|
302 |
* 32-bit Kernel's Virtual memory layout. |
|
303 |
* +-----------------------+ |
|
304 |
* | psm 1-1 map | |
|
305 |
* | exec args area | |
|
306 |
* 0xFFC00000 -|-----------------------|- ARGSBASE |
|
307 |
* | debugger | |
|
308 |
* 0xFF800000 -|-----------------------|- SEGDEBUGBASE |
|
309 |
* | Kernel Data | |
|
310 |
* 0xFEC00000 -|-----------------------| |
|
311 |
* | Kernel Text | |
|
312 |
* 0xFE800000 -|-----------------------|- KERNEL_TEXT |
|
313 |
* | LUFS sinkhole | |
|
314 |
* 0xFE000000 -|-----------------------|- lufs_addr |
|
315 |
* --- -|-----------------------|- valloc_base + valloc_sz |
|
316 |
* | early pp structures | |
|
317 |
* | memsegs, memlists, | |
|
318 |
* | page hash, etc. | |
|
319 |
* --- -|-----------------------|- valloc_base (floating) |
|
320 |
* | ptable_va | |
|
321 |
* 0xFDFFE000 -|-----------------------|- ekernelheap, ptable_va |
|
322 |
* | | (segkp is an arena under the heap) |
|
323 |
* | | |
|
324 |
* | kvseg | |
|
325 |
* | | |
|
326 |
* | | |
|
327 |
* --- -|-----------------------|- kernelheap (floating) |
|
328 |
* | Segkmap | |
|
329 |
* 0xC3002000 -|-----------------------|- segkmap_start (floating) |
|
330 |
* | Red Zone | |
|
331 |
* 0xC3000000 -|-----------------------|- kernelbase / userlimit (floating) |
|
332 |
* | | || |
|
333 |
* | Shared objects | \/ |
|
334 |
* | | |
|
335 |
* : : |
|
336 |
* | user data | |
|
337 |
* |-----------------------| |
|
338 |
* | user text | |
|
339 |
* 0x08048000 -|-----------------------| |
|
340 |
* | user stack | |
|
341 |
* : : |
|
342 |
* | invalid | |
|
343 |
* 0x00000000 +-----------------------+ |
|
344 |
* |
|
345 |
* |
|
346 |
* 64-bit Kernel's Virtual memory layout. (assuming 64 bit app) |
|
347 |
* +-----------------------+ |
|
348 |
* | psm 1-1 map | |
|
349 |
* | exec args area | |
|
350 |
* 0xFFFFFFFF.FFC00000 |-----------------------|- ARGSBASE |
|
351 |
* | debugger (?) | |
|
352 |
* 0xFFFFFFFF.FF800000 |-----------------------|- SEGDEBUGBASE |
|
353 |
* | unused | |
|
354 |
* +-----------------------+ |
|
355 |
* | Kernel Data | |
|
356 |
* 0xFFFFFFFF.FBC00000 |-----------------------| |
|
357 |
* | Kernel Text | |
|
358 |
* 0xFFFFFFFF.FB800000 |-----------------------|- KERNEL_TEXT |
|
359 |
* | LUFS sinkhole | |
|
360 |
* 0xFFFFFFFF.FB000000 -|-----------------------|- lufs_addr |
|
361 |
* --- |-----------------------|- valloc_base + valloc_sz |
|
362 |
* | early pp structures | |
|
363 |
* | memsegs, memlists, | |
|
364 |
* | page hash, etc. | |
|
365 |
* --- |-----------------------|- valloc_base |
|
366 |
* | ptable_va | |
|
367 |
* --- |-----------------------|- ptable_va |
|
368 |
* | Core heap | (used for loadable modules) |
|
369 |
* 0xFFFFFFFF.C0000000 |-----------------------|- core_base / ekernelheap |
|
370 |
* | Kernel | |
|
371 |
* | heap | |
|
372 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- kernelheap (floating) |
|
373 |
* | segkmap | |
|
374 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- segkmap_start (floating) |
|
375 |
* | device mappings | |
|
376 |
* 0xFFFFFXXX.XXX00000 |-----------------------|- toxic_addr (floating) |
|
377 |
* | segkp | |
|
378 |
* --- |-----------------------|- segkp_base |
|
379 |
* | segkpm | |
|
380 |
* 0xFFFFFE00.00000000 |-----------------------| |
|
381 |
* | Red Zone | |
|
382 |
* 0xFFFFFD80.00000000 |-----------------------|- KERNELBASE |
|
383 |
* | User stack |- User space memory |
|
384 |
* | | |
|
385 |
* | shared objects, etc | (grows downwards) |
|
386 |
* : : |
|
387 |
* | | |
|
388 |
* 0xFFFF8000.00000000 |-----------------------| |
|
389 |
* | | |
|
390 |
* | VA Hole / unused | |
|
391 |
* | | |
|
392 |
* 0x00008000.00000000 |-----------------------| |
|
393 |
* | | |
|
394 |
* | | |
|
395 |
* : : |
|
396 |
* | user heap | (grows upwards) |
|
397 |
* | | |
|
398 |
* | user data | |
|
399 |
* |-----------------------| |
|
400 |
* | user text | |
|
401 |
* 0x00000000.04000000 |-----------------------| |
|
402 |
* | invalid | |
|
403 |
* 0x00000000.00000000 +-----------------------+ |
|
404 |
* |
|
405 |
* A 32 bit app on the 64 bit kernel sees the same layout as on the 32 bit |
|
406 |
* kernel, except that userlimit is raised to 0xfe000000 |
|
407 |
* |
|
408 |
* Floating values: |
|
409 |
* |
|
410 |
* valloc_base: start of the kernel's memory management/tracking data |
|
411 |
* structures. This region contains page_t structures for the lowest 4GB |
|
412 |
* of physical memory, memsegs, memlists, and the page hash. |
|
413 |
* |
|
414 |
* core_base: start of the kernel's "core" heap area on 64-bit systems. |
|
415 |
* This area is intended to be used for global data as well as for module |
|
416 |
* text/data that does not fit into the nucleus pages. The core heap is |
|
417 |
* restricted to a 2GB range, allowing every address within it to be |
|
418 |
* accessed using rip-relative addressing |
|
419 |
* |
|
420 |
* ekernelheap: end of kernelheap and start of segmap. |
|
421 |
* |
|
422 |
* kernelheap: start of kernel heap. On 32-bit systems, this starts right |
|
423 |
* above a red zone that separates the user's address space from the |
|
424 |
* kernel's. On 64-bit systems, it sits above segkp and segkpm. |
|
425 |
* |
|
426 |
* segkmap_start: start of segmap. The length of segmap can be modified |
|
427 |
* by changing segmapsize in /etc/system (preferred) or eeprom (deprecated). |
|
428 |
* The default length is 16MB on 32-bit systems and 64MB on 64-bit systems. |
|
429 |
* |
|
430 |
* kernelbase: On a 32-bit kernel the default value of 0xd4000000 will be |
|
431 |
* decreased by 2X the size required for page_t. This allows the kernel |
|
432 |
* heap to grow in size with physical memory. With sizeof(page_t) == 80 |
|
433 |
* bytes, the following shows the values of kernelbase and kernel heap |
|
434 |
* sizes for different memory configurations (assuming default segmap and |
|
435 |
* segkp sizes). |
|
436 |
* |
|
437 |
* mem size for kernelbase kernel heap |
|
438 |
* size page_t's size |
|
439 |
* ---- --------- ---------- ----------- |
|
440 |
* 1gb 0x01400000 0xd1800000 684MB |
|
441 |
* 2gb 0x02800000 0xcf000000 704MB |
|
442 |
* 4gb 0x05000000 0xca000000 744MB |
|
443 |
* 6gb 0x07800000 0xc5000000 784MB |
|
444 |
* 8gb 0x0a000000 0xc0000000 824MB |
|
445 |
* 16gb 0x14000000 0xac000000 984MB |
|
446 |
* 32gb 0x28000000 0x84000000 1304MB |
|
447 |
* 64gb 0x50000000 0x34000000 1944MB (*) |
|
448 |
* |
|
449 |
* kernelbase is less than the abi minimum of 0xc0000000 for memory |
|
450 |
* configurations above 8gb. |
|
451 |
* |
|
452 |
* (*) support for memory configurations above 32gb will require manual tuning |
|
453 |
* of kernelbase to balance out the need of user applications. |
|
454 |
*/ |
|
455 |
||
456 |
/* real-time-clock initialization parameters */ |
|
457 |
long gmt_lag; /* offset in seconds of gmt to local time */ |
|
458 |
extern long process_rtc_config_file(void); |
|
459 |
||
460 |
char *final_kernelheap; |
|
461 |
char *boot_kernelheap; |
|
462 |
uintptr_t kernelbase; |
|
463 |
uintptr_t eprom_kernelbase; |
|
464 |
size_t segmapsize; |
|
465 |
static uintptr_t segmap_reserved; |
|
466 |
uintptr_t segkmap_start; |
|
467 |
int segmapfreelists; |
|
468 |
pgcnt_t boot_npages; |
|
469 |
pgcnt_t npages; |
|
470 |
size_t core_size; /* size of "core" heap */ |
|
471 |
uintptr_t core_base; /* base address of "core" heap */ |
|
472 |
||
473 |
/* |
|
474 |
* List of bootstrap pages. We mark these as allocated in startup. |
|
475 |
* release_bootstrap() will free them when we're completely done with |
|
476 |
* the bootstrap. |
|
477 |
*/ |
|
478 |
static page_t *bootpages, *rd_pages; |
|
479 |
||
480 |
struct system_hardware system_hardware; |
|
481 |
||
482 |
/* |
|
483 |
* Enable some debugging messages concerning memory usage... |
|
484 |
* |
|
485 |
* XX64 There should only be one print routine once memlist usage between |
|
486 |
* vmx and the kernel is cleaned up and there is a single memlist structure |
|
487 |
* shared between kernel and boot. |
|
488 |
*/ |
|
489 |
static void |
|
490 |
print_boot_memlist(char *title, struct memlist *mp) |
|
491 |
{ |
|
492 |
prom_printf("MEMLIST: %s:\n", title); |
|
493 |
while (mp != NULL) { |
|
494 |
prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n", |
|
495 |
mp->address, mp->size); |
|
496 |
mp = mp->next; |
|
497 |
} |
|
498 |
} |
|
499 |
||
500 |
static void |
|
501 |
print_kernel_memlist(char *title, struct memlist *mp) |
|
502 |
{ |
|
503 |
prom_printf("MEMLIST: %s:\n", title); |
|
504 |
while (mp != NULL) { |
|
505 |
prom_printf("\tAddress 0x%" PRIx64 ", size 0x%" PRIx64 "\n", |
|
506 |
mp->address, mp->size); |
|
507 |
mp = mp->next; |
|
508 |
} |
|
509 |
} |
|
510 |
||
511 |
/* |
|
512 |
* XX64 need a comment here.. are these just default values, surely |
|
513 |
* we read the "cpuid" type information to figure this out. |
|
514 |
*/ |
|
515 |
int l2cache_sz = 0x80000; |
|
516 |
int l2cache_linesz = 0x40; |
|
517 |
int l2cache_assoc = 1; |
|
518 |
||
519 |
/* |
|
520 |
* on 64 bit we use a predefined VA range for mapping devices in the kernel |
|
521 |
* on 32 bit the mappings are intermixed in the heap, so we use a bit map |
|
522 |
*/ |
|
523 |
#ifdef __amd64 |
|
524 |
||
525 |
vmem_t *device_arena; |
|
526 |
uintptr_t toxic_addr = (uintptr_t)NULL; |
|
527 |
size_t toxic_size = 1 * 1024 * 1024 * 1024; /* Sparc uses 1 gig too */ |
|
528 |
||
529 |
#else /* __i386 */ |
|
530 |
||
531 |
ulong_t *toxic_bit_map; /* one bit for each 4k of VA in heap_arena */ |
|
532 |
size_t toxic_bit_map_len = 0; /* in bits */ |
|
533 |
||
534 |
#endif /* __i386 */ |
|
535 |
||
536 |
/* |
|
537 |
* Simple boot time debug facilities |
|
538 |
*/ |
|
539 |
static char *prm_dbg_str[] = { |
|
540 |
"%s:%d: '%s' is 0x%x\n", |
|
541 |
"%s:%d: '%s' is 0x%llx\n" |
|
542 |
}; |
|
543 |
||
544 |
int prom_debug; |
|
545 |
||
546 |
#define PRM_DEBUG(q) if (prom_debug) \ |
|
547 |
prom_printf(prm_dbg_str[sizeof (q) >> 3], "startup.c", __LINE__, #q, q); |
|
548 |
#define PRM_POINT(q) if (prom_debug) \ |
|
549 |
prom_printf("%s:%d: %s\n", "startup.c", __LINE__, q); |
|
550 |
||
551 |
/* |
|
552 |
* This structure is used to keep track of the initial allocations |
|
553 |
* done in startup_memlist(). The value of NUM_ALLOCATIONS needs to |
|
554 |
* be >= the number of ADD_TO_ALLOCATIONS() executed in the code. |
|
555 |
*/ |
|
556 |
#define NUM_ALLOCATIONS 7 |
|
557 |
int num_allocations = 0; |
|
558 |
struct { |
|
559 |
void **al_ptr; |
|
560 |
size_t al_size; |
|
561 |
} allocations[NUM_ALLOCATIONS]; |
|
562 |
size_t valloc_sz = 0; |
|
563 |
uintptr_t valloc_base; |
|
564 |
extern uintptr_t ptable_va; |
|
565 |
extern size_t ptable_sz; |
|
566 |
||
567 |
#define ADD_TO_ALLOCATIONS(ptr, size) { \ |
|
568 |
size = ROUND_UP_PAGE(size); \ |
|
569 |
if (num_allocations == NUM_ALLOCATIONS) \ |
|
570 |
panic("too many ADD_TO_ALLOCATIONS()"); \ |
|
571 |
allocations[num_allocations].al_ptr = (void**)&ptr; \ |
|
572 |
allocations[num_allocations].al_size = size; \ |
|
573 |
valloc_sz += size; \ |
|
574 |
++num_allocations; \ |
|
575 |
} |
|
576 |
||
577 |
static void |
|
578 |
perform_allocations(void) |
|
579 |
{ |
|
580 |
caddr_t mem; |
|
581 |
int i; |
|
582 |
||
583 |
mem = BOP_ALLOC(bootops, (caddr_t)valloc_base, valloc_sz, BO_NO_ALIGN); |
|
584 |
if (mem != (caddr_t)valloc_base) |
|
585 |
panic("BOP_ALLOC() failed"); |
|
586 |
bzero(mem, valloc_sz); |
|
587 |
for (i = 0; i < num_allocations; ++i) { |
|
588 |
*allocations[i].al_ptr = (void *)mem; |
|
589 |
mem += allocations[i].al_size; |
|
590 |
} |
|
591 |
} |
|
592 |
||
593 |
/* |
|
594 |
* Our world looks like this at startup time. |
|
595 |
* |
|
596 |
* In a 32-bit OS, boot loads the kernel text at 0xfe800000 and kernel data |
|
597 |
* at 0xfec00000. On a 64-bit OS, kernel text and data are loaded at |
|
598 |
* 0xffffffff.fb800000 and 0xffffffff.fbc00000 respectively. Those |
|
599 |
* addresses are fixed in the binary at link time. |
|
600 |
* |
|
601 |
* On the text page: |
|
602 |
* unix/genunix/krtld/module text loads. |
|
603 |
* |
|
604 |
* On the data page: |
|
605 |
* unix/genunix/krtld/module data loads and space for page_t's. |
|
606 |
*/ |
|
607 |
/* |
|
608 |
* Machine-dependent startup code |
|
609 |
*/ |
|
610 |
void |
|
611 |
startup(void) |
|
612 |
{ |
|
1494 | 613 |
extern void startup_bios_disk(void); |
614 |
extern void startup_pci_bios(void); |
|
0 | 615 |
/* |
616 |
* Make sure that nobody tries to use sekpm until we have |
|
617 |
* initialized it properly. |
|
618 |
*/ |
|
619 |
#if defined(__amd64) |
|
620 |
kpm_desired = kpm_enable; |
|
621 |
#endif |
|
622 |
kpm_enable = 0; |
|
623 |
||
624 |
progressbar_init(); |
|
625 |
startup_init(); |
|
626 |
startup_memlist(); |
|
1494 | 627 |
startup_pci_bios(); |
0 | 628 |
startup_modules(); |
629 |
startup_bios_disk(); |
|
630 |
startup_bop_gone(); |
|
631 |
startup_vm(); |
|
632 |
startup_end(); |
|
633 |
progressbar_start(); |
|
634 |
} |
|
635 |
||
636 |
static void |
|
637 |
startup_init() |
|
638 |
{ |
|
639 |
PRM_POINT("startup_init() starting..."); |
|
640 |
||
641 |
/* |
|
642 |
* Complete the extraction of cpuid data |
|
643 |
*/ |
|
644 |
cpuid_pass2(CPU); |
|
645 |
||
646 |
(void) check_boot_version(BOP_GETVERSION(bootops)); |
|
647 |
||
648 |
/* |
|
649 |
* Check for prom_debug in boot environment |
|
650 |
*/ |
|
651 |
if (BOP_GETPROPLEN(bootops, "prom_debug") >= 0) { |
|
652 |
++prom_debug; |
|
653 |
PRM_POINT("prom_debug found in boot enviroment"); |
|
654 |
} |
|
655 |
||
656 |
/* |
|
657 |
* Collect node, cpu and memory configuration information. |
|
658 |
*/ |
|
659 |
get_system_configuration(); |
|
660 |
||
661 |
/* |
|
662 |
* Halt if this is an unsupported processor. |
|
663 |
*/ |
|
664 |
if (x86_type == X86_TYPE_486 || x86_type == X86_TYPE_CYRIX_486) { |
|
665 |
printf("\n486 processor (\"%s\") detected.\n", |
|
666 |
CPU->cpu_brandstr); |
|
667 |
halt("This processor is not supported by this release " |
|
668 |
"of Solaris."); |
|
669 |
} |
|
670 |
||
671 |
PRM_POINT("startup_init() done"); |
|
672 |
} |
|
673 |
||
674 |
/* |
|
675 |
* Callback for copy_memlist_filter() to filter nucleus, kadb/kmdb, (ie. |
|
676 |
* everything mapped above KERNEL_TEXT) pages from phys_avail. Note it |
|
677 |
* also filters out physical page zero. There is some reliance on the |
|
678 |
* boot loader allocating only a few contiguous physical memory chunks. |
|
679 |
*/ |
|
680 |
static void |
|
681 |
avail_filter(uint64_t *addr, uint64_t *size) |
|
682 |
{ |
|
683 |
uintptr_t va; |
|
684 |
uintptr_t next_va; |
|
685 |
pfn_t pfn; |
|
686 |
uint64_t pfn_addr; |
|
687 |
uint64_t pfn_eaddr; |
|
688 |
uint_t prot; |
|
689 |
size_t len; |
|
690 |
uint_t change; |
|
691 |
||
692 |
if (prom_debug) |
|
693 |
prom_printf("\tFilter: in: a=%" PRIx64 ", s=%" PRIx64 "\n", |
|
694 |
*addr, *size); |
|
695 |
||
696 |
/* |
|
697 |
* page zero is required for BIOS.. never make it available |
|
698 |
*/ |
|
699 |
if (*addr == 0) { |
|
700 |
*addr += MMU_PAGESIZE; |
|
701 |
*size -= MMU_PAGESIZE; |
|
702 |
} |
|
703 |
||
704 |
/* |
|
705 |
* First we trim from the front of the range. Since hat_boot_probe() |
|
706 |
* walks ranges in virtual order, but addr/size are physical, we need |
|
707 |
* to the list until no changes are seen. This deals with the case |
|
708 |
* where page "p" is mapped at v, page "p + PAGESIZE" is mapped at w |
|
709 |
* but w < v. |
|
710 |
*/ |
|
711 |
do { |
|
712 |
change = 0; |
|
713 |
for (va = KERNEL_TEXT; |
|
714 |
*size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0; |
|
715 |
va = next_va) { |
|
716 |
||
717 |
next_va = va + len; |
|
718 |
pfn_addr = ptob((uint64_t)pfn); |
|
719 |
pfn_eaddr = pfn_addr + len; |
|
720 |
||
721 |
if (pfn_addr <= *addr && pfn_eaddr > *addr) { |
|
722 |
change = 1; |
|
723 |
while (*size > 0 && len > 0) { |
|
724 |
*addr += MMU_PAGESIZE; |
|
725 |
*size -= MMU_PAGESIZE; |
|
726 |
len -= MMU_PAGESIZE; |
|
727 |
} |
|
728 |
} |
|
729 |
} |
|
730 |
if (change && prom_debug) |
|
731 |
prom_printf("\t\ttrim: a=%" PRIx64 ", s=%" PRIx64 "\n", |
|
732 |
*addr, *size); |
|
733 |
} while (change); |
|
734 |
||
735 |
/* |
|
736 |
* Trim pages from the end of the range. |
|
737 |
*/ |
|
738 |
for (va = KERNEL_TEXT; |
|
739 |
*size > 0 && hat_boot_probe(&va, &len, &pfn, &prot) != 0; |
|
740 |
va = next_va) { |
|
741 |
||
742 |
next_va = va + len; |
|
743 |
pfn_addr = ptob((uint64_t)pfn); |
|
744 |
||
745 |
if (pfn_addr >= *addr && pfn_addr < *addr + *size) |
|
746 |
*size = pfn_addr - *addr; |
|
747 |
} |
|
748 |
||
749 |
if (prom_debug) |
|
750 |
prom_printf("\tFilter out: a=%" PRIx64 ", s=%" PRIx64 "\n", |
|
751 |
*addr, *size); |
|
752 |
} |
|
753 |
||
754 |
static void |
|
755 |
kpm_init() |
|
756 |
{ |
|
757 |
struct segkpm_crargs b; |
|
758 |
uintptr_t start, end; |
|
759 |
struct memlist *pmem; |
|
760 |
||
761 |
/* |
|
762 |
* These variables were all designed for sfmmu in which segkpm is |
|
763 |
* mapped using a single pagesize - either 8KB or 4MB. On x86, we |
|
764 |
* might use 2+ page sizes on a single machine, so none of these |
|
765 |
* variables have a single correct value. They are set up as if we |
|
766 |
* always use a 4KB pagesize, which should do no harm. In the long |
|
767 |
* run, we should get rid of KPM's assumption that only a single |
|
768 |
* pagesize is used. |
|
769 |
*/ |
|
770 |
kpm_pgshft = MMU_PAGESHIFT; |
|
771 |
kpm_pgsz = MMU_PAGESIZE; |
|
772 |
kpm_pgoff = MMU_PAGEOFFSET; |
|
773 |
kpmp2pshft = 0; |
|
774 |
kpmpnpgs = 1; |
|
775 |
ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0); |
|
776 |
||
777 |
PRM_POINT("about to create segkpm"); |
|
778 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
779 |
||
780 |
if (seg_attach(&kas, kpm_vbase, kpm_size, segkpm) < 0) |
|
781 |
panic("cannot attach segkpm"); |
|
782 |
||
783 |
b.prot = PROT_READ | PROT_WRITE; |
|
784 |
b.nvcolors = 1; |
|
785 |
||
786 |
if (segkpm_create(segkpm, (caddr_t)&b) != 0) |
|
787 |
panic("segkpm_create segkpm"); |
|
788 |
||
789 |
rw_exit(&kas.a_lock); |
|
790 |
||
791 |
/* |
|
792 |
* Map each of the memsegs into the kpm segment, coalesing adjacent |
|
793 |
* memsegs to allow mapping with the largest possible pages. |
|
794 |
*/ |
|
795 |
pmem = phys_install; |
|
796 |
start = pmem->address; |
|
797 |
end = start + pmem->size; |
|
798 |
for (;;) { |
|
799 |
if (pmem == NULL || pmem->address > end) { |
|
800 |
hat_devload(kas.a_hat, kpm_vbase + start, |
|
801 |
end - start, mmu_btop(start), |
|
802 |
PROT_READ | PROT_WRITE, |
|
803 |
HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); |
|
804 |
if (pmem == NULL) |
|
805 |
break; |
|
806 |
start = pmem->address; |
|
807 |
} |
|
808 |
end = pmem->address + pmem->size; |
|
809 |
pmem = pmem->next; |
|
810 |
} |
|
811 |
} |
|
812 |
||
813 |
/* |
|
814 |
* The purpose of startup memlist is to get the system to the |
|
815 |
* point where it can use kmem_alloc()'s that operate correctly |
|
816 |
* relying on BOP_ALLOC(). This includes allocating page_ts, |
|
817 |
* page hash table, vmem initialized, etc. |
|
818 |
* |
|
819 |
* Boot's versions of physinstalled and physavail are insufficient for |
|
820 |
* the kernel's purposes. Specifically we don't know which pages that |
|
821 |
* are not in physavail can be reclaimed after boot is gone. |
|
822 |
* |
|
823 |
* This code solves the problem by dividing the address space |
|
824 |
* into 3 regions as it takes over the MMU from the booter. |
|
825 |
* |
|
826 |
* 1) Any (non-nucleus) pages that are mapped at addresses above KERNEL_TEXT |
|
827 |
* can not be used by the kernel. |
|
828 |
* |
|
829 |
* 2) Any free page that happens to be mapped below kernelbase |
|
830 |
* is protected until the boot loader is released, but will then be reclaimed. |
|
831 |
* |
|
832 |
* 3) Boot shouldn't use any address in the remaining area between kernelbase |
|
833 |
* and KERNEL_TEXT. |
|
834 |
* |
|
835 |
* In the case of multiple mappings to the same page, region 1 has precedence |
|
836 |
* over region 2. |
|
837 |
*/ |
|
838 |
static void |
|
839 |
startup_memlist(void) |
|
840 |
{ |
|
841 |
size_t memlist_sz; |
|
842 |
size_t memseg_sz; |
|
843 |
size_t pagehash_sz; |
|
844 |
size_t pp_sz; |
|
845 |
uintptr_t va; |
|
846 |
size_t len; |
|
847 |
uint_t prot; |
|
848 |
pfn_t pfn; |
|
849 |
int memblocks; |
|
850 |
caddr_t pagecolor_mem; |
|
851 |
size_t pagecolor_memsz; |
|
852 |
caddr_t page_ctrs_mem; |
|
853 |
size_t page_ctrs_size; |
|
854 |
struct memlist *current; |
|
1479
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
855 |
pgcnt_t orig_npages = 0; |
0 | 856 |
extern void startup_build_mem_nodes(struct memlist *); |
857 |
||
858 |
/* XX64 fix these - they should be in include files */ |
|
859 |
extern ulong_t cr4_value; |
|
860 |
extern size_t page_coloring_init(uint_t, int, int); |
|
861 |
extern void page_coloring_setup(caddr_t); |
|
862 |
||
863 |
PRM_POINT("startup_memlist() starting..."); |
|
864 |
||
865 |
/* |
|
866 |
* Take the most current snapshot we can by calling mem-update. |
|
867 |
* For this to work properly, we first have to ask boot for its |
|
868 |
* end address. |
|
869 |
*/ |
|
870 |
if (BOP_GETPROPLEN(bootops, "memory-update") == 0) |
|
871 |
(void) BOP_GETPROP(bootops, "memory-update", NULL); |
|
872 |
||
873 |
/* |
|
874 |
* find if the kernel is mapped on a large page |
|
875 |
*/ |
|
876 |
va = KERNEL_TEXT; |
|
877 |
if (hat_boot_probe(&va, &len, &pfn, &prot) == 0) |
|
878 |
panic("Couldn't find kernel text boot mapping"); |
|
879 |
||
880 |
/* |
|
881 |
* Use leftover large page nucleus text/data space for loadable modules. |
|
882 |
* Use at most MODTEXT/MODDATA. |
|
883 |
*/ |
|
884 |
if (len > MMU_PAGESIZE) { |
|
885 |
||
886 |
moddata = (caddr_t)ROUND_UP_PAGE(e_data); |
|
887 |
e_moddata = (caddr_t)ROUND_UP_4MEG(e_data); |
|
888 |
if (e_moddata - moddata > MODDATA) |
|
889 |
e_moddata = moddata + MODDATA; |
|
890 |
||
891 |
modtext = (caddr_t)ROUND_UP_PAGE(e_text); |
|
892 |
e_modtext = (caddr_t)ROUND_UP_4MEG(e_text); |
|
893 |
if (e_modtext - modtext > MODTEXT) |
|
894 |
e_modtext = modtext + MODTEXT; |
|
895 |
||
896 |
||
897 |
} else { |
|
898 |
||
899 |
PRM_POINT("Kernel NOT loaded on Large Page!"); |
|
900 |
e_moddata = moddata = (caddr_t)ROUND_UP_PAGE(e_data); |
|
901 |
e_modtext = modtext = (caddr_t)ROUND_UP_PAGE(e_text); |
|
902 |
||
903 |
} |
|
904 |
econtig = e_moddata; |
|
905 |
||
906 |
PRM_DEBUG(modtext); |
|
907 |
PRM_DEBUG(e_modtext); |
|
908 |
PRM_DEBUG(moddata); |
|
909 |
PRM_DEBUG(e_moddata); |
|
910 |
PRM_DEBUG(econtig); |
|
911 |
||
912 |
/* |
|
913 |
* For MP machines cr4_value must be set or the non-boot |
|
914 |
* CPUs will not be able to start. |
|
915 |
*/ |
|
916 |
if (x86_feature & X86_LARGEPAGE) |
|
917 |
cr4_value = getcr4(); |
|
918 |
PRM_DEBUG(cr4_value); |
|
919 |
||
920 |
/* |
|
921 |
* Examine the boot loaders physical memory map to find out: |
|
922 |
* - total memory in system - physinstalled |
|
923 |
* - the max physical address - physmax |
|
924 |
* - the number of segments the intsalled memory comes in |
|
925 |
*/ |
|
926 |
if (prom_debug) |
|
927 |
print_boot_memlist("boot physinstalled", |
|
928 |
bootops->boot_mem->physinstalled); |
|
929 |
installed_top_size(bootops->boot_mem->physinstalled, &physmax, |
|
930 |
&physinstalled, &memblocks); |
|
931 |
PRM_DEBUG(physmax); |
|
932 |
PRM_DEBUG(physinstalled); |
|
933 |
PRM_DEBUG(memblocks); |
|
934 |
||
935 |
if (prom_debug) |
|
936 |
print_boot_memlist("boot physavail", |
|
937 |
bootops->boot_mem->physavail); |
|
938 |
||
939 |
/* |
|
940 |
* Initialize hat's mmu parameters. |
|
941 |
* Check for enforce-prot-exec in boot environment. It's used to |
|
942 |
* enable/disable support for the page table entry NX bit. |
|
943 |
* The default is to enforce PROT_EXEC on processors that support NX. |
|
944 |
* Boot seems to round up the "len", but 8 seems to be big enough. |
|
945 |
*/ |
|
946 |
mmu_init(); |
|
947 |
||
948 |
#ifdef __i386 |
|
949 |
/* |
|
950 |
* physmax is lowered if there is more memory than can be |
|
951 |
* physically addressed in 32 bit (PAE/non-PAE) modes. |
|
952 |
*/ |
|
953 |
if (mmu.pae_hat) { |
|
954 |
if (PFN_ABOVE64G(physmax)) { |
|
955 |
physinstalled -= (physmax - (PFN_64G - 1)); |
|
956 |
physmax = PFN_64G - 1; |
|
957 |
} |
|
958 |
} else { |
|
959 |
if (PFN_ABOVE4G(physmax)) { |
|
960 |
physinstalled -= (physmax - (PFN_4G - 1)); |
|
961 |
physmax = PFN_4G - 1; |
|
962 |
} |
|
963 |
} |
|
964 |
#endif |
|
965 |
||
966 |
startup_build_mem_nodes(bootops->boot_mem->physinstalled); |
|
967 |
||
968 |
if (BOP_GETPROPLEN(bootops, "enforce-prot-exec") >= 0) { |
|
969 |
int len = BOP_GETPROPLEN(bootops, "enforce-prot-exec"); |
|
970 |
char value[8]; |
|
971 |
||
972 |
if (len < 8) |
|
973 |
(void) BOP_GETPROP(bootops, "enforce-prot-exec", value); |
|
974 |
else |
|
975 |
(void) strcpy(value, ""); |
|
976 |
if (strcmp(value, "off") == 0) |
|
977 |
mmu.pt_nx = 0; |
|
978 |
} |
|
979 |
PRM_DEBUG(mmu.pt_nx); |
|
980 |
||
981 |
/* |
|
982 |
* We will need page_t's for every page in the system, except for |
|
983 |
* memory mapped at or above above the start of the kernel text segment. |
|
984 |
* |
|
985 |
* pages above e_modtext are attributed to kernel debugger (obp_pages) |
|
986 |
*/ |
|
987 |
npages = physinstalled - 1; /* avail_filter() skips page 0, so "- 1" */ |
|
988 |
obp_pages = 0; |
|
989 |
va = KERNEL_TEXT; |
|
990 |
while (hat_boot_probe(&va, &len, &pfn, &prot) != 0) { |
|
991 |
npages -= len >> MMU_PAGESHIFT; |
|
992 |
if (va >= (uintptr_t)e_moddata) |
|
993 |
obp_pages += len >> MMU_PAGESHIFT; |
|
994 |
va += len; |
|
995 |
} |
|
996 |
PRM_DEBUG(npages); |
|
997 |
PRM_DEBUG(obp_pages); |
|
998 |
||
999 |
/* |
|
1000 |
* If physmem is patched to be non-zero, use it instead of |
|
1001 |
* the computed value unless it is larger than the real |
|
1002 |
* amount of memory on hand. |
|
1003 |
*/ |
|
1417
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
1004 |
if (physmem == 0 || physmem > npages) { |
0 | 1005 |
physmem = npages; |
1417
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
1006 |
} else if (physmem < npages) { |
1479
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1007 |
orig_npages = npages; |
0 | 1008 |
npages = physmem; |
1417
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
1009 |
} |
0 | 1010 |
PRM_DEBUG(physmem); |
1011 |
||
1012 |
/* |
|
1013 |
* We now compute the sizes of all the initial allocations for |
|
1014 |
* structures the kernel needs in order do kmem_alloc(). These |
|
1015 |
* include: |
|
1016 |
* memsegs |
|
1017 |
* memlists |
|
1018 |
* page hash table |
|
1019 |
* page_t's |
|
1020 |
* page coloring data structs |
|
1021 |
*/ |
|
1022 |
memseg_sz = sizeof (struct memseg) * (memblocks + POSS_NEW_FRAGMENTS); |
|
1023 |
ADD_TO_ALLOCATIONS(memseg_base, memseg_sz); |
|
1024 |
PRM_DEBUG(memseg_sz); |
|
1025 |
||
1026 |
/* |
|
1027 |
* Reserve space for phys_avail/phys_install memlists. |
|
1028 |
* There's no real good way to know exactly how much room we'll need, |
|
1029 |
* but this should be a good upper bound. |
|
1030 |
*/ |
|
1031 |
memlist_sz = ROUND_UP_PAGE(2 * sizeof (struct memlist) * |
|
1032 |
(memblocks + POSS_NEW_FRAGMENTS)); |
|
1033 |
ADD_TO_ALLOCATIONS(memlist, memlist_sz); |
|
1034 |
PRM_DEBUG(memlist_sz); |
|
1035 |
||
1036 |
/* |
|
1037 |
* The page structure hash table size is a power of 2 |
|
1038 |
* such that the average hash chain length is PAGE_HASHAVELEN. |
|
1039 |
*/ |
|
1040 |
page_hashsz = npages / PAGE_HASHAVELEN; |
|
1041 |
page_hashsz = 1 << highbit(page_hashsz); |
|
1042 |
pagehash_sz = sizeof (struct page *) * page_hashsz; |
|
1043 |
ADD_TO_ALLOCATIONS(page_hash, pagehash_sz); |
|
1044 |
PRM_DEBUG(pagehash_sz); |
|
1045 |
||
1046 |
/* |
|
1047 |
* Set aside room for the page structures themselves. Note: on |
|
1048 |
* 64-bit systems we don't allocate page_t's for every page here. |
|
1049 |
* We just allocate enough to map the lowest 4GB of physical |
|
1050 |
* memory, minus those pages that are used for the "nucleus" kernel |
|
1051 |
* text and data. The remaining pages are allocated once we can |
|
1052 |
* map around boot. |
|
1053 |
* |
|
1054 |
* boot_npages is used to allocate an area big enough for our |
|
1055 |
* initial page_t's. kphym_init may use less than that. |
|
1056 |
*/ |
|
1057 |
boot_npages = npages; |
|
1058 |
#if defined(__amd64) |
|
1059 |
if (npages > mmu_btop(FOURGB - (econtig - s_text))) |
|
1060 |
boot_npages = mmu_btop(FOURGB - (econtig - s_text)); |
|
1061 |
#endif |
|
1062 |
PRM_DEBUG(boot_npages); |
|
1063 |
pp_sz = sizeof (struct page) * boot_npages; |
|
1064 |
ADD_TO_ALLOCATIONS(pp_base, pp_sz); |
|
1065 |
PRM_DEBUG(pp_sz); |
|
1066 |
||
1067 |
/* |
|
1068 |
* determine l2 cache info and memory size for page coloring |
|
1069 |
*/ |
|
1070 |
(void) getl2cacheinfo(CPU, |
|
1071 |
&l2cache_sz, &l2cache_linesz, &l2cache_assoc); |
|
1072 |
pagecolor_memsz = |
|
1073 |
page_coloring_init(l2cache_sz, l2cache_linesz, l2cache_assoc); |
|
1074 |
ADD_TO_ALLOCATIONS(pagecolor_mem, pagecolor_memsz); |
|
1075 |
PRM_DEBUG(pagecolor_memsz); |
|
1076 |
||
1077 |
page_ctrs_size = page_ctrs_sz(); |
|
1078 |
ADD_TO_ALLOCATIONS(page_ctrs_mem, page_ctrs_size); |
|
1079 |
PRM_DEBUG(page_ctrs_size); |
|
1080 |
||
1081 |
/* |
|
1082 |
* valloc_base will be below kernel text |
|
1083 |
* The extra pages are for the HAT and kmdb to map page tables. |
|
1084 |
*/ |
|
1085 |
valloc_sz = ROUND_UP_LPAGE(valloc_sz); |
|
1086 |
valloc_base = KERNEL_TEXT - valloc_sz; |
|
1087 |
PRM_DEBUG(valloc_base); |
|
1088 |
ptable_va = valloc_base - ptable_sz; |
|
1089 |
||
1090 |
#if defined(__amd64) |
|
1091 |
if (eprom_kernelbase && eprom_kernelbase != KERNELBASE) |
|
1092 |
cmn_err(CE_NOTE, "!kernelbase cannot be changed on 64-bit " |
|
1093 |
"systems."); |
|
1094 |
kernelbase = (uintptr_t)KERNELBASE; |
|
1095 |
core_base = (uintptr_t)COREHEAP_BASE; |
|
1096 |
core_size = ptable_va - core_base; |
|
1097 |
#else /* __i386 */ |
|
1098 |
/* |
|
1099 |
* We configure kernelbase based on: |
|
1100 |
* |
|
1101 |
* 1. user specified kernelbase via eeprom command. Value cannot exceed |
|
1102 |
* KERNELBASE_MAX. we large page align eprom_kernelbase |
|
1103 |
* |
|
1104 |
* 2. Default to KERNELBASE and adjust to 2X less the size for page_t. |
|
1105 |
* On large memory systems we must lower kernelbase to allow |
|
1106 |
* enough room for page_t's for all of memory. |
|
1107 |
* |
|
1108 |
* The value set here, might be changed a little later. |
|
1109 |
*/ |
|
1110 |
if (eprom_kernelbase) { |
|
1111 |
kernelbase = eprom_kernelbase & mmu.level_mask[1]; |
|
1112 |
if (kernelbase > KERNELBASE_MAX) |
|
1113 |
kernelbase = KERNELBASE_MAX; |
|
1114 |
} else { |
|
1115 |
kernelbase = (uintptr_t)KERNELBASE; |
|
1116 |
kernelbase -= ROUND_UP_4MEG(2 * valloc_sz); |
|
1117 |
} |
|
1118 |
ASSERT((kernelbase & mmu.level_offset[1]) == 0); |
|
1119 |
core_base = ptable_va; |
|
1120 |
core_size = 0; |
|
1121 |
#endif |
|
1122 |
||
1123 |
PRM_DEBUG(kernelbase); |
|
1124 |
PRM_DEBUG(core_base); |
|
1125 |
PRM_DEBUG(core_size); |
|
1126 |
||
1127 |
/* |
|
1128 |
* At this point, we can only use a portion of the kernelheap that |
|
1129 |
* will be available after we boot. Both 32-bit and 64-bit systems |
|
1130 |
* have this limitation, although the reasons are completely |
|
1131 |
* different. |
|
1132 |
* |
|
1133 |
* On 64-bit systems, the booter only supports allocations in the |
|
1134 |
* upper 4GB of memory, so we have to work with a reduced kernel |
|
1135 |
* heap until we take over all allocations. The booter also sits |
|
1136 |
* in the lower portion of that 4GB range, so we have to raise the |
|
1137 |
* bottom of the heap even further. |
|
1138 |
* |
|
1139 |
* On 32-bit systems we have to leave room to place segmap below |
|
1140 |
* the heap. We don't yet know how large segmap will be, so we |
|
1141 |
* have to be very conservative. |
|
1142 |
*/ |
|
1143 |
#if defined(__amd64) |
|
1144 |
/* |
|
1145 |
* XX64: For now, we let boot have the lower 2GB of the top 4GB |
|
1146 |
* address range. In the long run, that should be fixed. It's |
|
1147 |
* insane for a booter to need 2 2GB address ranges. |
|
1148 |
*/ |
|
1149 |
boot_kernelheap = (caddr_t)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE); |
|
1150 |
segmap_reserved = 0; |
|
1151 |
||
1152 |
#else /* __i386 */ |
|
1153 |
segkp_fromheap = 1; |
|
1154 |
segmap_reserved = ROUND_UP_LPAGE(MAX(segmapsize, SEGMAPMAX)); |
|
1155 |
boot_kernelheap = (caddr_t)(ROUND_UP_LPAGE(kernelbase) + |
|
1156 |
segmap_reserved); |
|
1157 |
#endif |
|
1158 |
PRM_DEBUG(boot_kernelheap); |
|
1159 |
kernelheap = boot_kernelheap; |
|
1160 |
ekernelheap = (char *)core_base; |
|
1161 |
||
1162 |
/* |
|
1163 |
* If segmap is too large we can push the bottom of the kernel heap |
|
1164 |
* higher than the base. Or worse, it could exceed the top of the |
|
1165 |
* VA space entirely, causing it to wrap around. |
|
1166 |
*/ |
|
1167 |
if (kernelheap >= ekernelheap || (uintptr_t)kernelheap < kernelbase) |
|
1168 |
panic("too little memory available for kernelheap," |
|
1169 |
" use a different kernelbase"); |
|
1170 |
||
1171 |
/* |
|
1172 |
* Now that we know the real value of kernelbase, |
|
1173 |
* update variables that were initialized with a value of |
|
1174 |
* KERNELBASE (in common/conf/param.c). |
|
1175 |
* |
|
1176 |
* XXX The problem with this sort of hackery is that the |
|
1177 |
* compiler just may feel like putting the const declarations |
|
1178 |
* (in param.c) into the .text section. Perhaps they should |
|
1179 |
* just be declared as variables there? |
|
1180 |
*/ |
|
1181 |
||
1182 |
#if defined(__amd64) |
|
1183 |
ASSERT(_kernelbase == KERNELBASE); |
|
1184 |
ASSERT(_userlimit == USERLIMIT); |
|
1185 |
/* |
|
1186 |
* As one final sanity check, verify that the "red zone" between |
|
1187 |
* kernel and userspace is exactly the size we expected. |
|
1188 |
*/ |
|
1189 |
ASSERT(_kernelbase == (_userlimit + (2 * 1024 * 1024))); |
|
1190 |
#else |
|
1191 |
*(uintptr_t *)&_kernelbase = kernelbase; |
|
1192 |
*(uintptr_t *)&_userlimit = kernelbase; |
|
1193 |
*(uintptr_t *)&_userlimit32 = _userlimit; |
|
1194 |
#endif |
|
1195 |
PRM_DEBUG(_kernelbase); |
|
1196 |
PRM_DEBUG(_userlimit); |
|
1197 |
PRM_DEBUG(_userlimit32); |
|
1198 |
||
1199 |
/* |
|
1200 |
* do all the initial allocations |
|
1201 |
*/ |
|
1202 |
perform_allocations(); |
|
1203 |
||
1204 |
/* |
|
1205 |
* Initialize the kernel heap. Note 3rd argument must be > 1st. |
|
1206 |
*/ |
|
1207 |
kernelheap_init(kernelheap, ekernelheap, kernelheap + MMU_PAGESIZE, |
|
1208 |
(void *)core_base, (void *)ptable_va); |
|
1209 |
||
1210 |
/* |
|
1211 |
* Build phys_install and phys_avail in kernel memspace. |
|
1212 |
* - phys_install should be all memory in the system. |
|
1213 |
* - phys_avail is phys_install minus any memory mapped before this |
|
1214 |
* point above KERNEL_TEXT. |
|
1215 |
*/ |
|
1216 |
current = phys_install = memlist; |
|
1217 |
copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t, NULL); |
|
1218 |
if ((caddr_t)current > (caddr_t)memlist + memlist_sz) |
|
1219 |
panic("physinstalled was too big!"); |
|
1220 |
if (prom_debug) |
|
1221 |
print_kernel_memlist("phys_install", phys_install); |
|
1222 |
||
1223 |
phys_avail = current; |
|
1224 |
PRM_POINT("Building phys_avail:\n"); |
|
1225 |
copy_memlist_filter(bootops->boot_mem->physinstalled, ¤t, |
|
1226 |
avail_filter); |
|
1227 |
if ((caddr_t)current > (caddr_t)memlist + memlist_sz) |
|
1228 |
panic("physavail was too big!"); |
|
1229 |
if (prom_debug) |
|
1230 |
print_kernel_memlist("phys_avail", phys_avail); |
|
1231 |
||
1232 |
/* |
|
1233 |
* setup page coloring |
|
1234 |
*/ |
|
1235 |
page_coloring_setup(pagecolor_mem); |
|
1236 |
page_lock_init(); /* currently a no-op */ |
|
1237 |
||
1238 |
/* |
|
1239 |
* free page list counters |
|
1240 |
*/ |
|
1241 |
(void) page_ctrs_alloc(page_ctrs_mem); |
|
1242 |
||
1243 |
/* |
|
1244 |
* Initialize the page structures from the memory lists. |
|
1245 |
*/ |
|
1246 |
availrmem_initial = availrmem = freemem = 0; |
|
1247 |
PRM_POINT("Calling kphysm_init()..."); |
|
1248 |
boot_npages = kphysm_init(pp_base, memseg_base, 0, boot_npages); |
|
1249 |
PRM_POINT("kphysm_init() done"); |
|
1250 |
PRM_DEBUG(boot_npages); |
|
1251 |
||
1252 |
/* |
|
1253 |
* Now that page_t's have been initialized, remove all the |
|
1254 |
* initial allocation pages from the kernel free page lists. |
|
1255 |
*/ |
|
1256 |
boot_mapin((caddr_t)valloc_base, valloc_sz); |
|
1257 |
||
1258 |
/* |
|
1259 |
* Initialize kernel memory allocator. |
|
1260 |
*/ |
|
1261 |
kmem_init(); |
|
1262 |
||
1263 |
/* |
|
1264 |
* print this out early so that we know what's going on |
|
1265 |
*/ |
|
1266 |
cmn_err(CE_CONT, "?features: %b\n", x86_feature, FMT_X86_FEATURE); |
|
1267 |
||
1268 |
/* |
|
1269 |
* Initialize bp_mapin(). |
|
1270 |
*/ |
|
1271 |
bp_init(MMU_PAGESIZE, HAT_STORECACHING_OK); |
|
1272 |
||
1479
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1273 |
/* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1274 |
* orig_npages is non-zero if physmem has been configured for less |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1275 |
* than the available memory. |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1276 |
*/ |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1277 |
if (orig_npages) { |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1278 |
#ifdef __i386 |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1279 |
/* |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1280 |
* use npages for physmem in case it has been temporarily |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1281 |
* modified via /etc/system in kmem_init/mod_read_system_file. |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1282 |
*/ |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1283 |
if (npages == PHYSMEM32) { |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1284 |
cmn_err(CE_WARN, "!Due to 32 bit virtual" |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1285 |
" address space limitations, limiting" |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1286 |
" physmem to 0x%lx of 0x%lx available pages", |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1287 |
npages, orig_npages); |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1288 |
} else { |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1289 |
cmn_err(CE_WARN, "!limiting physmem to 0x%lx of" |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1290 |
" 0x%lx available pages", npages, orig_npages); |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1291 |
} |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1292 |
#else |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1293 |
cmn_err(CE_WARN, "!limiting physmem to 0x%lx of" |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1294 |
" 0x%lx available pages", npages, orig_npages); |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1295 |
#endif |
33ca002df1d5
6373609 Solaris install fails on 16-way 64GB system with s10u1_19a.
kchow
parents:
1455
diff
changeset
|
1296 |
} |
0 | 1297 |
#if defined(__i386) |
1298 |
if (eprom_kernelbase && (eprom_kernelbase != kernelbase)) |
|
1299 |
cmn_err(CE_WARN, "kernelbase value, User specified 0x%lx, " |
|
1300 |
"System using 0x%lx", |
|
1301 |
(uintptr_t)eprom_kernelbase, (uintptr_t)kernelbase); |
|
1302 |
#endif |
|
1303 |
||
1304 |
#ifdef KERNELBASE_ABI_MIN |
|
1305 |
if (kernelbase < (uintptr_t)KERNELBASE_ABI_MIN) { |
|
1306 |
cmn_err(CE_NOTE, "!kernelbase set to 0x%lx, system is not " |
|
1307 |
"i386 ABI compliant.", (uintptr_t)kernelbase); |
|
1308 |
} |
|
1309 |
#endif |
|
1310 |
||
1311 |
PRM_POINT("startup_memlist() done"); |
|
1312 |
} |
|
1313 |
||
1314 |
static void |
|
1315 |
startup_modules(void) |
|
1316 |
{ |
|
1317 |
unsigned int i; |
|
1318 |
extern void prom_setup(void); |
|
1319 |
||
1320 |
PRM_POINT("startup_modules() starting..."); |
|
1321 |
/* |
|
1322 |
* Initialize ten-micro second timer so that drivers will |
|
1323 |
* not get short changed in their init phase. This was |
|
1324 |
* not getting called until clkinit which, on fast cpu's |
|
1325 |
* caused the drv_usecwait to be way too short. |
|
1326 |
*/ |
|
1327 |
microfind(); |
|
1328 |
||
1329 |
/* |
|
1330 |
* Read the GMT lag from /etc/rtc_config. |
|
1331 |
*/ |
|
1332 |
gmt_lag = process_rtc_config_file(); |
|
1333 |
||
1334 |
/* |
|
1335 |
* Calculate default settings of system parameters based upon |
|
1336 |
* maxusers, yet allow to be overridden via the /etc/system file. |
|
1337 |
*/ |
|
1338 |
param_calc(0); |
|
1339 |
||
1340 |
mod_setup(); |
|
1341 |
||
1342 |
/* |
|
1343 |
* Initialize system parameters. |
|
1344 |
*/ |
|
1345 |
param_init(); |
|
1346 |
||
1347 |
/* |
|
1348 |
* maxmem is the amount of physical memory we're playing with. |
|
1349 |
*/ |
|
1350 |
maxmem = physmem; |
|
1351 |
||
1352 |
/* |
|
1353 |
* Initialize the hat layer. |
|
1354 |
*/ |
|
1355 |
hat_init(); |
|
1356 |
||
1357 |
/* |
|
1358 |
* Initialize segment management stuff. |
|
1359 |
*/ |
|
1360 |
seg_init(); |
|
1361 |
||
1362 |
if (modload("fs", "specfs") == -1) |
|
1363 |
halt("Can't load specfs"); |
|
1364 |
||
1365 |
if (modload("fs", "devfs") == -1) |
|
1366 |
halt("Can't load devfs"); |
|
1367 |
||
1368 |
dispinit(); |
|
1369 |
||
1370 |
/* |
|
1371 |
* This is needed here to initialize hw_serial[] for cluster booting. |
|
1372 |
*/ |
|
1373 |
if ((i = modload("misc", "sysinit")) != (unsigned int)-1) |
|
1374 |
(void) modunload(i); |
|
1375 |
else |
|
1376 |
cmn_err(CE_CONT, "sysinit load failed"); |
|
1377 |
||
1378 |
/* Read cluster configuration data. */ |
|
1379 |
clconf_init(); |
|
1380 |
||
1381 |
/* |
|
1382 |
* Create a kernel device tree. First, create rootnex and |
|
1383 |
* then invoke bus specific code to probe devices. |
|
1384 |
*/ |
|
1385 |
setup_ddi(); |
|
1414
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1386 |
|
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1387 |
/* |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1388 |
* Set up the CPU module subsystem. Modifies the device tree, so it |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1389 |
* must be done after setup_ddi(). |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1390 |
*/ |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1391 |
cmi_init(); |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1392 |
|
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1393 |
/* |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1394 |
* Initialize the MCA handlers |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1395 |
*/ |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1396 |
if (x86_feature & X86_MCA) |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1397 |
cmi_mca_init(); |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1398 |
|
0 | 1399 |
/* |
1400 |
* Fake a prom tree such that /dev/openprom continues to work |
|
1401 |
*/ |
|
1402 |
prom_setup(); |
|
1403 |
||
1404 |
/* |
|
1405 |
* Load all platform specific modules |
|
1406 |
*/ |
|
1407 |
psm_modload(); |
|
1408 |
||
1409 |
PRM_POINT("startup_modules() done"); |
|
1410 |
} |
|
1411 |
||
1412 |
static void
startup_bop_gone(void)
{
	PRM_POINT("startup_bop_gone() starting...");

	/*
	 * The remaining HAT data structures have to be allocated before
	 * the boot loader is quiesced, so do that now.
	 */
	PRM_POINT("Calling hat_kern_alloc()...");
	hat_kern_alloc();
	PRM_POINT("hat_kern_alloc() done");

	/* Program the MTRRs (Memory type range registers). */
	setup_mtrr();

	PRM_POINT("startup_bop_gone() done");
}
|
1431 |
||
1432 |
/* |
|
1433 |
* Walk through the pagetables looking for pages mapped in by boot. If the |
|
1434 |
* setaside flag is set the pages are expected to be returned to the |
|
1435 |
* kernel later in boot, so we add them to the bootpages list. |
|
1436 |
*/ |
|
1437 |
static void |
|
1438 |
protect_boot_range(uintptr_t low, uintptr_t high, int setaside) |
|
1439 |
{ |
|
1440 |
uintptr_t va = low; |
|
1441 |
size_t len; |
|
1442 |
uint_t prot; |
|
1443 |
pfn_t pfn; |
|
1444 |
page_t *pp; |
|
1445 |
pgcnt_t boot_protect_cnt = 0; |
|
1446 |
||
1447 |
while (hat_boot_probe(&va, &len, &pfn, &prot) != 0 && va < high) { |
|
1448 |
if (va + len >= high) |
|
1449 |
panic("0x%lx byte mapping at 0x%p exceeds boot's " |
|
1450 |
"legal range.", len, (void *)va); |
|
1451 |
||
1452 |
while (len > 0) { |
|
1453 |
pp = page_numtopp_alloc(pfn); |
|
1454 |
if (pp != NULL) { |
|
1455 |
if (setaside == 0) |
|
1456 |
panic("Unexpected mapping by boot. " |
|
1457 |
"addr=%p pfn=%lx\n", |
|
1458 |
(void *)va, pfn); |
|
1459 |
||
1460 |
pp->p_next = bootpages; |
|
1461 |
bootpages = pp; |
|
1462 |
++boot_protect_cnt; |
|
1463 |
} |
|
1464 |
||
1465 |
++pfn; |
|
1466 |
len -= MMU_PAGESIZE; |
|
1467 |
va += MMU_PAGESIZE; |
|
1468 |
} |
|
1469 |
} |
|
1470 |
PRM_DEBUG(boot_protect_cnt); |
|
1471 |
} |
|
1472 |
||
1473 |
static void |
|
1474 |
startup_vm(void) |
|
1475 |
{ |
|
1476 |
struct segmap_crargs a; |
|
1477 |
extern void hat_kern_setup(void); |
|
1478 |
pgcnt_t pages_left; |
|
1479 |
||
423
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1480 |
extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1481 |
extern pgcnt_t auto_lpg_min_physmem; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1482 |
|
0 | 1483 |
PRM_POINT("startup_vm() starting..."); |
1484 |
||
1485 |
/* |
|
1486 |
* The next two loops are done in distinct steps in order |
|
1487 |
* to be sure that any page that is doubly mapped (both above |
|
1488 |
* KERNEL_TEXT and below kernelbase) is dealt with correctly. |
|
1489 |
* Note this may never happen, but it might someday. |
|
1490 |
*/ |
|
1491 |
||
1492 |
bootpages = NULL; |
|
1493 |
PRM_POINT("Protecting boot pages"); |
|
1494 |
/* |
|
1495 |
* Protect any pages mapped above KERNEL_TEXT that somehow have |
|
1496 |
* page_t's. This can only happen if something weird allocated |
|
1497 |
* in this range (like kadb/kmdb). |
|
1498 |
*/ |
|
1499 |
protect_boot_range(KERNEL_TEXT, (uintptr_t)-1, 0); |
|
1500 |
||
1501 |
/* |
|
1502 |
* Before we can take over memory allocation/mapping from the boot |
|
1503 |
* loader we must remove from our free page lists any boot pages that |
|
1504 |
* will stay mapped until release_bootstrap(). |
|
1505 |
*/ |
|
1506 |
protect_boot_range(0, kernelbase, 1); |
|
1507 |
#if defined(__amd64) |
|
1508 |
protect_boot_range(BOOT_DOUBLEMAP_BASE, |
|
1509 |
BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE, 0); |
|
1510 |
#endif |
|
1511 |
||
1512 |
/* |
|
1513 |
* Copy in boot's page tables, set up extra page tables for the kernel, |
|
1514 |
* and switch to the kernel's context. |
|
1515 |
*/ |
|
1516 |
PRM_POINT("Calling hat_kern_setup()..."); |
|
1517 |
hat_kern_setup(); |
|
1518 |
||
1519 |
/* |
|
1520 |
* It is no longer safe to call BOP_ALLOC(), so make sure we don't. |
|
1521 |
*/ |
|
1522 |
bootops->bsys_alloc = NULL; |
|
1523 |
PRM_POINT("hat_kern_setup() done"); |
|
1524 |
||
1525 |
hat_cpu_online(CPU); |
|
1526 |
||
1527 |
/* |
|
1528 |
* Before we call kvm_init(), we need to establish the final size |
|
1529 |
* of the kernel's heap. So, we need to figure out how much space |
|
1530 |
* to set aside for segkp, segkpm, and segmap. |
|
1531 |
*/ |
|
1532 |
final_kernelheap = (caddr_t)ROUND_UP_LPAGE(kernelbase); |
|
1533 |
#if defined(__amd64) |
|
1534 |
if (kpm_desired) { |
|
1535 |
/* |
|
1536 |
* Segkpm appears at the bottom of the kernel's address |
|
1537 |
* range. To detect accidental overruns of the user |
|
1538 |
* address space, we leave a "red zone" of unmapped memory |
|
1539 |
* between kernelbase and the beginning of segkpm. |
|
1540 |
*/ |
|
1541 |
kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE; |
|
1542 |
kpm_size = mmu_ptob(physmax); |
|
1543 |
PRM_DEBUG(kpm_vbase); |
|
1544 |
PRM_DEBUG(kpm_size); |
|
1545 |
final_kernelheap = |
|
1546 |
(caddr_t)ROUND_UP_TOPLEVEL(kpm_vbase + kpm_size); |
|
1547 |
} |
|
1548 |
||
1549 |
if (!segkp_fromheap) { |
|
1550 |
size_t sz = mmu_ptob(segkpsize); |
|
1551 |
||
1552 |
/* |
|
1553 |
* determine size of segkp and adjust the bottom of the |
|
1554 |
* kernel's heap. |
|
1555 |
*/ |
|
1556 |
if (sz < SEGKPMINSIZE || sz > SEGKPMAXSIZE) { |
|
1557 |
sz = SEGKPDEFSIZE; |
|
1558 |
cmn_err(CE_WARN, "!Illegal value for segkpsize. " |
|
1559 |
"segkpsize has been reset to %ld pages", |
|
1560 |
mmu_btop(sz)); |
|
1561 |
} |
|
1562 |
sz = MIN(sz, MAX(SEGKPMINSIZE, mmu_ptob(physmem))); |
|
1563 |
||
1564 |
segkpsize = mmu_btop(ROUND_UP_LPAGE(sz)); |
|
1565 |
segkp_base = final_kernelheap; |
|
1566 |
PRM_DEBUG(segkpsize); |
|
1567 |
PRM_DEBUG(segkp_base); |
|
1568 |
final_kernelheap = segkp_base + mmu_ptob(segkpsize); |
|
1569 |
PRM_DEBUG(final_kernelheap); |
|
1570 |
} |
|
1571 |
||
1572 |
/* |
|
1573 |
* put the range of VA for device mappings next |
|
1574 |
*/ |
|
1575 |
toxic_addr = (uintptr_t)final_kernelheap; |
|
1576 |
PRM_DEBUG(toxic_addr); |
|
1577 |
final_kernelheap = (char *)toxic_addr + toxic_size; |
|
1578 |
#endif |
|
1579 |
PRM_DEBUG(final_kernelheap); |
|
1580 |
ASSERT(final_kernelheap < boot_kernelheap); |
|
1581 |
||
1582 |
/* |
|
1583 |
* Users can change segmapsize through eeprom or /etc/system. |
|
1584 |
* If the variable is tuned through eeprom, there is no upper |
|
1585 |
* bound on the size of segmap. If it is tuned through |
|
1586 |
* /etc/system on 32-bit systems, it must be no larger than we |
|
1587 |
* planned for in startup_memlist(). |
|
1588 |
*/ |
|
1589 |
segmapsize = MAX(ROUND_UP_LPAGE(segmapsize), SEGMAPDEFAULT); |
|
1590 |
segkmap_start = ROUND_UP_LPAGE((uintptr_t)final_kernelheap); |
|
1591 |
||
1592 |
#if defined(__i386) |
|
1593 |
if (segmapsize > segmap_reserved) { |
|
1594 |
cmn_err(CE_NOTE, "!segmapsize may not be set > 0x%lx in " |
|
1595 |
"/etc/system. Use eeprom.", (long)SEGMAPMAX); |
|
1596 |
segmapsize = segmap_reserved; |
|
1597 |
} |
|
1598 |
/* |
|
1599 |
* 32-bit systems don't have segkpm or segkp, so segmap appears at |
|
1600 |
* the bottom of the kernel's address range. Set aside space for a |
|
1601 |
* red zone just below the start of segmap. |
|
1602 |
*/ |
|
1603 |
segkmap_start += KERNEL_REDZONE_SIZE; |
|
1604 |
segmapsize -= KERNEL_REDZONE_SIZE; |
|
1605 |
#endif |
|
1606 |
final_kernelheap = (char *)(segkmap_start + segmapsize); |
|
1607 |
||
1608 |
PRM_DEBUG(segkmap_start); |
|
1609 |
PRM_DEBUG(segmapsize); |
|
1610 |
PRM_DEBUG(final_kernelheap); |
|
1611 |
||
1612 |
/* |
|
1613 |
* Initialize VM system |
|
1614 |
*/ |
|
1615 |
PRM_POINT("Calling kvm_init()..."); |
|
1616 |
kvm_init(); |
|
1617 |
PRM_POINT("kvm_init() done"); |
|
1618 |
||
1619 |
/* |
|
1620 |
* Tell kmdb that the VM system is now working |
|
1621 |
*/ |
|
1622 |
if (boothowto & RB_DEBUG) |
|
1623 |
kdi_dvec_vmready(); |
|
1624 |
||
1625 |
/* |
|
1626 |
* Mangle the brand string etc. |
|
1627 |
*/ |
|
1628 |
cpuid_pass3(CPU); |
|
1629 |
||
1630 |
PRM_DEBUG(final_kernelheap); |
|
1631 |
||
1632 |
/* |
|
1633 |
* Now that we can use memory outside the top 4GB (on 64-bit |
|
1634 |
* systems) and we know the size of segmap, we can set the final |
|
1635 |
* size of the kernel's heap. Note: on 64-bit systems we still |
|
1636 |
* can't touch anything in the bottom half of the top 4GB range |
|
1637 |
* because boot still has pages mapped there. |
|
1638 |
*/ |
|
1639 |
if (final_kernelheap < boot_kernelheap) { |
|
1640 |
kernelheap_extend(final_kernelheap, boot_kernelheap); |
|
1641 |
#if defined(__amd64) |
|
1642 |
kmem_setaside = vmem_xalloc(heap_arena, BOOT_DOUBLEMAP_SIZE, |
|
1643 |
MMU_PAGESIZE, 0, 0, (void *)(BOOT_DOUBLEMAP_BASE), |
|
1644 |
(void *)(BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE), |
|
1645 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |
|
1646 |
PRM_DEBUG(kmem_setaside); |
|
1647 |
if (kmem_setaside == NULL) |
|
1648 |
panic("Could not protect boot's memory"); |
|
1649 |
#endif |
|
1650 |
} |
|
1651 |
/* |
|
1652 |
* Now that the kernel heap may have grown significantly, we need |
|
1653 |
* to make all the remaining page_t's available to back that memory. |
|
1654 |
* |
|
1655 |
* XX64 this should probably wait till after release boot-strap too. |
|
1656 |
*/ |
|
1657 |
pages_left = npages - boot_npages; |
|
1658 |
if (pages_left > 0) { |
|
1659 |
PRM_DEBUG(pages_left); |
|
1660 |
(void) kphysm_init(NULL, memseg_base, boot_npages, pages_left); |
|
1661 |
} |
|
1662 |
||
1663 |
#if defined(__amd64) |
|
1664 |
||
1665 |
/* |
|
1666 |
* Create the device arena for toxic (to dtrace/kmdb) mappings. |
|
1667 |
*/ |
|
1668 |
device_arena = vmem_create("device", (void *)toxic_addr, |
|
1669 |
toxic_size, MMU_PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP); |
|
1670 |
||
1671 |
#else /* __i386 */ |
|
1672 |
||
1673 |
/* |
|
1674 |
* allocate the bit map that tracks toxic pages |
|
1675 |
*/ |
|
1676 |
toxic_bit_map_len = btop((ulong_t)(ptable_va - kernelbase)); |
|
1677 |
PRM_DEBUG(toxic_bit_map_len); |
|
1678 |
toxic_bit_map = |
|
1679 |
kmem_zalloc(BT_SIZEOFMAP(toxic_bit_map_len), KM_NOSLEEP); |
|
1680 |
ASSERT(toxic_bit_map != NULL); |
|
1681 |
PRM_DEBUG(toxic_bit_map); |
|
1682 |
||
1683 |
#endif /* __i386 */ |
|
1684 |
||
1685 |
||
1686 |
/* |
|
1687 |
* Now that we've got more VA, as well as the ability to allocate from |
|
1688 |
* it, tell the debugger. |
|
1689 |
*/ |
|
1690 |
if (boothowto & RB_DEBUG) |
|
1691 |
kdi_dvec_memavail(); |
|
1692 |
||
1693 |
/* |
|
1694 |
* The following code installs a special page fault handler (#pf) |
|
1695 |
* to work around a pentium bug. |
|
1696 |
*/ |
|
1697 |
#if !defined(__amd64) |
|
1698 |
if (x86_type == X86_TYPE_P5) { |
|
1699 |
gate_desc_t *newidt; |
|
1700 |
desctbr_t newidt_r; |
|
1701 |
||
1702 |
if ((newidt = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP)) == NULL) |
|
1703 |
panic("failed to install pentium_pftrap"); |
|
1704 |
||
1705 |
bcopy(idt0, newidt, sizeof (idt0)); |
|
1706 |
set_gatesegd(&newidt[T_PGFLT], &pentium_pftrap, |
|
1707 |
KCS_SEL, 0, SDT_SYSIGT, SEL_KPL); |
|
1708 |
||
1709 |
(void) as_setprot(&kas, (caddr_t)newidt, MMU_PAGESIZE, |
|
1710 |
PROT_READ|PROT_EXEC); |
|
1711 |
||
1712 |
newidt_r.dtr_limit = sizeof (idt0) - 1; |
|
1713 |
newidt_r.dtr_base = (uintptr_t)newidt; |
|
1714 |
CPU->cpu_idt = newidt; |
|
1715 |
wr_idtr(&newidt_r); |
|
1716 |
} |
|
1717 |
#endif /* !__amd64 */ |
|
1718 |
||
1719 |
/* |
|
1720 |
* Map page pfn=0 for drivers, such as kd, that need to pick up |
|
1721 |
* parameters left there by controllers/BIOS. |
|
1722 |
*/ |
|
1723 |
PRM_POINT("setup up p0_va"); |
|
1724 |
p0_va = i86devmap(0, 1, PROT_READ); |
|
1725 |
PRM_DEBUG(p0_va); |
|
1726 |
||
1727 |
cmn_err(CE_CONT, "?mem = %luK (0x%lx)\n", |
|
1728 |
physinstalled << (MMU_PAGESHIFT - 10), ptob(physinstalled)); |
|
1729 |
||
841
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
1730 |
/* |
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
1731 |
* disable automatic large pages for small memory systems or |
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
1732 |
* when the disable flag is set. |
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
1733 |
*/ |
814b4a127357
6344639 system spends all its time trying and failing to make big pages
kchow
parents:
810
diff
changeset
|
1734 |
if (physmem < auto_lpg_min_physmem || auto_lpg_disable) { |
423
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1735 |
exec_lpg_disable = 1; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1736 |
use_brk_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1737 |
use_stk_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1738 |
use_zmap_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1739 |
} |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1740 |
|
0 | 1741 |
PRM_POINT("Calling hat_init_finish()..."); |
1742 |
hat_init_finish(); |
|
1743 |
PRM_POINT("hat_init_finish() done"); |
|
1744 |
||
1745 |
/* |
|
1746 |
* Initialize the segkp segment type. |
|
1747 |
*/ |
|
1748 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
1749 |
if (!segkp_fromheap) { |
|
1750 |
if (seg_attach(&kas, (caddr_t)segkp_base, mmu_ptob(segkpsize), |
|
1751 |
segkp) < 0) { |
|
1752 |
panic("startup: cannot attach segkp"); |
|
1753 |
/*NOTREACHED*/ |
|
1754 |
} |
|
1755 |
} else { |
|
1756 |
/* |
|
1757 |
* For 32 bit x86 systems, we will have segkp under the heap. |
|
1758 |
* There will not be a segkp segment. We do, however, need |
|
1759 |
* to fill in the seg structure. |
|
1760 |
*/ |
|
1761 |
segkp->s_as = &kas; |
|
1762 |
} |
|
1763 |
if (segkp_create(segkp) != 0) { |
|
1764 |
panic("startup: segkp_create failed"); |
|
1765 |
/*NOTREACHED*/ |
|
1766 |
} |
|
1767 |
PRM_DEBUG(segkp); |
|
1768 |
rw_exit(&kas.a_lock); |
|
1769 |
||
1770 |
/* |
|
1771 |
* kpm segment |
|
1772 |
*/ |
|
1773 |
segmap_kpm = 0; |
|
1774 |
if (kpm_desired) { |
|
1775 |
kpm_init(); |
|
1776 |
kpm_enable = 1; |
|
1777 |
} |
|
1778 |
||
1779 |
/* |
|
1780 |
* Now create segmap segment. |
|
1781 |
*/ |
|
1782 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
1783 |
if (seg_attach(&kas, (caddr_t)segkmap_start, segmapsize, segkmap) < 0) { |
|
1784 |
panic("cannot attach segkmap"); |
|
1785 |
/*NOTREACHED*/ |
|
1786 |
} |
|
1787 |
PRM_DEBUG(segkmap); |
|
1788 |
||
1789 |
/* |
|
1790 |
* The 64 bit HAT permanently maps only segmap's page tables. |
|
1791 |
* The 32 bit HAT maps the heap's page tables too. |
|
1792 |
*/ |
|
1793 |
#if defined(__amd64) |
|
1794 |
hat_kmap_init(segkmap_start, segmapsize); |
|
1795 |
#else /* __i386 */ |
|
1796 |
ASSERT(segkmap_start + segmapsize == (uintptr_t)final_kernelheap); |
|
1797 |
hat_kmap_init(segkmap_start, (uintptr_t)ekernelheap - segkmap_start); |
|
1798 |
#endif /* __i386 */ |
|
1799 |
||
1800 |
a.prot = PROT_READ | PROT_WRITE; |
|
1801 |
a.shmsize = 0; |
|
1802 |
a.nfreelist = segmapfreelists; |
|
1803 |
||
1804 |
if (segmap_create(segkmap, (caddr_t)&a) != 0) |
|
1805 |
panic("segmap_create segkmap"); |
|
1806 |
rw_exit(&kas.a_lock); |
|
1807 |
||
1808 |
setup_vaddr_for_ppcopy(CPU); |
|
1809 |
||
1810 |
segdev_init(); |
|
1811 |
pmem_init(); |
|
1812 |
PRM_POINT("startup_vm() done"); |
|
1813 |
} |
|
1814 |
||
1815 |
static void |
|
1816 |
startup_end(void) |
|
1817 |
{ |
|
1818 |
extern void setx86isalist(void); |
|
1819 |
||
1820 |
PRM_POINT("startup_end() starting..."); |
|
1821 |
||
1822 |
/* |
|
1823 |
* Perform tasks that get done after most of the VM |
|
1824 |
* initialization has been done but before the clock |
|
1825 |
* and other devices get started. |
|
1826 |
*/ |
|
1827 |
kern_setup1(); |
|
1828 |
||
1829 |
/* |
|
1830 |
* Perform CPC initialization for this CPU. |
|
1831 |
*/ |
|
1832 |
kcpc_hw_init(CPU); |
|
1833 |
||
1834 |
#if defined(__amd64) |
|
1835 |
/* |
|
1836 |
* Validate support for syscall/sysret |
|
1837 |
* XX64 -- include SSE, SSE2, etc. here too? |
|
1838 |
*/ |
|
1839 |
if ((x86_feature & X86_ASYSC) == 0) { |
|
1840 |
cmn_err(CE_WARN, |
|
1841 |
"cpu%d does not support syscall/sysret", CPU->cpu_id); |
|
1842 |
} |
|
1843 |
#endif |
|
1844 |
/* |
|
1845 |
* Configure the system. |
|
1846 |
*/ |
|
1847 |
PRM_POINT("Calling configure()..."); |
|
1848 |
configure(); /* set up devices */ |
|
1849 |
PRM_POINT("configure() done"); |
|
1850 |
||
1851 |
/* |
|
1852 |
* Set the isa_list string to the defined instruction sets we |
|
1853 |
* support. |
|
1854 |
*/ |
|
1855 |
setx86isalist(); |
|
1455
b43f098fa50c
6378953 allocation of interrupt threads could be more common
andrei
parents:
1417
diff
changeset
|
1856 |
cpu_intr_alloc(CPU, NINTR_THREADS); |
0 | 1857 |
psm_install(); |
1858 |
||
1859 |
/* |
|
1860 |
* We're done with bootops. We don't unmap the bootstrap yet because |
|
1861 |
* we're still using bootsvcs. |
|
1862 |
*/ |
|
1863 |
PRM_POINT("zeroing out bootops"); |
|
1864 |
*bootopsp = (struct bootops *)0; |
|
1865 |
bootops = (struct bootops *)NULL; |
|
1866 |
||
1867 |
PRM_POINT("Enabling interrupts"); |
|
1868 |
(*picinitf)(); |
|
1869 |
sti(); |
|
1870 |
||
1871 |
(void) add_avsoftintr((void *)&softlevel1_hdl, 1, softlevel1, |
|
1872 |
"softlevel1", NULL, NULL); /* XXX to be moved later */ |
|
1873 |
||
1874 |
PRM_POINT("startup_end() done"); |
|
1875 |
} |
|
1876 |
||
1877 |
extern char hw_serial[]; |
|
1878 |
char *_hs1107 = hw_serial; |
|
1879 |
ulong_t _bdhs34; |
|
1880 |
||
1881 |
void |
|
1882 |
post_startup(void) |
|
1883 |
{ |
|
1884 |
/* |
|
1885 |
* Set the system wide, processor-specific flags to be passed |
|
1886 |
* to userland via the aux vector for performance hints and |
|
1887 |
* instruction set extensions. |
|
1888 |
*/ |
|
1889 |
bind_hwcap(); |
|
1890 |
||
1891 |
/* |
|
437 | 1892 |
* Load the System Management BIOS into the global ksmbios handle, |
1893 |
* if an SMBIOS is present on this system. |
|
1894 |
*/ |
|
1895 |
ksmbios = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL); |
|
1896 |
||
1897 |
/* |
|
0 | 1898 |
* Startup memory scrubber. |
1899 |
*/ |
|
437 | 1900 |
memscrub_init(); |
0 | 1901 |
|
1902 |
/* |
|
1414
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1903 |
* Complete CPU module initialization |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1904 |
*/ |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1905 |
cmi_post_init(); |
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1906 |
|
b4126407ac5b
PSARC 2006/020 FMA for Athlon 64 and Opteron Processors
cindi
parents:
851
diff
changeset
|
1907 |
/* |
0 | 1908 |
* Perform forceloading tasks for /etc/system. |
1909 |
*/ |
|
1910 |
(void) mod_sysctl(SYS_FORCELOAD, NULL); |
|
1911 |
||
1912 |
/* |
|
1913 |
* ON4.0: Force /proc module in until clock interrupt handle fixed |
|
1914 |
* ON4.0: This must be fixed or restated in /etc/systems. |
|
1915 |
*/ |
|
1916 |
(void) modload("fs", "procfs"); |
|
1917 |
||
1918 |
#if defined(__i386) |
|
1919 |
/* |
|
1920 |
* Check for required functional Floating Point hardware, |
|
1921 |
* unless FP hardware explicitly disabled. |
|
1922 |
*/ |
|
1923 |
if (fpu_exists && (fpu_pentium_fdivbug || fp_kind == FP_NO)) |
|
1924 |
halt("No working FP hardware found"); |
|
1925 |
#endif |
|
1926 |
||
1927 |
maxmem = freemem; |
|
1928 |
||
1929 |
add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi); |
|
1930 |
||
1931 |
/* |
|
1932 |
* Perform the formal initialization of the boot chip, |
|
1933 |
* and associate the boot cpu with it. |
|
1934 |
* This must be done after the cpu node for CPU has been |
|
1935 |
* added to the device tree, when the necessary probing to |
|
1936 |
* know the chip type and chip "id" is performed. |
|
1937 |
*/ |
|
1938 |
chip_cpu_init(CPU); |
|
1939 |
chip_cpu_assign(CPU); |
|
1940 |
} |
|
1941 |
||
1942 |
static int |
|
1943 |
pp_in_ramdisk(page_t *pp) |
|
1944 |
{ |
|
1945 |
extern uint64_t ramdisk_start, ramdisk_end; |
|
1946 |
||
1947 |
return ((pp->p_pagenum >= btop(ramdisk_start)) && |
|
1948 |
(pp->p_pagenum < btopr(ramdisk_end))); |
|
1949 |
} |
|
1950 |
||
1951 |
void |
|
1952 |
release_bootstrap(void) |
|
1953 |
{ |
|
1954 |
int root_is_ramdisk; |
|
1955 |
pfn_t pfn; |
|
1956 |
page_t *pp; |
|
1957 |
extern void kobj_boot_unmountroot(void); |
|
1958 |
extern dev_t rootdev; |
|
1959 |
||
1960 |
/* unmount boot ramdisk and release kmem usage */ |
|
1961 |
kobj_boot_unmountroot(); |
|
1962 |
||
1963 |
/* |
|
1964 |
* We're finished using the boot loader so free its pages. |
|
1965 |
*/ |
|
1966 |
PRM_POINT("Unmapping lower boot pages"); |
|
1967 |
clear_boot_mappings(0, kernelbase); |
|
1968 |
#if defined(__amd64) |
|
1969 |
PRM_POINT("Unmapping upper boot pages"); |
|
1970 |
clear_boot_mappings(BOOT_DOUBLEMAP_BASE, |
|
1971 |
BOOT_DOUBLEMAP_BASE + BOOT_DOUBLEMAP_SIZE); |
|
1972 |
#endif |
|
1973 |
||
1974 |
/* |
|
1975 |
* If root isn't on ramdisk, destroy the hardcoded |
|
1976 |
* ramdisk node now and release the memory. Else, |
|
1977 |
* ramdisk memory is kept in rd_pages. |
|
1978 |
*/ |
|
1979 |
root_is_ramdisk = (getmajor(rootdev) == ddi_name_to_major("ramdisk")); |
|
1980 |
if (!root_is_ramdisk) { |
|
1981 |
dev_info_t *dip = ddi_find_devinfo("ramdisk", -1, 0); |
|
1982 |
ASSERT(dip && ddi_get_parent(dip) == ddi_root_node()); |
|
1983 |
ndi_rele_devi(dip); /* held from ddi_find_devinfo */ |
|
1984 |
(void) ddi_remove_child(dip, 0); |
|
1985 |
} |
|
1986 |
||
1987 |
PRM_POINT("Releasing boot pages"); |
|
1988 |
while (bootpages) { |
|
1989 |
pp = bootpages; |
|
1990 |
bootpages = pp->p_next; |
|
1991 |
if (root_is_ramdisk && pp_in_ramdisk(pp)) { |
|
1992 |
pp->p_next = rd_pages; |
|
1993 |
rd_pages = pp; |
|
1994 |
continue; |
|
1995 |
} |
|
1996 |
pp->p_next = (struct page *)0; |
|
1997 |
page_free(pp, 1); |
|
1998 |
} |
|
1999 |
||
2000 |
/* |
|
2001 |
* Find 1 page below 1 MB so that other processors can boot up. |
|
2002 |
* Make sure it has a kernel VA as well as a 1:1 mapping. |
|
2003 |
* We should have just free'd one up. |
|
2004 |
*/ |
|
2005 |
if (use_mp) { |
|
2006 |
for (pfn = 1; pfn < btop(1*1024*1024); pfn++) { |
|
2007 |
if (page_numtopp_alloc(pfn) == NULL) |
|
2008 |
continue; |
|
2009 |
rm_platter_va = i86devmap(pfn, 1, |
|
2010 |
PROT_READ | PROT_WRITE | PROT_EXEC); |
|
2011 |
rm_platter_pa = ptob(pfn); |
|
2012 |
hat_devload(kas.a_hat, |
|
2013 |
(caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE, |
|
2014 |
pfn, PROT_READ | PROT_WRITE | PROT_EXEC, |
|
2015 |
HAT_LOAD_NOCONSIST); |
|
2016 |
break; |
|
2017 |
} |
|
2018 |
if (pfn == btop(1*1024*1024)) |
|
2019 |
panic("No page available for starting " |
|
2020 |
"other processors"); |
|
2021 |
} |
|
2022 |
||
2023 |
#if defined(__amd64) |
|
2024 |
PRM_POINT("Returning boot's VA space to kernel heap"); |
|
2025 |
if (kmem_setaside != NULL) |
|
2026 |
vmem_free(heap_arena, kmem_setaside, BOOT_DOUBLEMAP_SIZE); |
|
2027 |
#endif |
|
2028 |
} |
|
2029 |
||
2030 |
/* |
|
2031 |
* Initialize the platform-specific parts of a page_t. |
|
2032 |
*/ |
|
2033 |
void |
|
2034 |
add_physmem_cb(page_t *pp, pfn_t pnum) |
|
2035 |
{ |
|
2036 |
pp->p_pagenum = pnum; |
|
2037 |
pp->p_mapping = NULL; |
|
2038 |
pp->p_embed = 0; |
|
2039 |
pp->p_share = 0; |
|
2040 |
pp->p_mlentry = 0; |
|
2041 |
} |
|
2042 |
||
2043 |
/* |
|
2044 |
* kphysm_init() initializes physical memory. |
|
2045 |
*/ |
|
2046 |
static pgcnt_t |
|
2047 |
kphysm_init( |
|
2048 |
page_t *inpp, |
|
2049 |
struct memseg *memsegp, |
|
2050 |
pgcnt_t start, |
|
2051 |
pgcnt_t npages) |
|
2052 |
{ |
|
2053 |
struct memlist *pmem; |
|
2054 |
struct memseg *cur_memseg; |
|
2055 |
struct memseg **memsegpp; |
|
2056 |
pfn_t base_pfn; |
|
2057 |
pgcnt_t num; |
|
2058 |
pgcnt_t total_skipped = 0; |
|
2059 |
pgcnt_t skipping = 0; |
|
2060 |
pgcnt_t pages_done = 0; |
|
2061 |
pgcnt_t largepgcnt; |
|
2062 |
uint64_t addr; |
|
2063 |
uint64_t size; |
|
2064 |
page_t *pp = inpp; |
|
2065 |
int dobreak = 0; |
|
2066 |
extern pfn_t ddiphysmin; |
|
2067 |
||
2068 |
ASSERT(page_hash != NULL && page_hashsz != 0); |
|
2069 |
||
2070 |
for (cur_memseg = memsegp; cur_memseg->pages != NULL; cur_memseg++); |
|
2071 |
ASSERT(cur_memseg == memsegp || start > 0); |
|
2072 |
||
2073 |
for (pmem = phys_avail; pmem && npages; pmem = pmem->next) { |
|
2074 |
/* |
|
2075 |
* In a 32 bit kernel can't use higher memory if we're |
|
2076 |
* not booting in PAE mode. This check takes care of that. |
|
2077 |
*/ |
|
2078 |
addr = pmem->address; |
|
2079 |
size = pmem->size; |
|
2080 |
if (btop(addr) > physmax) |
|
2081 |
continue; |
|
2082 |
||
2083 |
/* |
|
2084 |
* align addr and size - they may not be at page boundaries |
|
2085 |
*/ |
|
2086 |
if ((addr & MMU_PAGEOFFSET) != 0) { |
|
2087 |
addr += MMU_PAGEOFFSET; |
|
2088 |
addr &= ~(uint64_t)MMU_PAGEOFFSET; |
|
2089 |
size -= addr - pmem->address; |
|
2090 |
} |
|
2091 |
||
810
b238ce84deac
6342422 kernel panic with patch 118844-19 on Gateway 600YG
kchow
parents:
783
diff
changeset
|
2092 |
/* only process pages below or equal to physmax */ |
b238ce84deac
6342422 kernel panic with patch 118844-19 on Gateway 600YG
kchow
parents:
783
diff
changeset
|
2093 |
if ((btop(addr + size) - 1) > physmax) |
b238ce84deac
6342422 kernel panic with patch 118844-19 on Gateway 600YG
kchow
parents:
783
diff
changeset
|
2094 |
size = ptob(physmax - btop(addr) + 1); |
0 | 2095 |
|
2096 |
num = btop(size); |
|
2097 |
if (num == 0) |
|
2098 |
continue; |
|
2099 |
||
2100 |
if (total_skipped < start) { |
|
2101 |
if (start - total_skipped > num) { |
|
2102 |
total_skipped += num; |
|
2103 |
continue; |
|
2104 |
} |
|
2105 |
skipping = start - total_skipped; |
|
2106 |
num -= skipping; |
|
2107 |
addr += (MMU_PAGESIZE * skipping); |
|
2108 |
total_skipped = start; |
|
2109 |
} |
|
2110 |
if (num == 0) |
|
2111 |
continue; |
|
2112 |
||
2113 |
if (num > npages) |
|
2114 |
num = npages; |
|
2115 |
||
2116 |
npages -= num; |
|
2117 |
pages_done += num; |
|
2118 |
base_pfn = btop(addr); |
|
2119 |
||
2120 |
/* |
|
2121 |
* If the caller didn't provide space for the page |
|
2122 |
* structures, carve them out of the memseg they will |
|
2123 |
* represent. |
|
2124 |
*/ |
|
2125 |
if (pp == NULL) { |
|
2126 |
pgcnt_t pp_pgs; |
|
2127 |
||
2128 |
if (num <= 1) |
|
2129 |
continue; |
|
2130 |
||
2131 |
/* |
|
2132 |
* Compute how many of the pages we need to use for |
|
2133 |
* page_ts |
|
2134 |
*/ |
|
2135 |
pp_pgs = (num * sizeof (page_t)) / MMU_PAGESIZE + 1; |
|
2136 |
while (mmu_ptob(pp_pgs - 1) / sizeof (page_t) >= |
|
2137 |
num - pp_pgs + 1) |
|
2138 |
--pp_pgs; |
|
2139 |
PRM_DEBUG(pp_pgs); |
|
2140 |
||
2141 |
pp = vmem_alloc(heap_arena, mmu_ptob(pp_pgs), |
|
2142 |
VM_NOSLEEP); |
|
2143 |
if (pp == NULL) { |
|
2144 |
cmn_err(CE_WARN, "Unable to add %ld pages to " |
|
2145 |
"the system.", num); |
|
2146 |
continue; |
|
2147 |
} |
|
2148 |
||
2149 |
hat_devload(kas.a_hat, (void *)pp, mmu_ptob(pp_pgs), |
|
2150 |
base_pfn, PROT_READ | PROT_WRITE | HAT_UNORDERED_OK, |
|
2151 |
HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); |
|
2152 |
bzero(pp, mmu_ptob(pp_pgs)); |
|
2153 |
num -= pp_pgs; |
|
2154 |
base_pfn += pp_pgs; |
|
2155 |
} |
|
2156 |
||
2157 |
if (prom_debug) |
|
2158 |
prom_printf("MEMSEG addr=0x%" PRIx64 |
|
2159 |
" pgs=0x%lx pfn 0x%lx-0x%lx\n", |
|
2160 |
addr, num, base_pfn, base_pfn + num); |
|
2161 |
||
2162 |
/* |
|
2163 |
* drop pages below ddiphysmin to simplify ddi memory |
|
2164 |
* allocation with non-zero addr_lo requests. |
|
2165 |
*/ |
|
2166 |
if (base_pfn < ddiphysmin) { |
|
2167 |
if (base_pfn + num <= ddiphysmin) { |
|
2168 |
/* drop entire range below ddiphysmin */ |
|
2169 |
continue; |
|
2170 |
} |
|
2171 |
/* adjust range to ddiphysmin */ |
|
2172 |
pp += (ddiphysmin - base_pfn); |
|
2173 |
num -= (ddiphysmin - base_pfn); |
|
2174 |
base_pfn = ddiphysmin; |
|
2175 |
} |
|
2176 |
/* |
|
2177 |
* Build the memsegs entry |
|
2178 |
*/ |
|
2179 |
cur_memseg->pages = pp; |
|
2180 |
cur_memseg->epages = pp + num; |
|
2181 |
cur_memseg->pages_base = base_pfn; |
|
2182 |
cur_memseg->pages_end = base_pfn + num; |
|
2183 |
||
2184 |
/* |
|
2185 |
* insert in memseg list in decreasing pfn range order. |
|
2186 |
* Low memory is typically more fragmented such that this |
|
2187 |
* ordering keeps the larger ranges at the front of the list |
|
2188 |
* for code that searches memseg. |
|
2189 |
*/ |
|
2190 |
memsegpp = &memsegs; |
|
2191 |
for (;;) { |
|
2192 |
if (*memsegpp == NULL) { |
|
2193 |
/* empty memsegs */ |
|
2194 |
memsegs = cur_memseg; |
|
2195 |
break; |
|
2196 |
} |
|
2197 |
/* check for continuity with start of memsegpp */ |
|
2198 |
if (cur_memseg->pages_end == (*memsegpp)->pages_base) { |
|
2199 |
if (cur_memseg->epages == (*memsegpp)->pages) { |
|
2200 |
/* |
|
2201 |
* contiguous pfn and page_t's. Merge |
|
2202 |
* cur_memseg into *memsegpp. Drop |
|
2203 |
* cur_memseg |
|
2204 |
*/ |
|
2205 |
(*memsegpp)->pages_base = |
|
2206 |
cur_memseg->pages_base; |
|
2207 |
(*memsegpp)->pages = |
|
2208 |
cur_memseg->pages; |
|
2209 |
/* |
|
2210 |
* check if contiguous with the end of |
|
2211 |
* the next memseg. |
|
2212 |
*/ |
|
2213 |
if ((*memsegpp)->next && |
|
2214 |
((*memsegpp)->pages_base == |
|
2215 |
(*memsegpp)->next->pages_end)) { |
|
2216 |
cur_memseg = *memsegpp; |
|
2217 |
memsegpp = &((*memsegpp)->next); |
|
2218 |
dobreak = 1; |
|
2219 |
} else { |
|
2220 |
break; |
|
2221 |
} |
|
2222 |
} else { |
|
2223 |
/* |
|
2224 |
* contiguous pfn but not page_t's. |
|
2225 |
* drop last pfn/page_t in cur_memseg |
|
2226 |
* to prevent creation of large pages |
|
2227 |
* with noncontiguous page_t's if not |
|
2228 |
* aligned to largest page boundary. |
|
2229 |
*/ |
|
2230 |
largepgcnt = page_get_pagecnt( |
|
2231 |
page_num_pagesizes() - 1); |
|
2232 |
||
2233 |
if (cur_memseg->pages_end & |
|
2234 |
(largepgcnt - 1)) { |
|
2235 |
num--; |
|
2236 |
cur_memseg->epages--; |
|
2237 |
cur_memseg->pages_end--; |
|
2238 |
} |
|
2239 |
} |
|
2240 |
} |
|
2241 |
||
2242 |
/* check for continuity with end of memsegpp */ |
|
2243 |
if (cur_memseg->pages_base == (*memsegpp)->pages_end) { |
|
2244 |
if (cur_memseg->pages == (*memsegpp)->epages) { |
|
2245 |
/* |
|
2246 |
* contiguous pfn and page_t's. Merge |
|
2247 |
* cur_memseg into *memsegpp. Drop |
|
2248 |
* cur_memseg. |
|
2249 |
*/ |
|
2250 |
if (dobreak) { |
|
2251 |
/* merge previously done */ |
|
2252 |
cur_memseg->pages = |
|
2253 |
(*memsegpp)->pages; |
|
2254 |
cur_memseg->pages_base = |
|
2255 |
(*memsegpp)->pages_base; |
|
2256 |
cur_memseg->next = |
|
2257 |
(*memsegpp)->next; |
|
2258 |
} else { |
|
2259 |
(*memsegpp)->pages_end = |
|
2260 |
cur_memseg->pages_end; |
|
2261 |
(*memsegpp)->epages = |
|
2262 |
cur_memseg->epages; |
|
2263 |
} |
|
2264 |
break; |
|
2265 |
} |
|
2266 |
/* |
|
2267 |
* contiguous pfn but not page_t's. |
|
2268 |
* drop first pfn/page_t in cur_memseg |
|
2269 |
* to prevent creation of large pages |
|
2270 |
* with noncontiguous page_t's if not |
|
2271 |
* aligned to largest page boundary. |
|
2272 |
*/ |
|
2273 |
largepgcnt = page_get_pagecnt( |
|
2274 |
page_num_pagesizes() - 1); |
|
2275 |
if (base_pfn & (largepgcnt - 1)) { |
|
2276 |
num--; |
|
2277 |
base_pfn++; |
|
2278 |
cur_memseg->pages++; |
|
2279 |
cur_memseg->pages_base++; |
|
2280 |
pp = cur_memseg->pages; |
|
2281 |
} |
|
2282 |
if (dobreak) |
|
2283 |
break; |
|
2284 |
} |
|
2285 |
||
2286 |
if (cur_memseg->pages_base >= |
|
2287 |
(*memsegpp)->pages_end) { |
|
2288 |
cur_memseg->next = *memsegpp; |
|
2289 |
*memsegpp = cur_memseg; |
|
2290 |
break; |
|
2291 |
} |
|
2292 |
if ((*memsegpp)->next == NULL) { |
|
2293 |
cur_memseg->next = NULL; |
|
2294 |
(*memsegpp)->next = cur_memseg; |
|
2295 |
break; |
|
2296 |
} |
|
2297 |
memsegpp = &((*memsegpp)->next); |
|
2298 |
ASSERT(*memsegpp != NULL); |
|
2299 |
} |
|
2300 |
||
2301 |
/* |
|
2302 |
* add_physmem() initializes the PSM part of the page |
|
2303 |
* struct by calling the PSM back with add_physmem_cb(). |
|
2304 |
* In addition it coalesces pages into larger pages as |
|
2305 |
* it initializes them. |
|
2306 |
*/ |
|
2307 |
add_physmem(pp, num, base_pfn); |
|
2308 |
cur_memseg++; |
|
2309 |
availrmem_initial += num; |
|
2310 |
availrmem += num; |
|
2311 |
||
2312 |
/* |
|
2313 |
* If the caller provided the page frames to us, then |
|
2314 |
* advance in that list. Otherwise, prepare to allocate |
|
2315 |
* our own page frames for the next memseg. |
|
2316 |
*/ |
|
2317 |
pp = (inpp == NULL) ? NULL : pp + num; |
|
2318 |
} |
|
2319 |
||
2320 |
PRM_DEBUG(availrmem_initial); |
|
2321 |
PRM_DEBUG(availrmem); |
|
2322 |
PRM_DEBUG(freemem); |
|
2323 |
build_pfn_hash(); |
|
2324 |
return (pages_done); |
|
2325 |
} |
|
2326 |
||
2327 |
/* |
|
2328 |
* Kernel VM initialization. |
|
2329 |
*/ |
|
2330 |
static void |
|
2331 |
kvm_init(void) |
|
2332 |
{ |
|
2333 |
#ifdef DEBUG |
|
2334 |
extern void _start(); |
|
2335 |
||
2336 |
ASSERT((caddr_t)_start == s_text); |
|
2337 |
#endif |
|
2338 |
ASSERT((((uintptr_t)s_text) & MMU_PAGEOFFSET) == 0); |
|
2339 |
||
2340 |
/* |
|
2341 |
* Put the kernel segments in kernel address space. |
|
2342 |
*/ |
|
2343 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
2344 |
as_avlinit(&kas); |
|
2345 |
||
2346 |
(void) seg_attach(&kas, s_text, e_moddata - s_text, &ktextseg); |
|
2347 |
(void) segkmem_create(&ktextseg); |
|
2348 |
||
2349 |
(void) seg_attach(&kas, (caddr_t)valloc_base, valloc_sz, &kvalloc); |
|
2350 |
(void) segkmem_create(&kvalloc); |
|
2351 |
||
2352 |
/* |
|
2353 |
* We're about to map out /boot. This is the beginning of the |
|
2354 |
* system resource management transition. We can no longer |
|
2355 |
* call into /boot for I/O or memory allocations. |
|
2356 |
* |
|
2357 |
* XX64 - Is this still correct with kernelheap_extend() being called |
|
2358 |
* later than this???? |
|
2359 |
*/ |
|
2360 |
(void) seg_attach(&kas, final_kernelheap, |
|
2361 |
ekernelheap - final_kernelheap, &kvseg); |
|
2362 |
(void) segkmem_create(&kvseg); |
|
2363 |
||
2364 |
#if defined(__amd64) |
|
2365 |
(void) seg_attach(&kas, (caddr_t)core_base, core_size, &kvseg_core); |
|
2366 |
(void) segkmem_create(&kvseg_core); |
|
2367 |
#endif |
|
2368 |
||
2369 |
(void) seg_attach(&kas, (caddr_t)SEGDEBUGBASE, (size_t)SEGDEBUGSIZE, |
|
2370 |
&kdebugseg); |
|
2371 |
(void) segkmem_create(&kdebugseg); |
|
2372 |
||
2373 |
rw_exit(&kas.a_lock); |
|
2374 |
||
2375 |
/* |
|
2376 |
* Ensure that the red zone at kernelbase is never accessible. |
|
2377 |
*/ |
|
2378 |
(void) as_setprot(&kas, (caddr_t)kernelbase, KERNEL_REDZONE_SIZE, 0); |
|
2379 |
||
2380 |
/* |
|
2381 |
* Make the text writable so that it can be hot patched by DTrace. |
|
2382 |
*/ |
|
2383 |
(void) as_setprot(&kas, s_text, e_modtext - s_text, |
|
2384 |
PROT_READ | PROT_WRITE | PROT_EXEC); |
|
2385 |
||
2386 |
/* |
|
2387 |
* Make data writable until end. |
|
2388 |
*/ |
|
2389 |
(void) as_setprot(&kas, s_data, e_moddata - s_data, |
|
2390 |
PROT_READ | PROT_WRITE | PROT_EXEC); |
|
2391 |
} |
|
2392 |
||
2393 |
/* |
|
2394 |
* These are MTTR registers supported by P6 |
|
2395 |
*/ |
|
2396 |
static struct mtrrvar mtrrphys_arr[MAX_MTRRVAR]; |
|
2397 |
static uint64_t mtrr64k, mtrr16k1, mtrr16k2; |
|
2398 |
static uint64_t mtrr4k1, mtrr4k2, mtrr4k3; |
|
2399 |
static uint64_t mtrr4k4, mtrr4k5, mtrr4k6; |
|
2400 |
static uint64_t mtrr4k7, mtrr4k8, mtrrcap; |
|
2401 |
uint64_t mtrrdef, pat_attr_reg; |
|
2402 |
||
2403 |
/* |
|
2404 |
* Disable reprogramming of MTRRs by default. |
|
2405 |
*/ |
|
2406 |
int enable_relaxed_mtrr = 0; |
|
2407 |
||
2408 |
void |
|
2409 |
setup_mtrr() |
|
2410 |
{ |
|
2411 |
int i, ecx; |
|
2412 |
int vcnt; |
|
2413 |
struct mtrrvar *mtrrphys; |
|
2414 |
||
2415 |
if (!(x86_feature & X86_MTRR)) |
|
2416 |
return; |
|
2417 |
||
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2418 |
mtrrcap = rdmsr(REG_MTRRCAP); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2419 |
mtrrdef = rdmsr(REG_MTRRDEF); |
0 | 2420 |
if (mtrrcap & MTRRCAP_FIX) { |
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2421 |
mtrr64k = rdmsr(REG_MTRR64K); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2422 |
mtrr16k1 = rdmsr(REG_MTRR16K1); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2423 |
mtrr16k2 = rdmsr(REG_MTRR16K2); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2424 |
mtrr4k1 = rdmsr(REG_MTRR4K1); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2425 |
mtrr4k2 = rdmsr(REG_MTRR4K2); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2426 |
mtrr4k3 = rdmsr(REG_MTRR4K3); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2427 |
mtrr4k4 = rdmsr(REG_MTRR4K4); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2428 |
mtrr4k5 = rdmsr(REG_MTRR4K5); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2429 |
mtrr4k6 = rdmsr(REG_MTRR4K6); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2430 |
mtrr4k7 = rdmsr(REG_MTRR4K7); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2431 |
mtrr4k8 = rdmsr(REG_MTRR4K8); |
0 | 2432 |
} |
2433 |
if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR) |
|
2434 |
vcnt = MAX_MTRRVAR; |
|
2435 |
||
2436 |
for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr; |
|
2437 |
i < vcnt - 1; i++, ecx += 2, mtrrphys++) { |
|
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2438 |
mtrrphys->mtrrphys_base = rdmsr(ecx); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2439 |
mtrrphys->mtrrphys_mask = rdmsr(ecx + 1); |
0 | 2440 |
if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) { |
2441 |
mtrrphys->mtrrphys_mask &= ~MTRRPHYSMASK_V; |
|
2442 |
} |
|
2443 |
} |
|
2444 |
if (x86_feature & X86_PAT) { |
|
2445 |
if (enable_relaxed_mtrr) |
|
2446 |
mtrrdef = MTRR_TYPE_WB|MTRRDEF_FE|MTRRDEF_E; |
|
2447 |
pat_attr_reg = PAT_DEFAULT_ATTRIBUTE; |
|
2448 |
} |
|
2449 |
||
2450 |
mtrr_sync(); |
|
2451 |
} |
|
2452 |
||
2453 |
/* |
|
2454 |
* Sync current cpu mtrr with the incore copy of mtrr. |
|
2455 |
* This function has to be invoked with interrupts disabled |
|
2456 |
* Currently we do not capture other cpu's. This is invoked on cpu0 |
|
2457 |
* just after reading /etc/system. |
|
2458 |
* On other cpu's its invoked from mp_startup(). |
|
2459 |
*/ |
|
2460 |
void |
|
2461 |
mtrr_sync() |
|
2462 |
{ |
|
2463 |
uint_t crvalue, cr0_orig; |
|
2464 |
int vcnt, i, ecx; |
|
2465 |
struct mtrrvar *mtrrphys; |
|
2466 |
||
2467 |
cr0_orig = crvalue = getcr0(); |
|
2468 |
crvalue |= CR0_CD; |
|
2469 |
crvalue &= ~CR0_NW; |
|
2470 |
setcr0(crvalue); |
|
2471 |
invalidate_cache(); |
|
2472 |
setcr3(getcr3()); |
|
2473 |
||
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2474 |
if (x86_feature & X86_PAT) |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2475 |
wrmsr(REG_MTRRPAT, pat_attr_reg); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2476 |
|
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2477 |
wrmsr(REG_MTRRDEF, rdmsr(REG_MTRRDEF) & |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2478 |
~((uint64_t)(uintptr_t)MTRRDEF_E)); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2479 |
|
0 | 2480 |
if (mtrrcap & MTRRCAP_FIX) { |
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2481 |
wrmsr(REG_MTRR64K, mtrr64k); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2482 |
wrmsr(REG_MTRR16K1, mtrr16k1); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2483 |
wrmsr(REG_MTRR16K2, mtrr16k2); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2484 |
wrmsr(REG_MTRR4K1, mtrr4k1); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2485 |
wrmsr(REG_MTRR4K2, mtrr4k2); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2486 |
wrmsr(REG_MTRR4K3, mtrr4k3); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2487 |
wrmsr(REG_MTRR4K4, mtrr4k4); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2488 |
wrmsr(REG_MTRR4K5, mtrr4k5); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2489 |
wrmsr(REG_MTRR4K6, mtrr4k6); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2490 |
wrmsr(REG_MTRR4K7, mtrr4k7); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2491 |
wrmsr(REG_MTRR4K8, mtrr4k8); |
0 | 2492 |
} |
2493 |
if ((vcnt = (mtrrcap & MTRRCAP_VCNTMASK)) > MAX_MTRRVAR) |
|
2494 |
vcnt = MAX_MTRRVAR; |
|
2495 |
for (i = 0, ecx = REG_MTRRPHYSBASE0, mtrrphys = mtrrphys_arr; |
|
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2496 |
i < vcnt - 1; i++, ecx += 2, mtrrphys++) { |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2497 |
wrmsr(ecx, mtrrphys->mtrrphys_base); |
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2498 |
wrmsr(ecx + 1, mtrrphys->mtrrphys_mask); |
0 | 2499 |
} |
770
0eda482eb80f
6311933 rdmsr/wrmsr do not need to set/pass values via memory pointers
kucharsk
parents:
437
diff
changeset
|
2500 |
wrmsr(REG_MTRRDEF, mtrrdef); |
0 | 2501 |
setcr3(getcr3()); |
2502 |
invalidate_cache(); |
|
2503 |
setcr0(cr0_orig); |
|
2504 |
} |
|
2505 |
||
2506 |
/* |
|
2507 |
* resync mtrr so that BIOS is happy. Called from mdboot |
|
2508 |
*/ |
|
2509 |
void |
|
2510 |
mtrr_resync() |
|
2511 |
{ |
|
2512 |
if ((x86_feature & X86_PAT) && enable_relaxed_mtrr) { |
|
2513 |
/* |
|
2514 |
* We could have changed the default mtrr definition. |
|
2515 |
* Put it back to uncached which is what it is at power on |
|
2516 |
*/ |
|
2517 |
mtrrdef = MTRR_TYPE_UC|MTRRDEF_FE|MTRRDEF_E; |
|
2518 |
mtrr_sync(); |
|
2519 |
} |
|
2520 |
} |
|
2521 |
||
2522 |
void |
|
2523 |
get_system_configuration() |
|
2524 |
{ |
|
2525 |
char prop[32]; |
|
2526 |
u_longlong_t nodes_ll, cpus_pernode_ll, lvalue; |
|
2527 |
||
2528 |
if (((BOP_GETPROPLEN(bootops, "nodes") > sizeof (prop)) || |
|
2529 |
(BOP_GETPROP(bootops, "nodes", prop) < 0) || |
|
2530 |
(kobj_getvalue(prop, &nodes_ll) == -1) || |
|
2531 |
(nodes_ll > MAXNODES)) || |
|
2532 |
((BOP_GETPROPLEN(bootops, "cpus_pernode") > sizeof (prop)) || |
|
2533 |
(BOP_GETPROP(bootops, "cpus_pernode", prop) < 0) || |
|
2534 |
(kobj_getvalue(prop, &cpus_pernode_ll) == -1))) { |
|
2535 |
||
2536 |
system_hardware.hd_nodes = 1; |
|
2537 |
system_hardware.hd_cpus_per_node = 0; |
|
2538 |
} else { |
|
2539 |
system_hardware.hd_nodes = (int)nodes_ll; |
|
2540 |
system_hardware.hd_cpus_per_node = (int)cpus_pernode_ll; |
|
2541 |
} |
|
2542 |
if ((BOP_GETPROPLEN(bootops, "kernelbase") > sizeof (prop)) || |
|
2543 |
(BOP_GETPROP(bootops, "kernelbase", prop) < 0) || |
|
2544 |
(kobj_getvalue(prop, &lvalue) == -1)) |
|
2545 |
eprom_kernelbase = NULL; |
|
2546 |
else |
|
2547 |
eprom_kernelbase = (uintptr_t)lvalue; |
|
2548 |
||
2549 |
if ((BOP_GETPROPLEN(bootops, "segmapsize") > sizeof (prop)) || |
|
2550 |
(BOP_GETPROP(bootops, "segmapsize", prop) < 0) || |
|
2551 |
(kobj_getvalue(prop, &lvalue) == -1)) { |
|
2552 |
segmapsize = SEGMAPDEFAULT; |
|
2553 |
} else { |
|
2554 |
segmapsize = (uintptr_t)lvalue; |
|
2555 |
} |
|
2556 |
||
2557 |
if ((BOP_GETPROPLEN(bootops, "segmapfreelists") > sizeof (prop)) || |
|
2558 |
(BOP_GETPROP(bootops, "segmapfreelists", prop) < 0) || |
|
2559 |
(kobj_getvalue(prop, &lvalue) == -1)) { |
|
2560 |
segmapfreelists = 0; /* use segmap driver default */ |
|
2561 |
} else { |
|
2562 |
segmapfreelists = (int)lvalue; |
|
2563 |
} |
|
1417
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2564 |
|
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2565 |
if ((BOP_GETPROPLEN(bootops, "physmem") <= sizeof (prop)) && |
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2566 |
(BOP_GETPROP(bootops, "physmem", prop) >= 0) && |
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2567 |
(kobj_getvalue(prop, &lvalue) != -1)) { |
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2568 |
physmem = (uintptr_t)lvalue; |
fa316336e215
6377034 setting physmem in /etc/system does not have desired effect on x86
kchow
parents:
1414
diff
changeset
|
2569 |
} |
0 | 2570 |
} |
2571 |
||
2572 |
/* |
|
2573 |
* Add to a memory list. |
|
2574 |
* start = start of new memory segment |
|
2575 |
* len = length of new memory segment in bytes |
|
2576 |
* new = pointer to a new struct memlist |
|
2577 |
* memlistp = memory list to which to add segment. |
|
2578 |
*/ |
|
2579 |
static void |
|
2580 |
memlist_add( |
|
2581 |
uint64_t start, |
|
2582 |
uint64_t len, |
|
2583 |
struct memlist *new, |
|
2584 |
struct memlist **memlistp) |
|
2585 |
{ |
|
2586 |
struct memlist *cur; |
|
2587 |
uint64_t end = start + len; |
|
2588 |
||
2589 |
new->address = start; |
|
2590 |
new->size = len; |
|
2591 |
||
2592 |
cur = *memlistp; |
|
2593 |
||
2594 |
while (cur) { |
|
2595 |
if (cur->address >= end) { |
|
2596 |
new->next = cur; |
|
2597 |
*memlistp = new; |
|
2598 |
new->prev = cur->prev; |
|
2599 |
cur->prev = new; |
|
2600 |
return; |
|
2601 |
} |
|
2602 |
ASSERT(cur->address + cur->size <= start); |
|
2603 |
if (cur->next == NULL) { |
|
2604 |
cur->next = new; |
|
2605 |
new->prev = cur; |
|
2606 |
new->next = NULL; |
|
2607 |
return; |
|
2608 |
} |
|
2609 |
memlistp = &cur->next; |
|
2610 |
cur = cur->next; |
|
2611 |
} |
|
2612 |
} |
|
2613 |
||
2614 |
void |
|
2615 |
kobj_vmem_init(vmem_t **text_arena, vmem_t **data_arena) |
|
2616 |
{ |
|
2617 |
size_t tsize = e_modtext - modtext; |
|
2618 |
size_t dsize = e_moddata - moddata; |
|
2619 |
||
2620 |
*text_arena = vmem_create("module_text", tsize ? modtext : NULL, tsize, |
|
2621 |
1, segkmem_alloc, segkmem_free, heaptext_arena, 0, VM_SLEEP); |
|
2622 |
*data_arena = vmem_create("module_data", dsize ? moddata : NULL, dsize, |
|
2623 |
1, segkmem_alloc, segkmem_free, heap32_arena, 0, VM_SLEEP); |
|
2624 |
} |
|
2625 |
||
2626 |
caddr_t |
|
2627 |
kobj_text_alloc(vmem_t *arena, size_t size) |
|
2628 |
{ |
|
2629 |
return (vmem_alloc(arena, size, VM_SLEEP | VM_BESTFIT)); |
|
2630 |
} |
|
2631 |
||
2632 |
/*ARGSUSED*/ |
|
2633 |
caddr_t |
|
2634 |
kobj_texthole_alloc(caddr_t addr, size_t size) |
|
2635 |
{ |
|
2636 |
panic("unexpected call to kobj_texthole_alloc()"); |
|
2637 |
/*NOTREACHED*/ |
|
2638 |
return (0); |
|
2639 |
} |
|
2640 |
||
2641 |
/*ARGSUSED*/ |
|
2642 |
void |
|
2643 |
kobj_texthole_free(caddr_t addr, size_t size) |
|
2644 |
{ |
|
2645 |
panic("unexpected call to kobj_texthole_free()"); |
|
2646 |
} |
|
2647 |
||
2648 |
/* |
|
2649 |
* This is called just after configure() in startup(). |
|
2650 |
* |
|
2651 |
* The ISALIST concept is a bit hopeless on Intel, because |
|
2652 |
* there's no guarantee of an ever-more-capable processor |
|
2653 |
* given that various parts of the instruction set may appear |
|
2654 |
* and disappear between different implementations. |
|
2655 |
* |
|
2656 |
* While it would be possible to correct it and even enhance |
|
2657 |
* it somewhat, the explicit hardware capability bitmask allows |
|
2658 |
* more flexibility. |
|
2659 |
* |
|
2660 |
* So, we just leave this alone. |
|
2661 |
*/ |
|
2662 |
void |
|
2663 |
setx86isalist(void) |
|
2664 |
{ |
|
2665 |
char *tp; |
|
2666 |
size_t len; |
|
2667 |
extern char *isa_list; |
|
2668 |
||
2669 |
#define TBUFSIZE 1024 |
|
2670 |
||
2671 |
tp = kmem_alloc(TBUFSIZE, KM_SLEEP); |
|
2672 |
*tp = '\0'; |
|
2673 |
||
2674 |
#if defined(__amd64) |
|
2675 |
(void) strcpy(tp, "amd64 "); |
|
2676 |
#endif |
|
2677 |
||
2678 |
switch (x86_vendor) { |
|
2679 |
case X86_VENDOR_Intel: |
|
2680 |
case X86_VENDOR_AMD: |
|
2681 |
case X86_VENDOR_TM: |
|
2682 |
if (x86_feature & X86_CMOV) { |
|
2683 |
/* |
|
2684 |
* Pentium Pro or later |
|
2685 |
*/ |
|
2686 |
(void) strcat(tp, "pentium_pro"); |
|
2687 |
(void) strcat(tp, x86_feature & X86_MMX ? |
|
2688 |
"+mmx pentium_pro " : " "); |
|
2689 |
} |
|
2690 |
/*FALLTHROUGH*/ |
|
2691 |
case X86_VENDOR_Cyrix: |
|
2692 |
/* |
|
2693 |
* The Cyrix 6x86 does not have any Pentium features |
|
2694 |
* accessible while not at privilege level 0. |
|
2695 |
*/ |
|
2696 |
if (x86_feature & X86_CPUID) { |
|
2697 |
(void) strcat(tp, "pentium"); |
|
2698 |
(void) strcat(tp, x86_feature & X86_MMX ? |
|
2699 |
"+mmx pentium " : " "); |
|
2700 |
} |
|
2701 |
break; |
|
2702 |
default: |
|
2703 |
break; |
|
2704 |
} |
|
2705 |
(void) strcat(tp, "i486 i386 i86"); |
|
2706 |
len = strlen(tp) + 1; /* account for NULL at end of string */ |
|
2707 |
isa_list = strcpy(kmem_alloc(len, KM_SLEEP), tp); |
|
2708 |
kmem_free(tp, TBUFSIZE); |
|
2709 |
||
2710 |
#undef TBUFSIZE |
|
2711 |
} |
|
2712 |
||
2713 |
||
2714 |
#ifdef __amd64 |
|
2715 |
||
2716 |
void * |
|
2717 |
device_arena_alloc(size_t size, int vm_flag) |
|
2718 |
{ |
|
2719 |
return (vmem_alloc(device_arena, size, vm_flag)); |
|
2720 |
} |
|
2721 |
||
2722 |
void |
|
2723 |
device_arena_free(void *vaddr, size_t size) |
|
2724 |
{ |
|
2725 |
vmem_free(device_arena, vaddr, size); |
|
2726 |
} |
|
2727 |
||
2728 |
#else |
|
2729 |
||
2730 |
void * |
|
2731 |
device_arena_alloc(size_t size, int vm_flag) |
|
2732 |
{ |
|
2733 |
caddr_t vaddr; |
|
2734 |
uintptr_t v; |
|
2735 |
size_t start; |
|
2736 |
size_t end; |
|
2737 |
||
2738 |
vaddr = vmem_alloc(heap_arena, size, vm_flag); |
|
2739 |
if (vaddr == NULL) |
|
2740 |
return (NULL); |
|
2741 |
||
2742 |
v = (uintptr_t)vaddr; |
|
2743 |
ASSERT(v >= kernelbase); |
|
2744 |
ASSERT(v + size <= ptable_va); |
|
2745 |
||
2746 |
start = btop(v - kernelbase); |
|
2747 |
end = btop(v + size - 1 - kernelbase); |
|
2748 |
ASSERT(start < toxic_bit_map_len); |
|
2749 |
ASSERT(end < toxic_bit_map_len); |
|
2750 |
||
2751 |
while (start <= end) { |
|
2752 |
BT_ATOMIC_SET(toxic_bit_map, start); |
|
2753 |
++start; |
|
2754 |
} |
|
2755 |
return (vaddr); |
|
2756 |
} |
|
2757 |
||
2758 |
void |
|
2759 |
device_arena_free(void *vaddr, size_t size) |
|
2760 |
{ |
|
2761 |
uintptr_t v = (uintptr_t)vaddr; |
|
2762 |
size_t start; |
|
2763 |
size_t end; |
|
2764 |
||
2765 |
ASSERT(v >= kernelbase); |
|
2766 |
ASSERT(v + size <= ptable_va); |
|
2767 |
||
2768 |
start = btop(v - kernelbase); |
|
2769 |
end = btop(v + size - 1 - kernelbase); |
|
2770 |
ASSERT(start < toxic_bit_map_len); |
|
2771 |
ASSERT(end < toxic_bit_map_len); |
|
2772 |
||
2773 |
while (start <= end) { |
|
2774 |
ASSERT(BT_TEST(toxic_bit_map, start) != 0); |
|
2775 |
BT_ATOMIC_CLEAR(toxic_bit_map, start); |
|
2776 |
++start; |
|
2777 |
} |
|
2778 |
vmem_free(heap_arena, vaddr, size); |
|
2779 |
} |
|
2780 |
||
2781 |
/* |
|
2782 |
* returns 1st address in range that is in device arena, or NULL |
|
2783 |
* if len is not NULL it returns the length of the toxic range |
|
2784 |
*/ |
|
2785 |
void * |
|
2786 |
device_arena_contains(void *vaddr, size_t size, size_t *len) |
|
2787 |
{ |
|
2788 |
uintptr_t v = (uintptr_t)vaddr; |
|
2789 |
uintptr_t eaddr = v + size; |
|
2790 |
size_t start; |
|
2791 |
size_t end; |
|
2792 |
||
2793 |
/* |
|
2794 |
* if called very early by kmdb, just return NULL |
|
2795 |
*/ |
|
2796 |
if (toxic_bit_map == NULL) |
|
2797 |
return (NULL); |
|
2798 |
||
2799 |
/* |
|
2800 |
* First check if we're completely outside the bitmap range. |
|
2801 |
*/ |
|
2802 |
if (v >= ptable_va || eaddr < kernelbase) |
|
2803 |
return (NULL); |
|
2804 |
||
2805 |
/* |
|
2806 |
* Trim ends of search to look at only what the bitmap covers. |
|
2807 |
*/ |
|
2808 |
if (v < kernelbase) |
|
2809 |
v = kernelbase; |
|
2810 |
start = btop(v - kernelbase); |
|
2811 |
end = btop(eaddr - kernelbase); |
|
2812 |
if (end >= toxic_bit_map_len) |
|
2813 |
end = toxic_bit_map_len; |
|
2814 |
||
2815 |
if (bt_range(toxic_bit_map, &start, &end, end) == 0) |
|
2816 |
return (NULL); |
|
2817 |
||
2818 |
v = kernelbase + ptob(start); |
|
2819 |
if (len != NULL) |
|
2820 |
*len = ptob(end - start); |
|
2821 |
return ((void *)v); |
|
2822 |
} |
|
2823 |
||
2824 |
#endif |