author | andrei |
Fri, 17 Feb 2006 17:24:21 -0800 | |
changeset 1455 | b43f098fa50c |
parent 1253 | 0df630a41817 |
child 1492 | 8877aa3d25de |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1253 | 5 |
* Common Development and Distribution License (the "License"). |
1455
b43f098fa50c
6378953 allocation of interrupt threads could be more common
andrei
parents:
1253
diff
changeset
|
6 |
* You may not use this file except in compliance with the License. |
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
1253 | 21 |
|
0 | 22 |
/* |
1253 | 23 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
#include <sys/machsystm.h> |
|
30 |
#include <sys/archsystm.h> |
|
31 |
#include <sys/vm.h> |
|
32 |
#include <sys/cpu.h> |
|
33 |
#include <sys/atomic.h> |
|
34 |
#include <sys/reboot.h> |
|
35 |
#include <sys/kdi.h> |
|
36 |
#include <sys/bootconf.h> |
|
37 |
#include <sys/memlist_plat.h> |
|
38 |
#include <sys/memlist_impl.h> |
|
39 |
#include <sys/prom_plat.h> |
|
40 |
#include <sys/prom_isa.h> |
|
41 |
#include <sys/autoconf.h> |
|
42 |
#include <sys/intreg.h> |
|
43 |
#include <sys/ivintr.h> |
|
44 |
#include <sys/fpu/fpusystm.h> |
|
45 |
#include <sys/iommutsb.h> |
|
46 |
#include <vm/vm_dep.h> |
|
47 |
#include <vm/seg_dev.h> |
|
48 |
#include <vm/seg_kmem.h> |
|
49 |
#include <vm/seg_kpm.h> |
|
50 |
#include <vm/seg_map.h> |
|
51 |
#include <vm/seg_kp.h> |
|
52 |
#include <sys/sysconf.h> |
|
53 |
#include <vm/hat_sfmmu.h> |
|
54 |
#include <sys/kobj.h> |
|
55 |
#include <sys/sun4asi.h> |
|
56 |
#include <sys/clconf.h> |
|
57 |
#include <sys/platform_module.h> |
|
58 |
#include <sys/panic.h> |
|
59 |
#include <sys/cpu_sgnblk_defs.h> |
|
60 |
#include <sys/clock.h> |
|
61 |
#include <sys/cmn_err.h> |
|
62 |
#include <sys/promif.h> |
|
63 |
#include <sys/prom_debug.h> |
|
64 |
#include <sys/traptrace.h> |
|
65 |
#include <sys/memnode.h> |
|
66 |
#include <sys/mem_cage.h> |
|
67 |
||
68 |
extern void setup_trap_table(void); |
|
69 |
extern void cpu_intrq_setup(struct cpu *); |
|
70 |
extern void cpu_intrq_register(struct cpu *); |
|
71 |
extern void contig_mem_init(void); |
|
72 |
extern void mach_dump_buffer_init(void); |
|
73 |
extern void mach_descrip_init(void); |
|
74 |
extern void mach_memscrub(void); |
|
75 |
extern void mach_fpras(void); |
|
76 |
extern void mach_cpu_halt_idle(void); |
|
77 |
extern void mach_hw_copy_limit(void); |
|
78 |
extern void load_tod_module(void); |
|
79 |
#pragma weak load_tod_module |
|
80 |
||
81 |
extern int ndata_alloc_mmfsa(struct memlist *ndata); |
|
82 |
#pragma weak ndata_alloc_mmfsa |
|
83 |
||
84 |
extern void parse_idprom(void); |
|
85 |
extern void add_vx_handler(char *, int, void (*)(cell_t *)); |
|
86 |
extern void mem_config_init(void); |
|
87 |
extern void memseg_remap_init(void); |
|
88 |
||
89 |
/* |
|
90 |
* External Data: |
|
91 |
*/ |
|
92 |
extern int vac_size; /* cache size in bytes */ |
|
93 |
extern uint_t vac_mask; /* VAC alignment consistency mask */ |
|
94 |
extern uint_t vac_colors; |
|
95 |
||
96 |
/* |
|
97 |
* Global Data Definitions: |
|
98 |
*/ |
|
99 |
||
100 |
/* |
|
101 |
* XXX - Don't port this to new architectures |
|
102 |
* A 3rd party volume manager driver (vxdm) depends on the symbol romp. |
|
103 |
* 'romp' has no use with a prom with an IEEE 1275 client interface. |
|
104 |
* The driver doesn't use the value, but it depends on the symbol. |
|
105 |
*/ |
|
106 |
void *romp; /* veritas driver won't load without romp 4154976 */ |
|
107 |
/* |
|
108 |
* Declare these as initialized data so we can patch them. |
|
109 |
*/ |
|
110 |
pgcnt_t physmem = 0; /* memory size in pages, patch if you want less */ |
|
111 |
pgcnt_t segkpsize = |
|
112 |
btop(SEGKPDEFSIZE); /* size of segkp segment in pages */ |
|
113 |
uint_t segmap_percent = 12; /* Size of segmap segment */ |
|
114 |
||
115 |
int use_cache = 1; /* cache not reliable (605 bugs) with MP */ |
|
116 |
int vac_copyback = 1; |
|
117 |
char *cache_mode = NULL; |
|
118 |
int use_mix = 1; |
|
119 |
int prom_debug = 0; |
|
120 |
||
121 |
struct bootops *bootops = 0; /* passed in from boot in %o2 */ |
|
122 |
caddr_t boot_tba; /* %tba at boot - used by kmdb */ |
|
123 |
uint_t tba_taken_over = 0; |
|
124 |
||
125 |
caddr_t s_text; /* start of kernel text segment */ |
|
126 |
caddr_t e_text; /* end of kernel text segment */ |
|
127 |
caddr_t s_data; /* start of kernel data segment */ |
|
128 |
caddr_t e_data; /* end of kernel data segment */ |
|
129 |
||
130 |
caddr_t modtext; /* beginning of module text */ |
|
131 |
size_t modtext_sz; /* size of module text */ |
|
132 |
caddr_t moddata; /* beginning of module data reserve */ |
|
133 |
caddr_t e_moddata; /* end of module data reserve */ |
|
134 |
||
135 |
/* |
|
136 |
* End of first block of contiguous kernel in 32-bit virtual address space |
|
137 |
*/ |
|
138 |
caddr_t econtig32; /* end of first blk of contiguous kernel */ |
|
139 |
||
140 |
caddr_t ncbase; /* beginning of non-cached segment */ |
|
141 |
caddr_t ncend; /* end of non-cached segment */ |
|
142 |
caddr_t sdata; /* beginning of data segment */ |
|
143 |
||
144 |
caddr_t extra_etva; /* beginning of unused nucleus text */ |
|
145 |
pgcnt_t extra_etpg; /* number of pages of unused nucleus text */ |
|
146 |
||
147 |
size_t ndata_remain_sz; /* bytes from end of data to 4MB boundary */ |
|
148 |
caddr_t nalloc_base; /* beginning of nucleus allocation */ |
|
149 |
caddr_t nalloc_end; /* end of nucleus allocatable memory */ |
|
150 |
caddr_t valloc_base; /* beginning of kvalloc segment */ |
|
151 |
||
152 |
caddr_t kmem64_base; /* base of kernel mem segment in 64-bit space */ |
|
153 |
caddr_t kmem64_end; /* end of kernel mem segment in 64-bit space */ |
|
154 |
||
155 |
uintptr_t shm_alignment = 0; /* VAC address consistency modulus */ |
|
156 |
struct memlist *phys_install; /* Total installed physical memory */ |
|
157 |
struct memlist *phys_avail; /* Available (unreserved) physical memory */ |
|
158 |
struct memlist *virt_avail; /* Available (unmapped?) virtual memory */ |
|
159 |
struct memlist ndata; /* memlist of nucleus allocatable memory */ |
|
160 |
int memexp_flag; /* memory expansion card flag */ |
|
161 |
uint64_t ecache_flushaddr; /* physical address used for flushing E$ */ |
|
162 |
pgcnt_t obp_pages; /* Physical pages used by OBP */ |
|
163 |
||
164 |
/* |
|
165 |
* VM data structures |
|
166 |
*/ |
|
167 |
long page_hashsz; /* Size of page hash table (power of two) */ |
|
168 |
struct page *pp_base; /* Base of system page struct array */ |
|
169 |
size_t pp_sz; /* Size in bytes of page struct array */ |
|
170 |
struct page **page_hash; /* Page hash table */ |
|
171 |
struct seg ktextseg; /* Segment used for kernel executable image */ |
|
172 |
struct seg kvalloc; /* Segment used for "valloc" mapping */ |
|
173 |
struct seg kpseg; /* Segment used for pageable kernel virt mem */ |
|
174 |
struct seg ktexthole; /* Segment used for nucleus text hole */ |
|
175 |
struct seg kmapseg; /* Segment used for generic kernel mappings */ |
|
176 |
struct seg kpmseg; /* Segment used for physical mapping */ |
|
177 |
struct seg kdebugseg; /* Segment used for the kernel debugger */ |
|
178 |
||
179 |
uintptr_t kpm_pp_base; /* Base of system kpm_page array */ |
|
180 |
size_t kpm_pp_sz; /* Size of system kpm_page array */ |
|
181 |
pgcnt_t kpm_npages; /* How many kpm pages are managed */ |
|
182 |
||
183 |
struct seg *segkp = &kpseg; /* Pageable kernel virtual memory segment */ |
|
184 |
struct seg *segkmap = &kmapseg; /* Kernel generic mapping segment */ |
|
185 |
struct seg *segkpm = &kpmseg; /* 64bit kernel physical mapping segment */ |
|
186 |
||
187 |
/* |
|
188 |
* debugger pages (if allocated) |
|
189 |
*/ |
|
190 |
struct vnode kdebugvp; |
|
191 |
||
192 |
/* |
|
193 |
* Segment for relocated kernel structures in 64-bit large RAM kernels |
|
194 |
*/ |
|
195 |
struct seg kmem64; |
|
196 |
||
197 |
struct memseg *memseg_base; |
|
198 |
size_t memseg_sz; /* Used to translate a va to page */ |
|
199 |
struct vnode unused_pages_vp; |
|
200 |
||
201 |
/* |
|
202 |
* VM data structures allocated early during boot. |
|
203 |
*/ |
|
204 |
size_t pagehash_sz; |
|
205 |
uint64_t memlist_sz; |
|
206 |
||
207 |
char tbr_wr_addr_inited = 0; |
|
208 |
||
209 |
||
210 |
/* |
|
211 |
* Static Routines: |
|
212 |
*/ |
|
213 |
static void memlist_add(uint64_t, uint64_t, struct memlist **, |
|
214 |
struct memlist **); |
|
215 |
static void kphysm_init(page_t *, struct memseg *, pgcnt_t, uintptr_t, |
|
216 |
pgcnt_t); |
|
217 |
static void kvm_init(void); |
|
218 |
||
219 |
static void startup_init(void); |
|
220 |
static void startup_memlist(void); |
|
221 |
static void startup_modules(void); |
|
222 |
static void startup_bop_gone(void); |
|
223 |
static void startup_vm(void); |
|
224 |
static void startup_end(void); |
|
225 |
static void setup_cage_params(void); |
|
1253 | 226 |
static void startup_create_io_node(void); |
0 | 227 |
|
228 |
static pgcnt_t npages; |
|
229 |
static struct memlist *memlist; |
|
230 |
void *memlist_end; |
|
231 |
||
232 |
static pgcnt_t bop_alloc_pages; |
|
233 |
static caddr_t hblk_base; |
|
234 |
uint_t hblk_alloc_dynamic = 0; |
|
235 |
uint_t hblk1_min = H1MIN; |
|
236 |
uint_t hblk8_min; |
|
237 |
||
238 |
||
239 |
/* |
|
240 |
* Hooks for unsupported platforms and down-rev firmware |
|
241 |
*/ |
|
242 |
int iam_positron(void); |
|
243 |
#pragma weak iam_positron |
|
244 |
static void do_prom_version_check(void); |
|
245 |
static void kpm_init(void); |
|
246 |
static void kpm_npages_setup(int); |
|
247 |
static void kpm_memseg_init(void); |
|
248 |
||
249 |
/* |
|
250 |
* After receiving a thermal interrupt, this is the number of seconds |
|
251 |
* to delay before shutting off the system, assuming |
|
252 |
* shutdown fails. Use /etc/system to change the delay if this isn't |
|
253 |
* large enough. |
|
254 |
*/ |
|
255 |
int thermal_powerdown_delay = 1200; |
|
256 |
||
257 |
/* |
|
258 |
* Used to hold off page relocations into the cage until OBP has completed |
|
259 |
* its boot-time handoff of its resources to the kernel. |
|
260 |
*/ |
|
261 |
int page_relocate_ready = 0; |
|
262 |
||
263 |
/* |
|
264 |
* Enable some debugging messages concerning memory usage... |
|
265 |
*/ |
|
266 |
#ifdef DEBUGGING_MEM |
|
267 |
static int debugging_mem; |
|
268 |
static void |
|
269 |
printmemlist(char *title, struct memlist *list) |
|
270 |
{ |
|
271 |
if (!debugging_mem) |
|
272 |
return; |
|
273 |
||
274 |
printf("%s\n", title); |
|
275 |
||
276 |
while (list) { |
|
277 |
prom_printf("\taddr = 0x%x %8x, size = 0x%x %8x\n", |
|
278 |
(uint32_t)(list->address >> 32), (uint32_t)list->address, |
|
279 |
(uint32_t)(list->size >> 32), (uint32_t)(list->size)); |
|
280 |
list = list->next; |
|
281 |
} |
|
282 |
} |
|
283 |
||
284 |
void |
|
285 |
printmemseg(struct memseg *memseg) |
|
286 |
{ |
|
287 |
if (!debugging_mem) |
|
288 |
return; |
|
289 |
||
290 |
printf("memseg\n"); |
|
291 |
||
292 |
while (memseg) { |
|
293 |
prom_printf("\tpage = 0x%p, epage = 0x%p, " |
|
294 |
"pfn = 0x%x, epfn = 0x%x\n", |
|
295 |
memseg->pages, memseg->epages, |
|
296 |
memseg->pages_base, memseg->pages_end); |
|
297 |
memseg = memseg->next; |
|
298 |
} |
|
299 |
} |
|
300 |
||
301 |
/*
 * Debug-only helpers (DEBUGGING_MEM builds).
 *
 * debug_pause(str) hands str to halt().  The MPRINTF* macros forward
 * their arguments to prom_printf() only when debugging_mem is non-zero.
 *
 * Each MPRINTF* body is wrapped in do { } while (0) so that it expands
 * to exactly one statement.  A bare "if" here would silently capture an
 * "else" that follows the macro call at the use site.
 */
#define	debug_pause(str)	halt((str))
#define	MPRINTF(str) \
	do { \
		if (debugging_mem) \
			prom_printf((str)); \
	} while (0)
#define	MPRINTF1(str, a) \
	do { \
		if (debugging_mem) \
			prom_printf((str), (a)); \
	} while (0)
#define	MPRINTF2(str, a, b) \
	do { \
		if (debugging_mem) \
			prom_printf((str), (a), (b)); \
	} while (0)
#define	MPRINTF3(str, a, b, c) \
	do { \
		if (debugging_mem) \
			prom_printf((str), (a), (b), (c)); \
	} while (0)
|
307 |
#else /* DEBUGGING_MEM */ |
|
308 |
#define MPRINTF(str) |
|
309 |
#define MPRINTF1(str, a) |
|
310 |
#define MPRINTF2(str, a, b) |
|
311 |
#define MPRINTF3(str, a, b, c) |
|
312 |
#endif /* DEBUGGING_MEM */ |
|
313 |
||
314 |
/* Simple message to indicate that the bootops pointer has been zeroed */ |
|
315 |
#ifdef DEBUG |
|
316 |
/*
 * bootops_gone_on is initialized data so it can be patched; when it is
 * non-zero, BOOTOPS_GONE() prints a one-line notice via prom_printf().
 *
 * The macro expands to a single do { } while (0) statement with no
 * trailing semicolon of its own, so it is safe inside if/else and must
 * be written as "BOOTOPS_GONE();" at the call site.
 */
static int bootops_gone_on = 0;
#define	BOOTOPS_GONE() \
	do { \
		if (bootops_gone_on) \
			prom_printf("The bootops vec is zeroed now!\n"); \
	} while (0)
|
320 |
#else |
|
321 |
#define BOOTOPS_GONE() |
|
322 |
#endif /* DEBUG */ |
|
323 |
||
324 |
/* |
|
325 |
* Monitor pages may not be where this says they are. |
|
326 |
* and the debugger may not be there either. |
|
327 |
* |
|
328 |
* Note that 'pages' here are *physical* pages, which are 8k on sun4u. |
|
329 |
* |
|
330 |
* Physical memory layout |
|
331 |
* (not necessarily contiguous) |
|
332 |
* (THIS IS SOMEWHAT WRONG) |
|
333 |
* /-----------------------\ |
|
334 |
* | monitor pages | |
|
335 |
* availmem -|-----------------------| |
|
336 |
* | | |
|
337 |
* | page pool | |
|
338 |
* | | |
|
339 |
* |-----------------------| |
|
340 |
* | configured tables | |
|
341 |
* | buffers | |
|
342 |
* firstaddr -|-----------------------| |
|
343 |
* | hat data structures | |
|
344 |
* |-----------------------| |
|
345 |
* | kernel data, bss | |
|
346 |
* |-----------------------| |
|
347 |
* | interrupt stack | |
|
348 |
* |-----------------------| |
|
349 |
* | kernel text (RO) | |
|
350 |
* |-----------------------| |
|
351 |
* | trap table (4k) | |
|
352 |
* |-----------------------| |
|
353 |
* page 1 | panicbuf | |
|
354 |
* |-----------------------| |
|
355 |
* page 0 | reclaimed | |
|
356 |
* |_______________________| |
|
357 |
* |
|
358 |
* |
|
359 |
* |
|
360 |
* Kernel's Virtual Memory Layout. |
|
361 |
* /-----------------------\ |
|
362 |
* 0xFFFFFFFF.FFFFFFFF -| |- |
|
363 |
* | OBP's virtual page | |
|
364 |
* | tables | |
|
365 |
* 0xFFFFFFFC.00000000 -|-----------------------|- |
|
366 |
* : : |
|
367 |
* : : |
|
368 |
* 0xFFFFFE00.00000000 -|-----------------------|- |
|
369 |
* | | Ultrasparc I/II support |
|
370 |
* | segkpm segment | up to 2TB of physical |
|
371 |
* | (64-bit kernel ONLY) | memory, VAC has 2 colors |
|
372 |
* | | |
|
373 |
* 0xFFFFFA00.00000000 -|-----------------------|- 2TB segkpm alignment |
|
374 |
* : : |
|
375 |
* : : |
|
376 |
* 0xFFFFF810.00000000 -|-----------------------|- hole_end |
|
377 |
* | | ^ |
|
378 |
* | UltraSPARC I/II call | | |
|
379 |
* | bug requires an extra | | |
|
380 |
* | 4 GB of space between | | |
|
381 |
* | hole and used RAM | | |
|
382 |
* | | | |
|
383 |
* 0xFFFFF800.00000000 -|-----------------------|- | |
|
384 |
* | | | |
|
385 |
* | Virtual Address Hole | UltraSPARC |
|
386 |
* | on UltraSPARC I/II | I/II * ONLY * |
|
387 |
* | | | |
|
388 |
* 0x00000800.00000000 -|-----------------------|- | |
|
389 |
* | | | |
|
390 |
* | UltraSPARC I/II call | | |
|
391 |
* | bug requires an extra | | |
|
392 |
* | 4 GB of space between | | |
|
393 |
* | hole and used RAM | | |
|
394 |
* | | v |
|
395 |
* 0x000007FF.00000000 -|-----------------------|- hole_start ----- |
|
396 |
* : : ^ |
|
397 |
* : : | |
|
398 |
* 0x00000XXX.XXXXXXXX -|-----------------------|- kmem64_end | |
|
399 |
* | | | |
|
400 |
* | 64-bit kernel ONLY | | |
|
401 |
* | | | |
|
402 |
* | kmem64 segment | | |
|
403 |
* | | | |
|
404 |
* | (Relocated extra HME | Approximately |
|
405 |
* | block allocations, | 1 TB of virtual |
|
406 |
* | memnode freelists, | address space |
|
407 |
* | HME hash buckets, | | |
|
408 |
* | mml_table, kpmp_table,| | |
|
409 |
* | page_t array and | | |
|
410 |
* | hashblock pool to | | |
|
411 |
* | avoid hard-coded | | |
|
412 |
* | 32-bit vaddr | | |
|
413 |
* | limitations) | | |
|
414 |
* | | v |
|
415 |
* 0x00000700.00000000 -|-----------------------|- SYSLIMIT (kmem64_base) |
|
416 |
* | | |
|
417 |
* | segkmem segment | (SYSLIMIT - SYSBASE = 4TB) |
|
418 |
* | | |
|
419 |
* 0x00000300.00000000 -|-----------------------|- SYSBASE |
|
420 |
* : : |
|
421 |
* : : |
|
422 |
* -|-----------------------|- |
|
423 |
* | | |
|
424 |
* | segmap segment | SEGMAPSIZE (1/8th physmem, |
|
425 |
* | | 256G MAX) |
|
426 |
* 0x000002a7.50000000 -|-----------------------|- SEGMAPBASE |
|
427 |
* : : |
|
428 |
* : : |
|
429 |
* -|-----------------------|- |
|
430 |
* | | |
|
431 |
* | segkp | SEGKPSIZE (2GB) |
|
432 |
* | | |
|
433 |
* | | |
|
434 |
* 0x000002a1.00000000 -|-----------------------|- SEGKPBASE |
|
435 |
* | | |
|
436 |
* 0x000002a0.00000000 -|-----------------------|- MEMSCRUBBASE |
|
437 |
* | | (SEGKPBASE - 0x400000) |
|
438 |
* 0x0000029F.FFE00000 -|-----------------------|- ARGSBASE |
|
439 |
* | | (MEMSCRUBBASE - NCARGS) |
|
440 |
* 0x0000029F.FFD80000 -|-----------------------|- PPMAPBASE |
|
441 |
* | | (ARGSBASE - PPMAPSIZE) |
|
442 |
* 0x0000029F.FFD00000 -|-----------------------|- PPMAP_FAST_BASE |
|
443 |
* | | |
|
444 |
* 0x0000029F.FF980000 -|-----------------------|- PIOMAPBASE |
|
445 |
* | | |
|
446 |
* 0x0000029F.FF580000 -|-----------------------|- NARG_BASE |
|
447 |
* : : |
|
448 |
* : : |
|
449 |
* 0x00000000.FFFFFFFF -|-----------------------|- OFW_END_ADDR |
|
450 |
* | | |
|
451 |
* | OBP | |
|
452 |
* | | |
|
453 |
* 0x00000000.F0000000 -|-----------------------|- OFW_START_ADDR |
|
454 |
* | kmdb | |
|
455 |
* 0x00000000.EDD00000 -|-----------------------|- SEGDEBUGBASE |
|
456 |
* : : |
|
457 |
* : : |
|
458 |
* 0x00000000.7c000000 -|-----------------------|- SYSLIMIT32 |
|
459 |
* | | |
|
460 |
* | segkmem32 segment | (SYSLIMIT32 - SYSBASE32 = |
|
461 |
* | | ~64MB) |
|
462 |
* 0x00000000.78002000 -|-----------------------| |
|
463 |
* | panicbuf | |
|
464 |
* 0x00000000.78000000 -|-----------------------|- SYSBASE32 |
|
465 |
* : : |
|
466 |
* : : |
|
467 |
* | | |
|
468 |
* |-----------------------|- econtig32 |
|
469 |
* | vm structures | |
|
470 |
* 0x00000000.01C00000 |-----------------------|- nalloc_end |
|
471 |
* | TSBs | |
|
472 |
* |-----------------------|- end/nalloc_base |
|
473 |
* | kernel data & bss | |
|
474 |
* 0x00000000.01800000 -|-----------------------| |
|
475 |
* : nucleus text hole : |
|
476 |
* 0x00000000.01400000 -|-----------------------| |
|
477 |
* : : |
|
478 |
* |-----------------------| |
|
479 |
* | module text | |
|
480 |
* |-----------------------|- e_text/modtext |
|
481 |
* | kernel text | |
|
482 |
* |-----------------------| |
|
483 |
* | trap table (48k) | |
|
484 |
* 0x00000000.01000000 -|-----------------------|- KERNELBASE |
|
485 |
* | reserved for trapstat |} TSTAT_TOTAL_SIZE |
|
486 |
* |-----------------------| |
|
487 |
* | | |
|
488 |
* | invalid | |
|
489 |
* | | |
|
490 |
* 0x00000000.00000000 _|_______________________| |
|
491 |
* |
|
492 |
* |
|
493 |
* |
|
494 |
* 32-bit User Virtual Memory Layout. |
|
495 |
* /-----------------------\ |
|
496 |
* | | |
|
497 |
* | invalid | |
|
498 |
* | | |
|
499 |
* 0xFFC00000 -|-----------------------|- USERLIMIT |
|
500 |
* | user stack | |
|
501 |
* : : |
|
502 |
* : : |
|
503 |
* : : |
|
504 |
* | user data | |
|
505 |
* -|-----------------------|- |
|
506 |
* | user text | |
|
507 |
* 0x00002000 -|-----------------------|- |
|
508 |
* | invalid | |
|
509 |
* 0x00000000 _|_______________________| |
|
510 |
* |
|
511 |
* |
|
512 |
* |
|
513 |
* 64-bit User Virtual Memory Layout. |
|
514 |
* /-----------------------\ |
|
515 |
* | | |
|
516 |
* | invalid | |
|
517 |
* | | |
|
518 |
* 0xFFFFFFFF.80000000 -|-----------------------|- USERLIMIT |
|
519 |
* | user stack | |
|
520 |
* : : |
|
521 |
* : : |
|
522 |
* : : |
|
523 |
* | user data | |
|
524 |
* -|-----------------------|- |
|
525 |
* | user text | |
|
526 |
* 0x00000000.00100000 -|-----------------------|- |
|
527 |
* | invalid | |
|
528 |
* 0x00000000.00000000 _|_______________________| |
|
529 |
*/ |
|
530 |
||
531 |
extern caddr_t ecache_init_scrub_flush_area(caddr_t alloc_base); |
|
532 |
extern uint64_t ecache_flush_address(void); |
|
533 |
||
534 |
#pragma weak load_platform_modules |
|
535 |
#pragma weak starcat_startup_memlist |
|
536 |
#pragma weak ecache_init_scrub_flush_area |
|
537 |
#pragma weak ecache_flush_address |
|
538 |
||
539 |
||
540 |
/* |
|
541 |
* By default the DR Cage is enabled for maximum OS |
|
542 |
* MPSS performance. Users needing to disable the cage mechanism |
|
543 |
* can set this variable to zero via /etc/system. |
|
544 |
* Disabling the cage on systems supporting Dynamic Reconfiguration (DR) |
|
545 |
* will result in loss of DR functionality. |
|
546 |
* Platforms wishing to disable kernel Cage by default |
|
547 |
* should do so in their set_platform_defaults() routine. |
|
548 |
*/ |
|
549 |
int kernel_cage_enable = 1; |
|
550 |
||
551 |
static void |
|
552 |
setup_cage_params(void) |
|
553 |
{ |
|
554 |
void (*func)(void); |
|
555 |
||
556 |
func = (void (*)(void))kobj_getsymvalue("set_platform_cage_params", 0); |
|
557 |
if (func != NULL) { |
|
558 |
(*func)(); |
|
559 |
return; |
|
560 |
} |
|
561 |
||
562 |
if (kernel_cage_enable == 0) { |
|
563 |
return; |
|
564 |
} |
|
565 |
kcage_range_lock(); |
|
566 |
if (kcage_range_init(phys_avail, 1) == 0) { |
|
567 |
kcage_init(total_pages / 256); |
|
568 |
} |
|
569 |
kcage_range_unlock(); |
|
570 |
||
571 |
if (kcage_on) { |
|
572 |
cmn_err(CE_NOTE, "!Kernel Cage is ENABLED"); |
|
573 |
} else { |
|
574 |
cmn_err(CE_NOTE, "!Kernel Cage is DISABLED"); |
|
575 |
} |
|
576 |
||
577 |
} |
|
578 |
||
579 |
/* |
|
580 |
* Machine-dependent startup code |
|
581 |
*/ |
|
582 |
void |
|
583 |
startup(void) |
|
584 |
{ |
|
585 |
startup_init(); |
|
586 |
if (&startup_platform) |
|
587 |
startup_platform(); |
|
588 |
startup_memlist(); |
|
589 |
startup_modules(); |
|
590 |
setup_cage_params(); |
|
591 |
startup_bop_gone(); |
|
592 |
startup_vm(); |
|
593 |
startup_end(); |
|
594 |
} |
|
595 |
||
596 |
struct regs sync_reg_buf; |
|
597 |
uint64_t sync_tt; |
|
598 |
||
599 |
void |
|
600 |
sync_handler(void) |
|
601 |
{ |
|
602 |
struct trap_info ti; |
|
603 |
int i; |
|
604 |
||
605 |
/* |
|
606 |
* Prevent trying to talk to the other CPUs since they are |
|
607 |
* sitting in the prom and won't reply. |
|
608 |
*/ |
|
609 |
for (i = 0; i < NCPU; i++) { |
|
610 |
if ((i != CPU->cpu_id) && CPU_XCALL_READY(i)) { |
|
611 |
cpu[i]->cpu_flags &= ~CPU_READY; |
|
612 |
cpu[i]->cpu_flags |= CPU_QUIESCED; |
|
613 |
CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id); |
|
614 |
} |
|
615 |
} |
|
616 |
||
617 |
/* |
|
618 |
* We've managed to get here without going through the |
|
619 |
* normal panic code path. Try and save some useful |
|
620 |
* information. |
|
621 |
*/ |
|
622 |
if (!panicstr && (curthread->t_panic_trap == NULL)) { |
|
623 |
ti.trap_type = sync_tt; |
|
624 |
ti.trap_regs = &sync_reg_buf; |
|
625 |
ti.trap_addr = NULL; |
|
626 |
ti.trap_mmu_fsr = 0x0; |
|
627 |
||
628 |
curthread->t_panic_trap = &ti; |
|
629 |
} |
|
630 |
||
631 |
/* |
|
632 |
* If we're re-entering the panic path, update the signature |
|
633 |
* block so that the SC knows we're in the second part of panic. |
|
634 |
*/ |
|
635 |
if (panicstr) |
|
636 |
CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1); |
|
637 |
||
638 |
nopanicdebug = 1; /* do not perform debug_enter() prior to dump */ |
|
639 |
panic("sync initiated"); |
|
640 |
} |
|
641 |
||
642 |
||
643 |
static void |
|
644 |
startup_init(void) |
|
645 |
{ |
|
646 |
/* |
|
647 |
* We want to save the registers while we're still in OBP |
|
648 |
* so that we know they haven't been fiddled with since. |
|
649 |
* (In principle, OBP can't change them just because it |
|
650 |
* makes a callback, but we'd rather not depend on that |
|
651 |
* behavior.) |
|
652 |
*/ |
|
653 |
char sync_str[] = |
|
654 |
"warning @ warning off : sync " |
|
655 |
"%%tl-c %%tstate h# %p x! " |
|
656 |
"%%g1 h# %p x! %%g2 h# %p x! %%g3 h# %p x! " |
|
657 |
"%%g4 h# %p x! %%g5 h# %p x! %%g6 h# %p x! " |
|
658 |
"%%g7 h# %p x! %%o0 h# %p x! %%o1 h# %p x! " |
|
659 |
"%%o2 h# %p x! %%o3 h# %p x! %%o4 h# %p x! " |
|
660 |
"%%o5 h# %p x! %%o6 h# %p x! %%o7 h# %p x! " |
|
661 |
"%%tl-c %%tpc h# %p x! %%tl-c %%tnpc h# %p x! " |
|
662 |
"%%y h# %p l! %%tl-c %%tt h# %p x! " |
|
663 |
"sync ; warning !"; |
|
664 |
||
665 |
/* |
|
666 |
* 20 == num of %p substrings |
|
667 |
* 16 == max num of chars %p will expand to. |
|
668 |
*/ |
|
669 |
char bp[sizeof (sync_str) + 16 * 20]; |
|
670 |
||
671 |
(void) check_boot_version(BOP_GETVERSION(bootops)); |
|
672 |
||
673 |
/* |
|
674 |
* Initialize ptl1 stack for the 1st CPU. |
|
675 |
*/ |
|
676 |
ptl1_init_cpu(&cpu0); |
|
677 |
||
678 |
/* |
|
679 |
* Initialize the address map for cache consistent mappings |
|
680 |
* to random pages; must be done after vac_size is set. |
|
681 |
*/ |
|
682 |
ppmapinit(); |
|
683 |
||
684 |
/* |
|
685 |
* Initialize the PROM callback handler. |
|
686 |
*/ |
|
687 |
init_vx_handler(); |
|
688 |
||
689 |
/* |
|
690 |
* have prom call sync_callback() to handle the sync and |
|
691 |
* save some useful information which will be stored in the |
|
692 |
* core file later. |
|
693 |
*/ |
|
694 |
(void) sprintf((char *)bp, sync_str, |
|
695 |
(void *)&sync_reg_buf.r_tstate, (void *)&sync_reg_buf.r_g1, |
|
696 |
(void *)&sync_reg_buf.r_g2, (void *)&sync_reg_buf.r_g3, |
|
697 |
(void *)&sync_reg_buf.r_g4, (void *)&sync_reg_buf.r_g5, |
|
698 |
(void *)&sync_reg_buf.r_g6, (void *)&sync_reg_buf.r_g7, |
|
699 |
(void *)&sync_reg_buf.r_o0, (void *)&sync_reg_buf.r_o1, |
|
700 |
(void *)&sync_reg_buf.r_o2, (void *)&sync_reg_buf.r_o3, |
|
701 |
(void *)&sync_reg_buf.r_o4, (void *)&sync_reg_buf.r_o5, |
|
702 |
(void *)&sync_reg_buf.r_o6, (void *)&sync_reg_buf.r_o7, |
|
703 |
(void *)&sync_reg_buf.r_pc, (void *)&sync_reg_buf.r_npc, |
|
704 |
(void *)&sync_reg_buf.r_y, (void *)&sync_tt); |
|
705 |
prom_interpret(bp, 0, 0, 0, 0, 0); |
|
706 |
add_vx_handler("sync", 1, (void (*)(cell_t *))sync_handler); |
|
707 |
} |
|
708 |
||
709 |
static u_longlong_t *boot_physinstalled, *boot_physavail, *boot_virtavail; |
|
710 |
static size_t boot_physinstalled_len, boot_physavail_len, boot_virtavail_len; |
|
711 |
||
712 |
#define IVSIZE ((MAXIVNUM + 1) * sizeof (struct intr_vector)) |
|
713 |
||
714 |
/* |
|
715 |
* As OBP takes up some RAM when the system boots, pages will already be "lost" |
|
716 |
* to the system and reflected in npages by the time we see it. |
|
717 |
* |
|
718 |
* We only want to allocate kernel structures in the 64-bit virtual address |
|
719 |
* space on systems with enough RAM to make the overhead of keeping track of |
|
720 |
* an extra kernel memory segment worthwhile. |
|
721 |
* |
|
722 |
* Since OBP has already performed its memory allocations by this point, if we |
|
723 |
* have more than MINMOVE_RAM_MB MB of RAM left free, go ahead and map |
|
724 |
* memory in the 64-bit virtual address space; otherwise keep allocations |
|
725 |
* contiguous with what we've mapped so far in the 32-bit virtual address space. |
|
726 |
*/ |
|
727 |
#define MINMOVE_RAM_MB ((size_t)1900) |
|
728 |
#define MB_TO_BYTES(mb) ((mb) * 1048576ul) |
|
729 |
||
730 |
pgcnt_t tune_npages = (pgcnt_t) |
|
731 |
(MB_TO_BYTES(MINMOVE_RAM_MB)/ (size_t)MMU_PAGESIZE); |
|
732 |
||
733 |
/*
 * startup_memlist: early boot memory setup.
 *
 * In order, this routine:
 *  - computes the nucleus data/text boundaries (moddata, nalloc_base,
 *    nalloc_end, modtext) from e_data and e_text;
 *  - snapshots the boot memory lists and sizes physinstalled, physmax,
 *    npages and memblocks from them;
 *  - carves TSBs, cpu structs, the dmv table, page freelists and hat
 *    structures out of the nucleus data area via the ndata_alloc_*() calls;
 *  - BOP_ALLOC()s whatever did not fit in the nucleus (hme buckets, extra
 *    page freelists, mml_table, kpmp tables, page structs, page hash,
 *    counters, memsegs, kpm pages) starting at kmem64_base;
 *  - allocates the interrupt vector table and the memlist buffer, then
 *    builds virt_avail and phys_avail;
 *  - calls kernelheap_init(), kphysm_init(), page_lock_init(), kmem_init(),
 *    bp_init() and mem_config_init().
 *
 * Takes no arguments and returns nothing; failures panic or halt.
 */
static void |

734 |
startup_memlist(void) |

735 |
{ |

736 |
size_t alloc_sz; |

737 |
size_t ctrs_sz; |

738 |
caddr_t alloc_base; |

739 |
caddr_t ctrs_base, ctrs_end; |

740 |
caddr_t memspace; |

741 |
caddr_t va; |

742 |
int memblocks = 0; |

743 |
struct memlist *cur; |

744 |
size_t syslimit = (size_t)SYSLIMIT; |

745 |
size_t sysbase = (size_t)SYSBASE; |

746 |
int alloc_alignsize = MMU_PAGESIZE; |

747 |
extern void page_coloring_init(void); |

748 |
||
749 |
/* |

750 |
* Initialize enough of the system to allow kmem_alloc to work by |

751 |
* calling boot to allocate its memory until the time that |

752 |
* kvm_init is completed. The page structs are allocated after |

753 |
* rounding up end to the nearest page boundary; the memsegs are |

754 |
* initialized and the space they use comes from the kernel heap. |

755 |
* With appropriate initialization, they can be reallocated later |

756 |
* to a size appropriate for the machine's configuration. |

757 |
* |

758 |
* At this point, memory is allocated for things that will never |

759 |
* need to be freed, this used to be "valloced". This allows a |

760 |
* savings as the pages don't need page structures to describe |

761 |
* them because they will not be managed by the vm system. |

762 |
*/ |

763 |
||
764 |
/* |

765 |
* We're loaded by boot with the following configuration (as |

766 |
* specified in the sun4u/conf/Mapfile): |

767 |
* |

768 |
* text: 4 MB chunk aligned on a 4MB boundary |

769 |
* data & bss: 4 MB chunk aligned on a 4MB boundary |

770 |
* |

771 |
* These two chunks will eventually be mapped by 2 locked 4MB |

772 |
* ttes and will represent the nucleus of the kernel. This gives |

773 |
* us some free space that is already allocated, some or all of |

774 |
* which is made available to kernel module text. |

775 |
* |

776 |
* The free space in the data-bss chunk is used for nucleus |

777 |
* allocatable data structures and we reserve it using the |

778 |
* nalloc_base and nalloc_end variables. This space is currently |

779 |
* being used for hat data structures required for tlb miss |

780 |
* handling operations. We align nalloc_base to a l2 cache |

781 |
* linesize because this is the line size the hardware uses to |

782 |
* maintain cache coherency. |

783 |
* 256K is carved out for module data. |

784 |
*/ |

785 |
||
786 |
nalloc_base = (caddr_t)roundup((uintptr_t)e_data, MMU_PAGESIZE); |

787 |
moddata = nalloc_base; |

788 |
e_moddata = nalloc_base + MODDATA; |

789 |
nalloc_base = e_moddata; |

790 |
||
791 |
nalloc_end = (caddr_t)roundup((uintptr_t)nalloc_base, MMU_PAGESIZE4M); |

792 |
valloc_base = nalloc_base; |

793 |
||
794 |
/* |

795 |
* Calculate the start of the data segment. |

796 |
*/ |

797 |
sdata = (caddr_t)((uintptr_t)e_data & MMU_PAGEMASK4M); |

798 |
||
799 |
PRM_DEBUG(moddata); |

800 |
PRM_DEBUG(nalloc_base); |

801 |
PRM_DEBUG(nalloc_end); |

802 |
PRM_DEBUG(sdata); |

803 |
||
804 |
/* |

805 |
* Remember any slop after e_text so we can give it to the modules. |

806 |
*/ |

807 |
PRM_DEBUG(e_text); |

808 |
modtext = (caddr_t)roundup((uintptr_t)e_text, MMU_PAGESIZE); |

809 |
if (((uintptr_t)modtext & MMU_PAGEMASK4M) != (uintptr_t)s_text) |

810 |
panic("nucleus text overflow"); |

811 |
modtext_sz = (caddr_t)roundup((uintptr_t)modtext, MMU_PAGESIZE4M) - |

812 |
modtext; |

813 |
PRM_DEBUG(modtext); |

814 |
PRM_DEBUG(modtext_sz); |

815 |
||
816 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |

817 |
&boot_physavail, &boot_physavail_len, |

818 |
&boot_virtavail, &boot_virtavail_len); |

819 |
/* |

820 |
* Remember what the physically available highest page is |

821 |
* so that dumpsys works properly, and find out how much |

822 |
* memory is installed. |

823 |
*/ |

824 |
installed_top_size_memlist_array(boot_physinstalled, |

825 |
boot_physinstalled_len, &physmax, &physinstalled); |

826 |
PRM_DEBUG(physinstalled); |

827 |
PRM_DEBUG(physmax); |

828 |
||
829 |
/* Fill out memory nodes config structure */ |

830 |
startup_build_mem_nodes(boot_physinstalled, boot_physinstalled_len); |

831 |
||
832 |
/* |

833 |
* Get the list of physically available memory to size |

834 |
* the number of page structures needed. |

835 |
*/ |

836 |
size_physavail(boot_physavail, boot_physavail_len, &npages, &memblocks); |

837 |
/* |

838 |
* This first snap shot of npages can represent the pages used |

839 |
* by OBP's text and data approximately. This is used in the |

840 |
* calculation of the kernel size |

841 |
*/ |

842 |
obp_pages = physinstalled - npages; |

843 |
||
844 |
||
845 |
/* |

846 |
* On small-memory systems (<MODTEXT_SM_SIZE MB, currently 256MB), the |

847 |
* in-nucleus module text is capped to MODTEXT_SM_CAP bytes (currently |

848 |
* 2MB) and any excess pages are put on physavail. The assumption is |

849 |
* that small-memory systems will need more pages more than they'll |

850 |
* need efficiently-mapped module texts. |

851 |
*/ |

852 |
if ((physinstalled < mmu_btop(MODTEXT_SM_SIZE << 20)) && |

853 |
modtext_sz > MODTEXT_SM_CAP) { |

854 |
extra_etpg = mmu_btop(modtext_sz - MODTEXT_SM_CAP); |

855 |
modtext_sz = MODTEXT_SM_CAP; |

856 |
} else |

857 |
extra_etpg = 0; |

858 |
PRM_DEBUG(extra_etpg); |

859 |
PRM_DEBUG(modtext_sz); |

860 |
extra_etva = modtext + modtext_sz; |

861 |
PRM_DEBUG(extra_etva); |

862 |
||
863 |
/* |

864 |
* Account for any pages after e_text and e_data. |

865 |
*/ |

866 |
npages += extra_etpg; |

867 |
npages += mmu_btopr(nalloc_end - nalloc_base); |

868 |
PRM_DEBUG(npages); |

869 |
||
870 |
/* |

871 |
* npages is the maximum of available physical memory possible. |

872 |
* (ie. it will never be more than this) |

873 |
*/ |

874 |
||
875 |
/* |

876 |
* initialize the nucleus memory allocator. |

877 |
*/ |

878 |
ndata_alloc_init(&ndata, (uintptr_t)nalloc_base, (uintptr_t)nalloc_end); |

879 |
||
880 |
/* |

881 |
* Allocate mmu fault status area from the nucleus data area. |

882 |
*/ |

883 |
if ((&ndata_alloc_mmfsa != NULL) && (ndata_alloc_mmfsa(&ndata) != 0)) |

884 |
cmn_err(CE_PANIC, "no more nucleus memory after mfsa alloc"); |

885 |
||
886 |
/* |

887 |
* Allocate kernel TSBs from the nucleus data area. |

888 |
*/ |

889 |
if (ndata_alloc_tsbs(&ndata, npages) != 0) |

890 |
cmn_err(CE_PANIC, "no more nucleus memory after tsbs alloc"); |

891 |
||
892 |
/* |

893 |
* Allocate cpus structs from the nucleus data area. |

894 |
*/ |

895 |
if (ndata_alloc_cpus(&ndata) != 0) |

896 |
cmn_err(CE_PANIC, "no more nucleus memory after cpu alloc"); |

897 |
||
898 |
/* |

899 |
* Allocate dmv dispatch table from the nucleus data area. |

900 |
*/ |

901 |
if (ndata_alloc_dmv(&ndata) != 0) |

902 |
cmn_err(CE_PANIC, "no more nucleus memory after dmv alloc"); |

903 |
||
904 |
||
905 |
page_coloring_init(); |

906 |
||
907 |
/* |

908 |
* Allocate page_freelists bin headers for memnode 0 from the |

909 |
* nucleus data area. |

910 |
*/ |

911 |
if (ndata_alloc_page_freelists(&ndata, 0) != 0) |

912 |
cmn_err(CE_PANIC, |

913 |
"no more nucleus memory after page free lists alloc"); |

914 |
||
915 |
if (kpm_enable) { |

916 |
kpm_init(); |

917 |
/* |

918 |
* kpm page space -- Update kpm_npages and make the |

919 |
* same assumption about fragmenting as it is done |

920 |
* for memseg_sz. |

921 |
*/ |

922 |
kpm_npages_setup(memblocks + 4); |

923 |
} |

924 |
||
925 |
/* |

926 |
* Allocate hat related structs from the nucleus data area. |

927 |
*/ |

928 |
if (ndata_alloc_hat(&ndata, npages, kpm_npages) != 0) |

929 |
cmn_err(CE_PANIC, "no more nucleus memory after hat alloc"); |

930 |
||
931 |
/* |

932 |
* We want to do the BOP_ALLOCs before the real allocation of page |

933 |
* structs in order to not have to allocate page structs for this |

934 |
* memory. We need to calculate a virtual address because we want |

935 |
* the page structs to come before other allocations in virtual address |

936 |
* space. This is so some (if not all) of page structs can actually |

937 |
* live in the nucleus. |

938 |
*/ |

939 |
||
940 |
/* |

941 |
* WARNING WARNING WARNING WARNING WARNING WARNING WARNING |

942 |
* |

943 |
* There are comments all over the SFMMU code warning of dire |

944 |
* consequences if the TSBs are moved out of 32-bit space. This |

945 |
* is largely because the asm code uses "sethi %hi(addr)"-type |

946 |
* instructions which will not provide the expected result if the |

947 |
* address is a 64-bit one. |

948 |
* |

949 |
* WARNING WARNING WARNING WARNING WARNING WARNING WARNING |

950 |
*/ |

951 |
alloc_base = (caddr_t)roundup((uintptr_t)nalloc_end, MMU_PAGESIZE); |

952 |
alloc_base = sfmmu_ktsb_alloc(alloc_base); |

953 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, ecache_alignsize); |

954 |
PRM_DEBUG(alloc_base); |

955 |
||
956 |
/* |

957 |
* Allocate IOMMU TSB array. We do this here so that the physical |

958 |
* memory gets deducted from the PROM's physical memory list. |

959 |
*/ |

960 |
alloc_base = iommu_tsb_init(alloc_base); |

961 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

962 |
ecache_alignsize); |

963 |
PRM_DEBUG(alloc_base); |

964 |
||
965 |
/* |

966 |
* Starcat needs its special structures assigned in 32-bit virtual |

967 |
* address space because its probing routines execute FCode, and FCode |

968 |
* can't handle 64-bit virtual addresses... |

969 |
*/ |

970 |
if (&starcat_startup_memlist) { |

971 |
alloc_base = starcat_startup_memlist(alloc_base); |

972 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

973 |
ecache_alignsize); |

974 |
PRM_DEBUG(alloc_base); |

975 |
} |

976 |
||
977 |
/* |

978 |
* If we have enough memory, use 4M pages for alignment because it |

979 |
* greatly reduces the number of TLB misses we take albeit at the cost |

980 |
* of possible RAM wastage (degenerate case of 4 MB - MMU_PAGESIZE per |

981 |
* allocation.) Still, the speedup on large memory systems (e.g. > 64 |

982 |
* GB) is quite noticeable, so it is worth the effort to do if we can. |

983 |
* |

984 |
* Note, however, that this speedup will only occur if the boot PROM |

985 |
* uses the largest MMU page size possible to map memory |

986 |
* requests that are properly aligned and sized (for example, a request |

987 |
* for a multiple of 4MB of memory aligned to a 4MB boundary will |

988 |
* result in a mapping using a 4MB MMU page.) |

989 |
* |

990 |
* Even then, the large page mappings will only speed things up until |

991 |
* the startup process proceeds a bit further, as when |

992 |
* sfmmu_map_prom_mappings() copies page mappings from the PROM to the |

993 |
* kernel it remaps everything but the TSBs using 8K pages anyway... |

994 |
* |

995 |
* At some point in the future, sfmmu_map_prom_mappings() will be |

996 |
* rewritten to copy memory mappings to the kernel using the same MMU |

997 |
* page sizes the PROM used. When that occurs, if the PROM did use |

998 |
* large MMU pages to map memory, the alignment/sizing work we're |

999 |
* doing now should give us a nice extra performance boost, albeit at |

1000 |
* the cost of greater RAM usage... |

1001 |
*/ |

1002 |
alloc_alignsize = ((npages >= tune_npages) ? MMU_PAGESIZE4M : |

1003 |
MMU_PAGESIZE); |

1004 |
||
1005 |
PRM_DEBUG(tune_npages); |

1006 |
PRM_DEBUG(alloc_alignsize); |

1007 |
||
1008 |
/* |

1009 |
* Save off where the contiguous allocations to date have ended |

1010 |
* in econtig32. |

1011 |
*/ |

1012 |
econtig32 = alloc_base; |

1013 |
PRM_DEBUG(econtig32); |

1014 |
||
1015 |
if (econtig32 > (caddr_t)KERNEL_LIMIT32) |

1016 |
cmn_err(CE_PANIC, "econtig32 too big"); |

1017 |
||
1018 |
/* |

1019 |
* To avoid memory allocation collisions in the 32-bit virtual address |

1020 |
* space, make allocations from this point forward in 64-bit virtual |

1021 |
* address space starting at syslimit and working up. Also use the |

1022 |
* alignment specified by alloc_alignsize, as we may be able to save |

1023 |
* ourselves TLB misses by using larger page sizes if they're |

1024 |
* available. |

1025 |
* |

1026 |
* All this is needed because on large memory systems, the default |

1027 |
* Solaris allocations will collide with SYSBASE32, which is hard |

1028 |
* coded to be at the virtual address 0x78000000. Therefore, on 64-bit |

1029 |
* kernels, move the allocations to a location in the 64-bit virtual |

1030 |
* address space, allowing those structures to grow without |

1031 |
* worry. |

1032 |
* |

1033 |
* On current CPUs we'll run out of physical memory address bits before |

1034 |
* we need to worry about the allocations running into anything else in |

1035 |
* VM or the virtual address holes on US-I and II, as there's currently |

1036 |
* about 1 TB of addressable space before the US-I/II VA hole. |

1037 |
*/ |

1038 |
kmem64_base = (caddr_t)syslimit; |

1039 |
PRM_DEBUG(kmem64_base); |

1040 |
||
1041 |
alloc_base = (caddr_t)roundup((uintptr_t)kmem64_base, alloc_alignsize); |

1042 |
||
1043 |
/* |

1044 |
* If KHME and/or UHME hash buckets won't fit in the nucleus, allocate |

1045 |
* them here. |

1046 |
*/ |

1047 |
if (khme_hash == NULL || uhme_hash == NULL) { |

1048 |
/* |

1049 |
* alloc_hme_buckets() will align alloc_base properly before |

1050 |
* assigning the hash buckets, so we don't need to do it |

1051 |
* before the call... |

1052 |
*/ |

1053 |
alloc_base = alloc_hme_buckets(alloc_base, alloc_alignsize); |

1054 |
||
1055 |
PRM_DEBUG(alloc_base); |

1056 |
PRM_DEBUG(khme_hash); |

1057 |
PRM_DEBUG(uhme_hash); |

1058 |
} |

1059 |
||
1060 |
/* |

1061 |
* Allocate the remaining page freelists. NUMA systems can |

1062 |
* have lots of page freelists, one per node, which quickly |

1063 |
* outgrow the amount of nucleus memory available. |

1064 |
*/ |

1065 |
if (max_mem_nodes > 1) { |

1066 |
int mnode; |

1067 |
caddr_t alloc_start = alloc_base; |

1068 |
||
1069 |
for (mnode = 1; mnode < max_mem_nodes; mnode++) { |

1070 |
alloc_base = alloc_page_freelists(mnode, alloc_base, |

1071 |
ecache_alignsize); |

1072 |
} |

1073 |
||
1074 |
if (alloc_base > alloc_start) { |

1075 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

1076 |
alloc_alignsize); |

1077 |
if ((caddr_t)BOP_ALLOC(bootops, alloc_start, |

1078 |
alloc_base - alloc_start, |

1079 |
alloc_alignsize) != alloc_start) |

1080 |
cmn_err(CE_PANIC, |

1081 |
"Unable to alloc page freelists\n"); |

1082 |
} |

1083 |
||
1084 |
PRM_DEBUG(alloc_base); |

1085 |
} |

1086 |
||
1087 |
if (!mml_table) { |

1088 |
size_t mmltable_sz; |

1089 |
||
1090 |
/* |

1091 |
* We need to allocate the mml_table here because there |

1092 |
* was not enough space within the nucleus. |

1093 |
*/ |

1094 |
mmltable_sz = sizeof (kmutex_t) * mml_table_sz; |

1095 |
alloc_sz = roundup(mmltable_sz, alloc_alignsize); |

1096 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

1097 |
alloc_alignsize); |

1098 |
||
1099 |
if ((mml_table = (kmutex_t *)BOP_ALLOC(bootops, alloc_base, |

1100 |
alloc_sz, alloc_alignsize)) != (kmutex_t *)alloc_base) |

1101 |
panic("mml_table alloc failure"); |

1102 |
||
1103 |
alloc_base += alloc_sz; |

1104 |
PRM_DEBUG(mml_table); |

1105 |
PRM_DEBUG(alloc_base); |

1106 |
} |

1107 |
||
1108 |
if (kpm_enable && !(kpmp_table || kpmp_stable)) { |

1109 |
size_t kpmptable_sz; |

1110 |
caddr_t table; |

1111 |
||
1112 |
/* |

1113 |
* We need to allocate either kpmp_table or kpmp_stable here |

1114 |
* because there was not enough space within the nucleus. |

1115 |
*/ |

1116 |
kpmptable_sz = (kpm_smallpages == 0) ? |

1117 |
sizeof (kpm_hlk_t) * kpmp_table_sz : |

1118 |
sizeof (kpm_shlk_t) * kpmp_stable_sz; |

1119 |
||
1120 |
alloc_sz = roundup(kpmptable_sz, alloc_alignsize); |

1121 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

1122 |
alloc_alignsize); |

1123 |
||
1124 |
table = BOP_ALLOC(bootops, alloc_base, alloc_sz, |

1125 |
alloc_alignsize); |

1126 |
||
1127 |
if (table != alloc_base) |

1128 |
panic("kpmp_table or kpmp_stable alloc failure"); |

1129 |
||
1130 |
if (kpm_smallpages == 0) { |

1131 |
kpmp_table = (kpm_hlk_t *)table; |

1132 |
PRM_DEBUG(kpmp_table); |

1133 |
} else { |

1134 |
kpmp_stable = (kpm_shlk_t *)table; |

1135 |
PRM_DEBUG(kpmp_stable); |

1136 |
} |

1137 |
||
1138 |
alloc_base += alloc_sz; |

1139 |
PRM_DEBUG(alloc_base); |

1140 |
} |

1141 |
||
1142 |
if (&ecache_init_scrub_flush_area) { |

1143 |
/* |

1144 |
* Pass alloc_base directly, as the routine itself is |

1145 |
* responsible for any special alignment requirements... |

1146 |
*/ |

1147 |
alloc_base = ecache_init_scrub_flush_area(alloc_base); |

1148 |
PRM_DEBUG(alloc_base); |

1149 |
} |

1150 |
||
1151 |
/* |

1152 |
* Take the most current snapshot we can by calling mem-update. |

1153 |
*/ |

1154 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |

1155 |
&boot_physavail, &boot_physavail_len, |

1156 |
&boot_virtavail, &boot_virtavail_len); |

1157 |
||
1158 |
/* |

1159 |
* Reset npages and memblocks based on boot_physavail list. |

1160 |
*/ |

1161 |
size_physavail(boot_physavail, boot_physavail_len, &npages, &memblocks); |

1162 |
PRM_DEBUG(npages); |

1163 |
||
1164 |
/* |

1165 |
* Account for extra memory after e_text. |

1166 |
*/ |

1167 |
npages += extra_etpg; |

1168 |
||
1169 |
/* |

1170 |
* Calculate the largest free memory chunk in the nucleus data area. |

1171 |
* We need to figure out if page structs can fit in there or not. |

1172 |
* We also make sure enough page structs get created for any physical |

1173 |
* memory we might be returning to the system. |

1174 |
*/ |

1175 |
ndata_remain_sz = ndata_maxsize(&ndata); |

1176 |
PRM_DEBUG(ndata_remain_sz); |

1177 |
||
1178 |
pp_sz = sizeof (struct page) * npages; |

1179 |
||
1180 |
/* |

1181 |
* Here's a nice bit of code based on somewhat recursive logic: |

1182 |
* |

1183 |
* If the page array would fit within the nucleus, we want to |

1184 |
* add npages to cover any extra memory we may be returning back |

1185 |
* to the system. |

1186 |
* |

1187 |
* HOWEVER, the page array is sized by calculating the size of |

1188 |
* (struct page * npages), as are the pagehash table, ctrs and |

1189 |
* memseg_list, so the very act of performing the calculation below may |

1190 |
* in fact make the array large enough that it no longer fits in the |

1191 |
* nucleus, meaning there would now be a much larger area of the |

1192 |
* nucleus free that should really be added to npages, which would |

1193 |
* make the page array that much larger, and so on. |

1194 |
* |

1195 |
* This also ignores the memory possibly used in the nucleus for the |

1196 |
* page hash, ctrs and memseg list and the fact that whether they |

1197 |
* fit there or not varies with the npages calculation below, but we |

1198 |
* don't even factor them into the equation at this point; perhaps we |

1199 |
* should or perhaps we should just take the approach that the few |

1200 |
* extra pages we could add via this calculation REALLY aren't worth |

1201 |
* the hassle... |

1202 |
*/ |

1203 |
if (ndata_remain_sz > pp_sz) { |

1204 |
size_t spare = ndata_spare(&ndata, pp_sz, ecache_alignsize); |

1205 |
||
1206 |
npages += mmu_btop(spare); |

1207 |
||
1208 |
pp_sz = npages * sizeof (struct page); |

1209 |
||
1210 |
pp_base = ndata_alloc(&ndata, pp_sz, ecache_alignsize); |

1211 |
} |

1212 |
||
1213 |
/* |

1214 |
* If physmem is patched to be non-zero, use it instead of |

1215 |
* the monitor value unless physmem is larger than the total |

1216 |
* amount of memory on hand. |

1217 |
*/ |

1218 |
if (physmem == 0 || physmem > npages) |

1219 |
physmem = npages; |

1220 |
||
1221 |
/* |

1222 |
* If pp_base is NULL that means the routines above have determined |

1223 |
* the page array will not fit in the nucleus; we'll have to |

1224 |
* BOP_ALLOC() ourselves some space for them. |

1225 |
*/ |

1226 |
if (pp_base == NULL) { |

1227 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

1228 |
alloc_alignsize); |

1229 |
||
1230 |
alloc_sz = roundup(pp_sz, alloc_alignsize); |

1231 |
||
1232 |
if ((pp_base = (struct page *)BOP_ALLOC(bootops, |

1233 |
alloc_base, alloc_sz, alloc_alignsize)) != |

1234 |
(struct page *)alloc_base) |

1235 |
panic("page alloc failure"); |

1236 |
||
1237 |
alloc_base += alloc_sz; |

1238 |
} |

1239 |
||
1240 |
/* |

1241 |
* The page structure hash table size is a power of 2 |

1242 |
* such that the average hash chain length is PAGE_HASHAVELEN. |

1243 |
*/ |

1244 |
page_hashsz = npages / PAGE_HASHAVELEN; |

1245 |
page_hashsz = 1 << highbit((ulong_t)page_hashsz); |

1246 |
pagehash_sz = sizeof (struct page *) * page_hashsz; |

1247 |
||
1248 |
/* |

1249 |
* We want to TRY to fit the page structure hash table, |

1250 |
* the page size free list counters, the memseg list and |

1251 |
* the kpm page space in the nucleus if possible. |

1252 |
* |

1253 |
* alloc_sz counts how much memory needs to be allocated by |

1254 |
* BOP_ALLOC(). |

1255 |
*/ |

1256 |
page_hash = ndata_alloc(&ndata, pagehash_sz, ecache_alignsize); |

1257 |
||
1258 |
alloc_sz = (page_hash == NULL ? pagehash_sz : 0); |

1259 |
||
1260 |
/* |

1261 |
* Size up per page size free list counters. |

1262 |
*/ |

1263 |
ctrs_sz = page_ctrs_sz(); |

1264 |
ctrs_base = ndata_alloc(&ndata, ctrs_sz, ecache_alignsize); |

1265 |
||
1266 |
if (ctrs_base == NULL) |

1267 |
alloc_sz = roundup(alloc_sz, ecache_alignsize) + ctrs_sz; |

1268 |
||
1269 |
/* |

1270 |
* The memseg list is for the chunks of physical memory that |

1271 |
* will be managed by the vm system. The number calculated is |

1272 |
* a guess as boot may fragment it more when memory allocations |

1273 |
* are made before kphysm_init(). Currently, there are two |

1274 |
* allocations before then, so we assume each causes fragmen- |

1275 |
* tation, and add a couple more for good measure. |

1276 |
*/ |

1277 |
memseg_sz = sizeof (struct memseg) * (memblocks + 4); |

1278 |
memseg_base = ndata_alloc(&ndata, memseg_sz, ecache_alignsize); |

1279 |
||
1280 |
if (memseg_base == NULL) |

1281 |
alloc_sz = roundup(alloc_sz, ecache_alignsize) + memseg_sz; |

1282 |
||
1283 |
||
1284 |
if (kpm_enable) { |

1285 |
/* |

1286 |
* kpm page space -- Update kpm_npages and make the |

1287 |
* same assumption about fragmenting as it is done |

1288 |
* for memseg_sz above. |

1289 |
*/ |

1290 |
kpm_npages_setup(memblocks + 4); |

1291 |
kpm_pp_sz = (kpm_smallpages == 0) ? |

1292 |
kpm_npages * sizeof (kpm_page_t): |

1293 |
kpm_npages * sizeof (kpm_spage_t); |

1294 |
||
1295 |
kpm_pp_base = (uintptr_t)ndata_alloc(&ndata, kpm_pp_sz, |

1296 |
ecache_alignsize); |

1297 |
||
1298 |
/* kpm_pp_base holds the ndata_alloc() result cast to uintptr_t; NULL here means it did not fit in the nucleus. */
if (kpm_pp_base == NULL) |

1299 |
alloc_sz = roundup(alloc_sz, ecache_alignsize) + |

1300 |
kpm_pp_sz; |

1301 |
} |

1302 |
||
1303 |
if (alloc_sz > 0) { |

1304 |
uintptr_t bop_base; |

1305 |
||
1306 |
/* |

1307 |
* We need extra memory allocated through BOP_ALLOC. |

1308 |
*/ |

1309 |
alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, |

1310 |
alloc_alignsize); |

1311 |
||
1312 |
alloc_sz = roundup(alloc_sz, alloc_alignsize); |

1313 |
||
1314 |
if ((bop_base = (uintptr_t)BOP_ALLOC(bootops, alloc_base, |

1315 |
alloc_sz, alloc_alignsize)) != (uintptr_t)alloc_base) |

1316 |
panic("system page struct alloc failure"); |

1317 |
||
1318 |
alloc_base += alloc_sz; |

1319 |
||
1320 |
if (page_hash == NULL) { |

1321 |
page_hash = (struct page **)bop_base; |

1322 |
bop_base = roundup(bop_base + pagehash_sz, |

1323 |
ecache_alignsize); |

1324 |
} |

1325 |
||
1326 |
if (ctrs_base == NULL) { |

1327 |
ctrs_base = (caddr_t)bop_base; |

1328 |
bop_base = roundup(bop_base + ctrs_sz, |

1329 |
ecache_alignsize); |

1330 |
} |

1331 |
||
1332 |
if (memseg_base == NULL) { |

1333 |
memseg_base = (struct memseg *)bop_base; |

1334 |
bop_base = roundup(bop_base + memseg_sz, |

1335 |
ecache_alignsize); |

1336 |
} |

1337 |
||
1338 |
if (kpm_enable && kpm_pp_base == NULL) { |

1339 |
kpm_pp_base = (uintptr_t)bop_base; |

1340 |
bop_base = roundup(bop_base + kpm_pp_sz, |

1341 |
ecache_alignsize); |

1342 |
} |

1343 |
||
1344 |
ASSERT(bop_base <= (uintptr_t)alloc_base); |

1345 |
} |

1346 |
||
1347 |
/* |

1348 |
* Initialize per page size free list counters. |

1349 |
*/ |

1350 |
ctrs_end = page_ctrs_alloc(ctrs_base); |

1351 |
ASSERT(ctrs_base + ctrs_sz >= ctrs_end); |

1352 |
||
1353 |
PRM_DEBUG(page_hash); |

1354 |
PRM_DEBUG(memseg_base); |

1355 |
PRM_DEBUG(kpm_pp_base); |

1356 |
PRM_DEBUG(kpm_pp_sz); |

1357 |
PRM_DEBUG(pp_base); |

1358 |
PRM_DEBUG(pp_sz); |

1359 |
PRM_DEBUG(alloc_base); |

1360 |
||
1361 |
#ifdef TRAPTRACE |

1362 |
/* |

1363 |
* Allocate trap trace buffer last so as not to affect |

1364 |
* the 4M alignments of the allocations above on V9 SPARCs... |

1365 |
*/ |

1366 |
alloc_base = trap_trace_alloc(alloc_base); |

1367 |
PRM_DEBUG(alloc_base); |

1368 |
#endif /* TRAPTRACE */ |

1369 |
||
1370 |
if (kmem64_base) { |

1371 |
/* |

1372 |
* Set the end of the kmem64 segment for V9 SPARCs, if |

1373 |
* appropriate... |

1374 |
*/ |

1375 |
kmem64_end = (caddr_t)roundup((uintptr_t)alloc_base, |

1376 |
alloc_alignsize); |

1377 |
||
1378 |
PRM_DEBUG(kmem64_base); |

1379 |
PRM_DEBUG(kmem64_end); |

1380 |
} |

1381 |
||
1382 |
/* |

1383 |
* Allocate space for the interrupt vector table. |

1384 |
*/ |

1385 |
memspace = (caddr_t)BOP_ALLOC(bootops, (caddr_t)intr_vector, |

1386 |
IVSIZE, MMU_PAGESIZE); |

1387 |
if (memspace != (caddr_t)intr_vector) |

1388 |
panic("interrupt table allocation failure"); |

1389 |
||
1390 |
/* |

1391 |
* The memory lists from boot are allocated from the heap arena |

1392 |
* so that later they can be freed and/or reallocated. |

1393 |
*/ |

1394 |
if (BOP_GETPROP(bootops, "extent", &memlist_sz) == -1) |

1395 |
panic("could not retrieve property \"extent\""); |

1396 |
||
1397 |
/* |

1398 |
* Between now and when we finish copying in the memory lists, |

1399 |
* allocations happen so the space gets fragmented and the |

1400 |
* lists longer. Leave enough space for lists twice as long |

1401 |
* as what boot says it has now; roundup to a pagesize. |

1402 |
* Also add space for the final phys-avail copy in the fixup |

1403 |
* routine. |

1404 |
*/ |

1405 |
va = (caddr_t)(sysbase + PAGESIZE + PANICBUFSIZE + |

1406 |
roundup(IVSIZE, MMU_PAGESIZE)); |

1407 |
/* NOTE(review): the comment above says "twice as long", yet the size is multiplied by 4 -- confirm the intended factor. */
memlist_sz *= 4; |

1408 |
memlist_sz = roundup(memlist_sz, MMU_PAGESIZE); |

1409 |
memspace = (caddr_t)BOP_ALLOC(bootops, va, memlist_sz, BO_NO_ALIGN); |

1410 |
if (memspace == NULL) |

1411 |
halt("Boot allocation failed."); |

1412 |
||
1413 |
memlist = (struct memlist *)memspace; |

1414 |
memlist_end = (char *)memspace + memlist_sz; |

1415 |
||
1416 |
PRM_DEBUG(memlist); |

1417 |
PRM_DEBUG(memlist_end); |

1418 |
PRM_DEBUG(sysbase); |

1419 |
PRM_DEBUG(syslimit); |

1420 |
||
1421 |
kernelheap_init((void *)sysbase, (void *)syslimit, |

1422 |
(caddr_t)sysbase + PAGESIZE, NULL, NULL); |

1423 |
||
1424 |
/* |

1425 |
* Take the most current snapshot we can by calling mem-update. |

1426 |
*/ |

1427 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |

1428 |
&boot_physavail, &boot_physavail_len, |

1429 |
&boot_virtavail, &boot_virtavail_len); |

1430 |
||
1431 |
/* |

1432 |
* Remove the space used by BOP_ALLOC from the kernel heap |

1433 |
* plus the area actually used by the OBP (if any) |

1434 |
* ignoring virtual addresses in virt_avail, above syslimit. |

1435 |
*/ |

1436 |
virt_avail = memlist; |

1437 |
copy_memlist(boot_virtavail, boot_virtavail_len, &memlist); |

1438 |
||
1439 |
for (cur = virt_avail; cur->next; cur = cur->next) { |

1440 |
uint64_t range_base, range_size; |

1441 |
||
1442 |
if ((range_base = cur->address + cur->size) < (uint64_t)sysbase) |

1443 |
continue; |

1444 |
if (range_base >= (uint64_t)syslimit) |

1445 |
break; |

1446 |
/* |

1447 |
* Limit the range to end at syslimit. |

1448 |
*/ |

1449 |
range_size = MIN(cur->next->address, |

1450 |
(uint64_t)syslimit) - range_base; |

1451 |
(void) vmem_xalloc(heap_arena, (size_t)range_size, PAGESIZE, |

1452 |
0, 0, (void *)range_base, (void *)(range_base + range_size), |

1453 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |

1454 |
} |

1455 |
||
1456 |
phys_avail = memlist; |

1457 |
(void) copy_physavail(boot_physavail, boot_physavail_len, |

1458 |
&memlist, 0, 0); |

1459 |
||
1460 |
/* |

1461 |
* Add any extra memory after e_text to the phys_avail list, as long |

1462 |
* as there's at least a page to add. |

1463 |
*/ |

1464 |
if (extra_etpg) |

1465 |
memlist_add(va_to_pa(extra_etva), mmu_ptob(extra_etpg), |

1466 |
&memlist, &phys_avail); |

1467 |
||
1468 |
/* |

1469 |
* Add any extra memory after e_data to the phys_avail list as long |

1470 |
* as there's at least a page to add. Usually, there isn't any, |

1471 |
* since extra HME blocks typically get allocated there first before |

1472 |
* using RAM elsewhere. |

1473 |
*/ |

1474 |
if ((nalloc_base = ndata_extra_base(&ndata, MMU_PAGESIZE)) == NULL) |

1475 |
nalloc_base = nalloc_end; |

1476 |
ndata_remain_sz = nalloc_end - nalloc_base; |

1477 |
||
1478 |
if (ndata_remain_sz >= MMU_PAGESIZE) |

1479 |
memlist_add(va_to_pa(nalloc_base), |

1480 |
(uint64_t)ndata_remain_sz, &memlist, &phys_avail); |

1481 |
||
1482 |
PRM_DEBUG(memlist); |

1483 |
PRM_DEBUG(memlist_sz); |

1484 |
PRM_DEBUG(memspace); |

1485 |
||
1486 |
if ((caddr_t)memlist > (memspace + memlist_sz)) |

1487 |
panic("memlist overflow"); |

1488 |
||
1489 |
PRM_DEBUG(pp_base); |

1490 |
PRM_DEBUG(memseg_base); |

1491 |
PRM_DEBUG(npages); |

1492 |
||
1493 |
/* |

1494 |
* Initialize the page structures from the memory lists. |

1495 |
*/ |

1496 |
kphysm_init(pp_base, memseg_base, npages, kpm_pp_base, kpm_npages); |

1497 |
||
1498 |
availrmem_initial = availrmem = freemem; |

1499 |
PRM_DEBUG(availrmem); |

1500 |
||
1501 |
/* |

1502 |
* Some of the locks depend on page_hashsz being set! |

1503 |
* kmem_init() depends on this; so, keep it here. |

1504 |
*/ |

1505 |
page_lock_init(); |

1506 |
||
1507 |
/* |

1508 |
* Initialize kernel memory allocator. |

1509 |
*/ |

1510 |
kmem_init(); |

1511 |
||
1512 |
/* |

1513 |
* Initialize bp_mapin(). |

1514 |
*/ |

1515 |
bp_init(shm_alignment, HAT_STRICTORDER); |

1516 |
||
1517 |
/* |

1518 |
* Reserve space for panicbuf and intr_vector from the 32-bit heap |

1519 |
*/ |

1520 |
(void) vmem_xalloc(heap32_arena, PANICBUFSIZE, PAGESIZE, 0, 0, |

1521 |
panicbuf, panicbuf + PANICBUFSIZE, |

1522 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |

1523 |
||
1524 |
(void) vmem_xalloc(heap32_arena, IVSIZE, PAGESIZE, 0, 0, |

1525 |
intr_vector, (caddr_t)intr_vector + IVSIZE, |

1526 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |

1527 |
||
1528 |
mem_config_init(); |

1529 |
} |
|
1530 |
||
1531 |
static void |
|
1532 |
startup_modules(void) |
|
1533 |
{ |
|
1534 |
int proplen, nhblk1, nhblk8; |
|
1535 |
size_t nhblksz; |
|
1536 |
pgcnt_t hblk_pages, pages_per_hblk; |
|
1537 |
size_t hme8blk_sz, hme1blk_sz; |
|
1538 |
||
1539 |
/* |
|
1540 |
* Log any optional messages from the boot program |
|
1541 |
*/ |
|
1542 |
proplen = (size_t)BOP_GETPROPLEN(bootops, "boot-message"); |
|
1543 |
if (proplen > 0) { |
|
1544 |
char *msg; |
|
1545 |
size_t len = (size_t)proplen; |
|
1546 |
||
1547 |
msg = kmem_zalloc(len, KM_SLEEP); |
|
1548 |
(void) BOP_GETPROP(bootops, "boot-message", msg); |
|
1549 |
cmn_err(CE_CONT, "?%s\n", msg); |
|
1550 |
kmem_free(msg, len); |
|
1551 |
} |
|
1552 |
||
1553 |
/* |
|
1554 |
* Let the platforms have a chance to change default |
|
1555 |
* values before reading system file. |
|
1556 |
*/ |
|
1557 |
if (&set_platform_defaults) |
|
1558 |
set_platform_defaults(); |
|
1559 |
||
1560 |
/* |
|
1561 |
* Calculate default settings of system parameters based upon |
|
1562 |
* maxusers, yet allow to be overridden via the /etc/system file. |
|
1563 |
*/ |
|
1564 |
param_calc(0); |
|
1565 |
||
1566 |
mod_setup(); |
|
1567 |
||
1568 |
/* |
|
1569 |
* If this is a positron, complain and halt. |
|
1570 |
*/ |
|
1571 |
if (&iam_positron && iam_positron()) { |
|
1572 |
cmn_err(CE_WARN, "This hardware platform is not supported" |
|
1573 |
" by this release of Solaris.\n"); |
|
1574 |
#ifdef DEBUG |
|
1575 |
prom_enter_mon(); /* Type 'go' to resume */ |
|
1576 |
cmn_err(CE_WARN, "Booting an unsupported platform.\n"); |
|
1577 |
cmn_err(CE_WARN, "Booting with down-rev firmware.\n"); |
|
1578 |
||
1579 |
#else /* DEBUG */ |
|
1580 |
halt(0); |
|
1581 |
#endif /* DEBUG */ |
|
1582 |
} |
|
1583 |
||
1584 |
/* |
|
1585 |
* If we are running firmware that isn't 64-bit ready |
|
1586 |
* then complain and halt. |
|
1587 |
*/ |
|
1588 |
do_prom_version_check(); |
|
1589 |
||
1590 |
/* |
|
1591 |
* Initialize system parameters |
|
1592 |
*/ |
|
1593 |
param_init(); |
|
1594 |
||
1595 |
/* |
|
1596 |
* maxmem is the amount of physical memory we're playing with. |
|
1597 |
*/ |
|
1598 |
maxmem = physmem; |
|
1599 |
||
1600 |
/* Set segkp limits. */ |
|
1601 |
ncbase = (caddr_t)SEGDEBUGBASE; |
|
1602 |
ncend = (caddr_t)SEGDEBUGBASE; |
|
1603 |
||
1604 |
/* |
|
1605 |
* Initialize the hat layer. |
|
1606 |
*/ |
|
1607 |
hat_init(); |
|
1608 |
||
1609 |
/* |
|
1610 |
* Initialize segment management stuff. |
|
1611 |
*/ |
|
1612 |
seg_init(); |
|
1613 |
||
1614 |
/* |
|
1615 |
* Create the va>tte handler, so the prom can understand |
|
1616 |
* kernel translations. The handler is installed later, just |
|
1617 |
* as we are about to take over the trap table from the prom. |
|
1618 |
*/ |
|
1619 |
create_va_to_tte(); |
|
1620 |
||
1621 |
/* |
|
1622 |
* Load the forthdebugger (optional) |
|
1623 |
*/ |
|
1624 |
forthdebug_init(); |
|
1625 |
||
1626 |
/* |
|
1627 |
* Create OBP node for console input callbacks |
|
1628 |
* if it is needed. |
|
1629 |
*/ |
|
1253 | 1630 |
startup_create_io_node(); |
0 | 1631 |
|
1632 |
if (modloadonly("fs", "specfs") == -1) |
|
1633 |
halt("Can't load specfs"); |
|
1634 |
||
1635 |
if (modloadonly("fs", "devfs") == -1) |
|
1636 |
halt("Can't load devfs"); |
|
1637 |
||
1638 |
if (modloadonly("misc", "swapgeneric") == -1) |
|
1639 |
halt("Can't load swapgeneric"); |
|
1640 |
||
1641 |
dispinit(); |
|
1642 |
||
1643 |
/* |
|
1644 |
* Infer meanings to the members of the idprom buffer. |
|
1645 |
*/ |
|
1646 |
parse_idprom(); |
|
1647 |
||
1648 |
/* Read cluster configuration data. */ |
|
1649 |
clconf_init(); |
|
1650 |
||
1651 |
setup_ddi(); |
|
1652 |
||
1653 |
/* |
|
1654 |
* Lets take this opportunity to load the root device. |
|
1655 |
*/ |
|
1656 |
if (loadrootmodules() != 0) |
|
1657 |
debug_enter("Can't load the root filesystem"); |
|
1658 |
||
1659 |
/* |
|
1660 |
* Load tod driver module for the tod part found on this system. |
|
1661 |
* Recompute the cpu frequency/delays based on tod as tod part |
|
1662 |
* tends to keep time more accurately. |
|
1663 |
*/ |
|
1664 |
if (&load_tod_module) |
|
1665 |
load_tod_module(); |
|
1666 |
||
1667 |
/* |
|
1668 |
* Allow platforms to load modules which might |
|
1669 |
* be needed after bootops are gone. |
|
1670 |
*/ |
|
1671 |
if (&load_platform_modules) |
|
1672 |
load_platform_modules(); |
|
1673 |
||
1674 |
setcpudelay(); |
|
1675 |
||
1676 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |
|
1677 |
&boot_physavail, &boot_physavail_len, |
|
1678 |
&boot_virtavail, &boot_virtavail_len); |
|
1679 |
||
1680 |
bop_alloc_pages = size_virtalloc(boot_virtavail, boot_virtavail_len); |
|
1681 |
||
1682 |
/* |
|
1683 |
* Calculation and allocation of hmeblks needed to remap |
|
1684 |
* the memory allocated by PROM till now: |
|
1685 |
* |
|
1686 |
* (1) calculate how much virtual memory has been bop_alloc'ed. |
|
1687 |
* (2) roundup this memory to span of hme8blk, i.e. 64KB |
|
1688 |
* (3) calculate number of hme8blk's needed to remap this memory |
|
1689 |
* (4) calculate amount of memory that's consumed by these hme8blk's |
|
1690 |
* (5) add memory calculated in steps (2) and (4) above. |
|
1691 |
* (6) roundup this memory to span of hme8blk, i.e. 64KB |
|
1692 |
* (7) calculate number of hme8blk's needed to remap this memory |
|
1693 |
* (8) calculate amount of memory that's consumed by these hme8blk's |
|
1694 |
* (9) allocate additional hme1blk's to hold large mappings. |
|
1695 |
* H8TOH1 determines this. The current SWAG gives enough hblk1's |
|
1696 |
* to remap everything with 4M mappings. |
|
1697 |
* (10) account for partially used hblk8's due to non-64K aligned |
|
1698 |
* PROM mapping entries. |
|
1699 |
* (11) add memory calculated in steps (8), (9), and (10) above. |
|
1700 |
* (12) kmem_zalloc the memory calculated in (11); since segkmem |
|
1701 |
* is not ready yet, this gets bop_alloc'ed. |
|
1702 |
* (13) there will be very few bop_alloc's after this point before |
|
1703 |
* trap table takes over |
|
1704 |
*/ |
|
1705 |
||
1706 |
/* sfmmu_init_nucleus_hblks expects properly aligned data structures. */ |
|
1707 |
hme8blk_sz = roundup(HME8BLK_SZ, sizeof (int64_t)); |
|
1708 |
hme1blk_sz = roundup(HME1BLK_SZ, sizeof (int64_t)); |
|
1709 |
||
1710 |
pages_per_hblk = btop(HMEBLK_SPAN(TTE8K)); |
|
1711 |
bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk); |
|
1712 |
nhblk8 = bop_alloc_pages / pages_per_hblk; |
|
1713 |
nhblk1 = roundup(nhblk8, H8TOH1) / H8TOH1; |
|
1714 |
hblk_pages = btopr(nhblk8 * hme8blk_sz + nhblk1 * hme1blk_sz); |
|
1715 |
bop_alloc_pages += hblk_pages; |
|
1716 |
bop_alloc_pages = roundup(bop_alloc_pages, pages_per_hblk); |
|
1717 |
nhblk8 = bop_alloc_pages / pages_per_hblk; |
|
1718 |
nhblk1 = roundup(nhblk8, H8TOH1) / H8TOH1; |
|
1719 |
if (nhblk1 < hblk1_min) |
|
1720 |
nhblk1 = hblk1_min; |
|
1721 |
if (nhblk8 < hblk8_min) |
|
1722 |
nhblk8 = hblk8_min; |
|
1723 |
||
1724 |
/* |
|
1725 |
* Since hblk8's can hold up to 64k of mappings aligned on a 64k |
|
1726 |
* boundary, the number of hblk8's needed to map the entries in the |
|
1727 |
* boot_virtavail list needs to be adjusted to take this into |
|
1728 |
* consideration. Thus, we need to add additional hblk8's since it |
|
1729 |
* is possible that an hblk8 will not have all 8 slots used due to |
|
1730 |
* alignment constraints. Since there were boot_virtavail_len entries |
|
1731 |
* in that list, we need to add that many hblk8's to the number |
|
1732 |
* already calculated to make sure we don't underestimate. |
|
1733 |
*/ |
|
1734 |
nhblk8 += boot_virtavail_len; |
|
1735 |
nhblksz = nhblk8 * hme8blk_sz + nhblk1 * hme1blk_sz; |
|
1736 |
||
1737 |
/* Allocate in pagesize chunks */ |
|
1738 |
nhblksz = roundup(nhblksz, MMU_PAGESIZE); |
|
1739 |
hblk_base = kmem_zalloc(nhblksz, KM_SLEEP); |
|
1740 |
sfmmu_init_nucleus_hblks(hblk_base, nhblksz, nhblk8, nhblk1); |
|
1741 |
} |
|
1742 |
||
1743 |
static void |
|
1744 |
startup_bop_gone(void) |
|
1745 |
{ |
|
1746 |
extern int bop_io_quiesced; |
|
1747 |
||
1748 |
/* |
|
1749 |
* Call back into boot and release boots resources. |
|
1750 |
*/ |
|
1751 |
BOP_QUIESCE_IO(bootops); |
|
1752 |
bop_io_quiesced = 1; |
|
1753 |
||
1754 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |
|
1755 |
&boot_physavail, &boot_physavail_len, |
|
1756 |
&boot_virtavail, &boot_virtavail_len); |
|
1757 |
/* |
|
1758 |
* Copy physinstalled list into kernel space. |
|
1759 |
*/ |
|
1760 |
phys_install = memlist; |
|
1761 |
copy_memlist(boot_physinstalled, boot_physinstalled_len, &memlist); |
|
1762 |
||
1763 |
/* |
|
1764 |
* setup physically contiguous area twice as large as the ecache. |
|
1765 |
* this is used while doing displacement flush of ecaches |
|
1766 |
*/ |
|
1767 |
if (&ecache_flush_address) { |
|
1768 |
ecache_flushaddr = ecache_flush_address(); |
|
1769 |
if (ecache_flushaddr == (uint64_t)-1) { |
|
1770 |
cmn_err(CE_PANIC, |
|
1771 |
"startup: no memory to set ecache_flushaddr"); |
|
1772 |
} |
|
1773 |
} |
|
1774 |
||
1775 |
/* |
|
1776 |
* Virtual available next. |
|
1777 |
*/ |
|
1778 |
ASSERT(virt_avail != NULL); |
|
1779 |
memlist_free_list(virt_avail); |
|
1780 |
virt_avail = memlist; |
|
1781 |
copy_memlist(boot_virtavail, boot_virtavail_len, &memlist); |
|
1782 |
||
1783 |
/* |
|
1784 |
* Last chance to ask our booter questions .. |
|
1785 |
*/ |
|
1786 |
} |
|
1787 |
||
1788 |
||
1789 |
/* |
|
1790 |
* startup_fixup_physavail - called from mach_sfmmu.c after the final |
|
1791 |
* allocations have been performed. We can't call it in startup_bop_gone |
|
1792 |
* since later operations can cause obp to allocate more memory. |
|
1793 |
*/ |
|
1794 |
void |
|
1795 |
startup_fixup_physavail(void) |
|
1796 |
{ |
|
1797 |
struct memlist *cur; |
|
1798 |
||
1799 |
/* |
|
1800 |
* take the most current snapshot we can by calling mem-update |
|
1801 |
*/ |
|
1802 |
copy_boot_memlists(&boot_physinstalled, &boot_physinstalled_len, |
|
1803 |
&boot_physavail, &boot_physavail_len, |
|
1804 |
&boot_virtavail, &boot_virtavail_len); |
|
1805 |
||
1806 |
/* |
|
1807 |
* Copy phys_avail list, again. |
|
1808 |
* Both the kernel/boot and the prom have been allocating |
|
1809 |
* from the original list we copied earlier. |
|
1810 |
*/ |
|
1811 |
cur = memlist; |
|
1812 |
(void) copy_physavail(boot_physavail, boot_physavail_len, |
|
1813 |
&memlist, 0, 0); |
|
1814 |
||
1815 |
/* |
|
1816 |
* Add any extra memory after e_text we added to the phys_avail list |
|
1817 |
* back to the old list. |
|
1818 |
*/ |
|
1819 |
if (extra_etpg) |
|
1820 |
memlist_add(va_to_pa(extra_etva), mmu_ptob(extra_etpg), |
|
1821 |
&memlist, &cur); |
|
1822 |
if (ndata_remain_sz >= MMU_PAGESIZE) |
|
1823 |
memlist_add(va_to_pa(nalloc_base), |
|
1824 |
(uint64_t)ndata_remain_sz, &memlist, &cur); |
|
1825 |
||
1826 |
/* |
|
1827 |
* There isn't any bounds checking on the memlist area |
|
1828 |
* so ensure it hasn't overgrown. |
|
1829 |
*/ |
|
1830 |
if ((caddr_t)memlist > (caddr_t)memlist_end) |
|
1831 |
cmn_err(CE_PANIC, "startup: memlist size exceeded"); |
|
1832 |
||
1833 |
/* |
|
1834 |
* The kernel removes the pages that were allocated for it from |
|
1835 |
* the freelist, but we now have to find any -extra- pages that |
|
1836 |
* the prom has allocated for it's own book-keeping, and remove |
|
1837 |
* them from the freelist too. sigh. |
|
1838 |
*/ |
|
1839 |
fix_prom_pages(phys_avail, cur); |
|
1840 |
||
1841 |
ASSERT(phys_avail != NULL); |
|
1842 |
memlist_free_list(phys_avail); |
|
1843 |
phys_avail = cur; |
|
1844 |
||
1845 |
/* |
|
1846 |
* We're done with boot. Just after this point in time, boot |
|
1847 |
* gets unmapped, so we can no longer rely on its services. |
|
1848 |
* Zero the bootops to indicate this fact. |
|
1849 |
*/ |
|
1850 |
bootops = (struct bootops *)NULL; |
|
1851 |
BOOTOPS_GONE(); |
|
1852 |
} |
|
1853 |
||
1854 |
static void |
|
1855 |
startup_vm(void) |
|
1856 |
{ |
|
1857 |
size_t i; |
|
1858 |
struct segmap_crargs a; |
|
1859 |
struct segkpm_crargs b; |
|
1860 |
||
1861 |
uint64_t avmem; |
|
1862 |
caddr_t va; |
|
1863 |
pgcnt_t max_phys_segkp; |
|
1864 |
int mnode; |
|
1865 |
||
423
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1866 |
extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1867 |
|
0 | 1868 |
/* |
1869 |
* get prom's mappings, create hments for them and switch |
|
1870 |
* to the kernel context. |
|
1871 |
*/ |
|
1872 |
hat_kern_setup(); |
|
1873 |
||
1874 |
/* |
|
1875 |
* Take over trap table |
|
1876 |
*/ |
|
1877 |
setup_trap_table(); |
|
1878 |
||
1879 |
/* |
|
1880 |
* Install the va>tte handler, so that the prom can handle |
|
1881 |
* misses and understand the kernel table layout in case |
|
1882 |
* we need call into the prom. |
|
1883 |
*/ |
|
1884 |
install_va_to_tte(); |
|
1885 |
||
1886 |
/* |
|
1887 |
* Set a flag to indicate that the tba has been taken over. |
|
1888 |
*/ |
|
1889 |
tba_taken_over = 1; |
|
1890 |
||
1891 |
/* initialize MMU primary context register */ |
|
1892 |
mmu_init_kcontext(); |
|
1893 |
||
1894 |
/* |
|
1895 |
* The boot cpu can now take interrupts, x-calls, x-traps |
|
1896 |
*/ |
|
1897 |
CPUSET_ADD(cpu_ready_set, CPU->cpu_id); |
|
1898 |
CPU->cpu_flags |= (CPU_READY | CPU_ENABLE | CPU_EXISTS); |
|
1899 |
||
1900 |
/* |
|
1901 |
* Set a flag to tell write_scb_int() that it can access V_TBR_WR_ADDR. |
|
1902 |
*/ |
|
1903 |
tbr_wr_addr_inited = 1; |
|
1904 |
||
1905 |
/* |
|
1906 |
* Initialize VM system, and map kernel address space. |
|
1907 |
*/ |
|
1908 |
kvm_init(); |
|
1909 |
||
1910 |
/* |
|
1911 |
* XXX4U: previously, we initialized and turned on |
|
1912 |
* the caches at this point. But of course we have |
|
1913 |
* nothing to do, as the prom has already done this |
|
1914 |
* for us -- main memory must be E$able at all times. |
|
1915 |
*/ |
|
1916 |
||
1917 |
/* |
|
1918 |
* If the following is true, someone has patched |
|
1919 |
* phsymem to be less than the number of pages that |
|
1920 |
* the system actually has. Remove pages until system |
|
1921 |
* memory is limited to the requested amount. Since we |
|
1922 |
* have allocated page structures for all pages, we |
|
1923 |
* correct the amount of memory we want to remove |
|
1924 |
* by the size of the memory used to hold page structures |
|
1925 |
* for the non-used pages. |
|
1926 |
*/ |
|
1927 |
if (physmem < npages) { |
|
1928 |
pgcnt_t diff, off; |
|
1929 |
struct page *pp; |
|
1930 |
struct seg kseg; |
|
1931 |
||
1932 |
cmn_err(CE_WARN, "limiting physmem to %ld pages", physmem); |
|
1933 |
||
1934 |
off = 0; |
|
1935 |
diff = npages - physmem; |
|
1936 |
diff -= mmu_btopr(diff * sizeof (struct page)); |
|
1937 |
kseg.s_as = &kas; |
|
1938 |
while (diff--) { |
|
1939 |
pp = page_create_va(&unused_pages_vp, (offset_t)off, |
|
1940 |
MMU_PAGESIZE, PG_WAIT | PG_EXCL, |
|
1941 |
&kseg, (caddr_t)off); |
|
1942 |
if (pp == NULL) |
|
1943 |
cmn_err(CE_PANIC, "limited physmem too much!"); |
|
1944 |
page_io_unlock(pp); |
|
1945 |
page_downgrade(pp); |
|
1946 |
availrmem--; |
|
1947 |
off += MMU_PAGESIZE; |
|
1948 |
} |
|
1949 |
} |
|
1950 |
||
1951 |
/* |
|
1952 |
* When printing memory, show the total as physmem less |
|
1953 |
* that stolen by a debugger. |
|
1954 |
*/ |
|
1955 |
cmn_err(CE_CONT, "?mem = %ldK (0x%lx000)\n", |
|
1956 |
(ulong_t)(physinstalled) << (PAGESHIFT - 10), |
|
1957 |
(ulong_t)(physinstalled) << (PAGESHIFT - 12)); |
|
1958 |
||
1959 |
avmem = (uint64_t)freemem << PAGESHIFT; |
|
1960 |
cmn_err(CE_CONT, "?avail mem = %lld\n", (unsigned long long)avmem); |
|
1961 |
||
423
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1962 |
/* For small memory systems disable automatic large pages. */ |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1963 |
if (physmem < auto_lpg_min_physmem) { |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1964 |
exec_lpg_disable = 1; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1965 |
use_brk_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1966 |
use_stk_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1967 |
use_zmap_lpg = 0; |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1968 |
} |
6cbc492798ce
6294795 The behavior of malloc is different between solaris 10 and nevada
davemq
parents:
0
diff
changeset
|
1969 |
|
0 | 1970 |
/* |
1971 |
* Perform platform specific freelist processing |
|
1972 |
*/ |
|
1973 |
if (&plat_freelist_process) { |
|
1974 |
for (mnode = 0; mnode < max_mem_nodes; mnode++) |
|
1975 |
if (mem_node_config[mnode].exists) |
|
1976 |
plat_freelist_process(mnode); |
|
1977 |
} |
|
1978 |
||
1979 |
/* |
|
1980 |
* Initialize the segkp segment type. We position it |
|
1981 |
* after the configured tables and buffers (whose end |
|
1982 |
* is given by econtig) and before V_WKBASE_ADDR. |
|
1983 |
* Also in this area is segkmap (size SEGMAPSIZE). |
|
1984 |
*/ |
|
1985 |
||
1986 |
/* XXX - cache alignment? */ |
|
1987 |
va = (caddr_t)SEGKPBASE; |
|
1988 |
ASSERT(((uintptr_t)va & PAGEOFFSET) == 0); |
|
1989 |
||
1990 |
max_phys_segkp = (physmem * 2); |
|
1991 |
||
1992 |
if (segkpsize < btop(SEGKPMINSIZE) || segkpsize > btop(SEGKPMAXSIZE)) { |
|
1993 |
segkpsize = btop(SEGKPDEFSIZE); |
|
1994 |
cmn_err(CE_WARN, "Illegal value for segkpsize. " |
|
1995 |
"segkpsize has been reset to %ld pages", segkpsize); |
|
1996 |
} |
|
1997 |
||
1998 |
i = ptob(MIN(segkpsize, max_phys_segkp)); |
|
1999 |
||
2000 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
2001 |
if (seg_attach(&kas, va, i, segkp) < 0) |
|
2002 |
cmn_err(CE_PANIC, "startup: cannot attach segkp"); |
|
2003 |
if (segkp_create(segkp) != 0) |
|
2004 |
cmn_err(CE_PANIC, "startup: segkp_create failed"); |
|
2005 |
rw_exit(&kas.a_lock); |
|
2006 |
||
2007 |
/* |
|
2008 |
* kpm segment |
|
2009 |
*/ |
|
2010 |
segmap_kpm = kpm_enable && |
|
2011 |
segmap_kpm && PAGESIZE == MAXBSIZE; |
|
2012 |
||
2013 |
if (kpm_enable) { |
|
2014 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
2015 |
||
2016 |
/* |
|
2017 |
* The segkpm virtual range range is larger than the |
|
2018 |
* actual physical memory size and also covers gaps in |
|
2019 |
* the physical address range for the following reasons: |
|
2020 |
* . keep conversion between segkpm and physical addresses |
|
2021 |
* simple, cheap and unambiguous. |
|
2022 |
* . avoid extension/shrink of the the segkpm in case of DR. |
|
2023 |
* . avoid complexity for handling of virtual addressed |
|
2024 |
* caches, segkpm and the regular mapping scheme must be |
|
2025 |
* kept in sync wrt. the virtual color of mapped pages. |
|
2026 |
* Any accesses to virtual segkpm ranges not backed by |
|
2027 |
* physical memory will fall through the memseg pfn hash |
|
2028 |
* and will be handled in segkpm_fault. |
|
2029 |
* Additional kpm_size spaces needed for vac alias prevention. |
|
2030 |
*/ |
|
2031 |
if (seg_attach(&kas, kpm_vbase, kpm_size * vac_colors, |
|
2032 |
segkpm) < 0) |
|
2033 |
cmn_err(CE_PANIC, "cannot attach segkpm"); |
|
2034 |
||
2035 |
b.prot = PROT_READ | PROT_WRITE; |
|
2036 |
b.nvcolors = shm_alignment >> MMU_PAGESHIFT; |
|
2037 |
||
2038 |
if (segkpm_create(segkpm, (caddr_t)&b) != 0) |
|
2039 |
panic("segkpm_create segkpm"); |
|
2040 |
||
2041 |
rw_exit(&kas.a_lock); |
|
2042 |
} |
|
2043 |
||
2044 |
/* |
|
2045 |
* Now create generic mapping segment. This mapping |
|
2046 |
* goes SEGMAPSIZE beyond SEGMAPBASE. But if the total |
|
2047 |
* virtual address is greater than the amount of free |
|
2048 |
* memory that is available, then we trim back the |
|
2049 |
* segment size to that amount |
|
2050 |
*/ |
|
2051 |
va = (caddr_t)SEGMAPBASE; |
|
2052 |
||
2053 |
/* |
|
2054 |
* 1201049: segkmap base address must be MAXBSIZE aligned |
|
2055 |
*/ |
|
2056 |
ASSERT(((uintptr_t)va & MAXBOFFSET) == 0); |
|
2057 |
||
2058 |
/* |
|
2059 |
* Set size of segmap to percentage of freemem at boot, |
|
2060 |
* but stay within the allowable range |
|
2061 |
* Note we take percentage before converting from pages |
|
2062 |
* to bytes to avoid an overflow on 32-bit kernels. |
|
2063 |
*/ |
|
2064 |
i = mmu_ptob((freemem * segmap_percent) / 100); |
|
2065 |
||
2066 |
if (i < MINMAPSIZE) |
|
2067 |
i = MINMAPSIZE; |
|
2068 |
||
2069 |
if (i > MIN(SEGMAPSIZE, mmu_ptob(freemem))) |
|
2070 |
i = MIN(SEGMAPSIZE, mmu_ptob(freemem)); |
|
2071 |
||
2072 |
i &= MAXBMASK; /* 1201049: segkmap size must be MAXBSIZE aligned */ |
|
2073 |
||
2074 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
2075 |
if (seg_attach(&kas, va, i, segkmap) < 0) |
|
2076 |
cmn_err(CE_PANIC, "cannot attach segkmap"); |
|
2077 |
||
2078 |
a.prot = PROT_READ | PROT_WRITE; |
|
2079 |
a.shmsize = shm_alignment; |
|
2080 |
a.nfreelist = 0; /* use segmap driver defaults */ |
|
2081 |
||
2082 |
if (segmap_create(segkmap, (caddr_t)&a) != 0) |
|
2083 |
panic("segmap_create segkmap"); |
|
2084 |
rw_exit(&kas.a_lock); |
|
2085 |
||
2086 |
segdev_init(); |
|
2087 |
} |
|
2088 |
||
2089 |
static void |
|
2090 |
startup_end(void) |
|
2091 |
{ |
|
2092 |
if ((caddr_t)memlist > (caddr_t)memlist_end) |
|
2093 |
panic("memlist overflow 2"); |
|
2094 |
memlist_free_block((caddr_t)memlist, |
|
2095 |
((caddr_t)memlist_end - (caddr_t)memlist)); |
|
2096 |
memlist = NULL; |
|
2097 |
||
2098 |
/* enable page_relocation since OBP is now done */ |
|
2099 |
page_relocate_ready = 1; |
|
2100 |
||
2101 |
/* |
|
2102 |
* Perform tasks that get done after most of the VM |
|
2103 |
* initialization has been done but before the clock |
|
2104 |
* and other devices get started. |
|
2105 |
*/ |
|
2106 |
kern_setup1(); |
|
2107 |
||
2108 |
/* |
|
2109 |
* Intialize the VM arenas for allocating physically |
|
2110 |
* contiguus memory chunk for interrupt queues snd |
|
2111 |
* allocate/register boot cpu's queues, if any and |
|
2112 |
* allocate dump buffer for sun4v systems to store |
|
2113 |
* extra crash information during crash dump |
|
2114 |
*/ |
|
2115 |
contig_mem_init(); |
|
2116 |
mach_descrip_init(); |
|
2117 |
cpu_intrq_setup(CPU); |
|
2118 |
cpu_intrq_register(CPU); |
|
1077 | 2119 |
mach_htraptrace_init(); |
2120 |
mach_htraptrace_setup(CPU->cpu_id); |
|
2121 |
mach_htraptrace_configure(CPU->cpu_id); |
|
0 | 2122 |
mach_dump_buffer_init(); |
2123 |
||
2124 |
/* |
|
2125 |
* Initialize interrupt related stuff |
|
2126 |
*/ |
|
1455
b43f098fa50c
6378953 allocation of interrupt threads could be more common
andrei
parents:
1253
diff
changeset
|
2127 |
cpu_intr_alloc(CPU, NINTR_THREADS); |
0 | 2128 |
|
2129 |
(void) splzs(); /* allow hi clock ints but not zs */ |
|
2130 |
||
2131 |
/* |
|
2132 |
* Initialize errors. |
|
2133 |
*/ |
|
2134 |
error_init(); |
|
2135 |
||
2136 |
/* |
|
2137 |
* Note that we may have already used kernel bcopy before this |
|
2138 |
* point - but if you really care about this, adb the use_hw_* |
|
2139 |
* variables to 0 before rebooting. |
|
2140 |
*/ |
|
2141 |
mach_hw_copy_limit(); |
|
2142 |
||
2143 |
/* |
|
2144 |
* Install the "real" preemption guards before DDI services |
|
2145 |
* are available. |
|
2146 |
*/ |
|
2147 |
(void) prom_set_preprom(kern_preprom); |
|
2148 |
(void) prom_set_postprom(kern_postprom); |
|
2149 |
CPU->cpu_m.mutex_ready = 1; |
|
2150 |
||
2151 |
/* |
|
2152 |
* Initialize segnf (kernel support for non-faulting loads). |
|
2153 |
*/ |
|
2154 |
segnf_init(); |
|
2155 |
||
2156 |
/* |
|
2157 |
* Configure the root devinfo node. |
|
2158 |
*/ |
|
2159 |
configure(); /* set up devices */ |
|
2160 |
mach_cpu_halt_idle(); |
|
2161 |
} |
|
2162 |
||
2163 |
||
2164 |
void |
|
2165 |
post_startup(void) |
|
2166 |
{ |
|
2167 |
#ifdef PTL1_PANIC_DEBUG |
|
2168 |
extern void init_ptl1_thread(void); |
|
2169 |
#endif /* PTL1_PANIC_DEBUG */ |
|
2170 |
extern void abort_sequence_init(void); |
|
2171 |
||
2172 |
/* |
|
2173 |
* Set the system wide, processor-specific flags to be passed |
|
2174 |
* to userland via the aux vector for performance hints and |
|
2175 |
* instruction set extensions. |
|
2176 |
*/ |
|
2177 |
bind_hwcap(); |
|
2178 |
||
2179 |
/* |
|
2180 |
* Startup memory scrubber (if any) |
|
2181 |
*/ |
|
2182 |
mach_memscrub(); |
|
2183 |
||
2184 |
/* |
|
2185 |
* Allocate soft interrupt to handle abort sequence. |
|
2186 |
*/ |
|
2187 |
abort_sequence_init(); |
|
2188 |
||
2189 |
/* |
|
2190 |
* Configure the rest of the system. |
|
2191 |
* Perform forceloading tasks for /etc/system. |
|
2192 |
*/ |
|
2193 |
(void) mod_sysctl(SYS_FORCELOAD, NULL); |
|
2194 |
/* |
|
2195 |
* ON4.0: Force /proc module in until clock interrupt handle fixed |
|
2196 |
* ON4.0: This must be fixed or restated in /etc/systems. |
|
2197 |
*/ |
|
2198 |
(void) modload("fs", "procfs"); |
|
2199 |
||
2200 |
if (&load_platform_drivers) |
|
2201 |
load_platform_drivers(); |
|
2202 |
||
2203 |
/* load vis simulation module, if we are running w/fpu off */ |
|
2204 |
if (!fpu_exists) { |
|
2205 |
if (modload("misc", "vis") == -1) |
|
2206 |
halt("Can't load vis"); |
|
2207 |
} |
|
2208 |
||
2209 |
mach_fpras(); |
|
2210 |
||
2211 |
maxmem = freemem; |
|
2212 |
||
2213 |
#ifdef PTL1_PANIC_DEBUG |
|
2214 |
init_ptl1_thread(); |
|
2215 |
#endif /* PTL1_PANIC_DEBUG */ |
|
2216 |
} |
|
2217 |
||
2218 |
#ifdef PTL1_PANIC_DEBUG |
|
2219 |
int ptl1_panic_test = 0; |
|
2220 |
int ptl1_panic_xc_one_test = 0; |
|
2221 |
int ptl1_panic_xc_all_test = 0; |
|
2222 |
int ptl1_panic_xt_one_test = 0; |
|
2223 |
int ptl1_panic_xt_all_test = 0; |
|
2224 |
kthread_id_t ptl1_thread_p = NULL; |
|
2225 |
kcondvar_t ptl1_cv; |
|
2226 |
kmutex_t ptl1_mutex; |
|
2227 |
int ptl1_recurse_count_threshold = 0x40; |
|
2228 |
int ptl1_recurse_trap_threshold = 0x3d; |
|
2229 |
extern void ptl1_recurse(int, int); |
|
2230 |
extern void ptl1_panic_xt(int, int); |
|
2231 |
||
2232 |
/* |
|
2233 |
* Called once per second by timeout() to wake up |
|
2234 |
* the ptl1_panic thread to see if it should cause |
|
2235 |
* a trap to the ptl1_panic() code. |
|
2236 |
*/ |
|
2237 |
/* ARGSUSED */ |
|
2238 |
static void |
|
2239 |
ptl1_wakeup(void *arg) |
|
2240 |
{ |
|
2241 |
mutex_enter(&ptl1_mutex); |
|
2242 |
cv_signal(&ptl1_cv); |
|
2243 |
mutex_exit(&ptl1_mutex); |
|
2244 |
} |
|
2245 |
||
2246 |
/* |
|
2247 |
* ptl1_panic cross call function: |
|
2248 |
* Needed because xc_one() and xc_some() can pass |
|
2249 |
* 64 bit args but ptl1_recurse() expects ints. |
|
2250 |
*/ |
|
2251 |
static void |
|
2252 |
ptl1_panic_xc(void) |
|
2253 |
{ |
|
2254 |
ptl1_recurse(ptl1_recurse_count_threshold, |
|
2255 |
ptl1_recurse_trap_threshold); |
|
2256 |
} |
|
2257 |
||
2258 |
/* |
|
2259 |
* The ptl1 thread waits for a global flag to be set |
|
2260 |
* and uses the recurse thresholds to set the stack depth |
|
2261 |
* to cause a ptl1_panic() directly via a call to ptl1_recurse |
|
2262 |
* or indirectly via the cross call and cross trap functions. |
|
2263 |
* |
|
2264 |
* This is useful testing stack overflows and normal |
|
2265 |
* ptl1_panic() states with a know stack frame. |
|
2266 |
* |
|
2267 |
* ptl1_recurse() is an asm function in ptl1_panic.s that |
|
2268 |
* sets the {In, Local, Out, and Global} registers to a |
|
2269 |
* know state on the stack and just prior to causing a |
|
2270 |
* test ptl1_panic trap. |
|
2271 |
*/ |
|
2272 |
static void |
|
2273 |
ptl1_thread(void) |
|
2274 |
{ |
|
2275 |
mutex_enter(&ptl1_mutex); |
|
2276 |
while (ptl1_thread_p) { |
|
2277 |
cpuset_t other_cpus; |
|
2278 |
int cpu_id; |
|
2279 |
int my_cpu_id; |
|
2280 |
int target_cpu_id; |
|
2281 |
int target_found; |
|
2282 |
||
2283 |
if (ptl1_panic_test) { |
|
2284 |
ptl1_recurse(ptl1_recurse_count_threshold, |
|
2285 |
ptl1_recurse_trap_threshold); |
|
2286 |
} |
|
2287 |
||
2288 |
/* |
|
2289 |
* Find potential targets for x-call and x-trap, |
|
2290 |
* if any exist while preempt is disabled we |
|
2291 |
* start a ptl1_panic if requested via a |
|
2292 |
* globals. |
|
2293 |
*/ |
|
2294 |
kpreempt_disable(); |
|
2295 |
my_cpu_id = CPU->cpu_id; |
|
2296 |
other_cpus = cpu_ready_set; |
|
2297 |
CPUSET_DEL(other_cpus, CPU->cpu_id); |
|
2298 |
target_found = 0; |
|
2299 |
if (!CPUSET_ISNULL(other_cpus)) { |
|
2300 |
/* |
|
2301 |
* Pick the first one |
|
2302 |
*/ |
|
2303 |
for (cpu_id = 0; cpu_id < NCPU; cpu_id++) { |
|
2304 |
if (cpu_id == my_cpu_id) |
|
2305 |
continue; |
|
2306 |
||
2307 |
if (CPU_XCALL_READY(cpu_id)) { |
|
2308 |
target_cpu_id = cpu_id; |
|
2309 |
target_found = 1; |
|
2310 |
break; |
|
2311 |
} |
|
2312 |
} |
|
2313 |
ASSERT(target_found); |
|
2314 |
||
2315 |
if (ptl1_panic_xc_one_test) { |
|
2316 |
xc_one(target_cpu_id, |
|
2317 |
(xcfunc_t *)ptl1_panic_xc, 0, 0); |
|
2318 |
} |
|
2319 |
if (ptl1_panic_xc_all_test) { |
|
2320 |
xc_some(other_cpus, |
|
2321 |
(xcfunc_t *)ptl1_panic_xc, 0, 0); |
|
2322 |
} |
|
2323 |
if (ptl1_panic_xt_one_test) { |
|
2324 |
xt_one(target_cpu_id, |
|
2325 |
(xcfunc_t *)ptl1_panic_xt, 0, 0); |
|
2326 |
} |
|
2327 |
if (ptl1_panic_xt_all_test) { |
|
2328 |
xt_some(other_cpus, |
|
2329 |
(xcfunc_t *)ptl1_panic_xt, 0, 0); |
|
2330 |
} |
|
2331 |
} |
|
2332 |
kpreempt_enable(); |
|
2333 |
(void) timeout(ptl1_wakeup, NULL, hz); |
|
2334 |
(void) cv_wait(&ptl1_cv, &ptl1_mutex); |
|
2335 |
} |
|
2336 |
mutex_exit(&ptl1_mutex); |
|
2337 |
} |
|
2338 |
||
2339 |
/* |
|
2340 |
* Called during early startup to create the ptl1_thread |
|
2341 |
*/ |
|
2342 |
void |
|
2343 |
init_ptl1_thread(void) |
|
2344 |
{ |
|
2345 |
ptl1_thread_p = thread_create(NULL, 0, ptl1_thread, NULL, 0, |
|
2346 |
&p0, TS_RUN, 0); |
|
2347 |
} |
|
2348 |
#endif /* PTL1_PANIC_DEBUG */ |
|
2349 |
||
2350 |
||
2351 |
/* |
|
2352 |
* Add to a memory list. |
|
2353 |
* start = start of new memory segment |
|
2354 |
* len = length of new memory segment in bytes |
|
2355 |
* memlistp = pointer to array of available memory segment structures |
|
2356 |
* curmemlistp = memory list to which to add segment. |
|
2357 |
*/ |
|
2358 |
static void |
|
2359 |
memlist_add(uint64_t start, uint64_t len, struct memlist **memlistp, |
|
2360 |
struct memlist **curmemlistp) |
|
2361 |
{ |
|
2362 |
struct memlist *new; |
|
2363 |
||
2364 |
new = *memlistp; |
|
2365 |
new->address = start; |
|
2366 |
new->size = len; |
|
2367 |
*memlistp = new + 1; |
|
2368 |
||
2369 |
memlist_insert(new, curmemlistp); |
|
2370 |
} |
|
2371 |
||
2372 |
/* |
|
2373 |
* In the case of architectures that support dynamic addition of |
|
2374 |
* memory at run-time there are two cases where memsegs need to |
|
2375 |
* be initialized and added to the memseg list. |
|
2376 |
* 1) memsegs that are constructed at startup. |
|
2377 |
* 2) memsegs that are constructed at run-time on |
|
2378 |
* hot-plug capable architectures. |
|
2379 |
* This code was originally part of the function kphysm_init(). |
|
2380 |
*/ |
|
2381 |
||
2382 |
static void |
|
2383 |
memseg_list_add(struct memseg *memsegp) |
|
2384 |
{ |
|
2385 |
struct memseg **prev_memsegp; |
|
2386 |
pgcnt_t num; |
|
2387 |
||
2388 |
/* insert in memseg list, decreasing number of pages order */ |
|
2389 |
||
2390 |
num = MSEG_NPAGES(memsegp); |
|
2391 |
||
2392 |
for (prev_memsegp = &memsegs; *prev_memsegp; |
|
2393 |
prev_memsegp = &((*prev_memsegp)->next)) { |
|
2394 |
if (num > MSEG_NPAGES(*prev_memsegp)) |
|
2395 |
break; |
|
2396 |
} |
|
2397 |
||
2398 |
memsegp->next = *prev_memsegp; |
|
2399 |
*prev_memsegp = memsegp; |
|
2400 |
||
2401 |
if (kpm_enable) { |
|
2402 |
memsegp->nextpa = (memsegp->next) ? |
|
2403 |
va_to_pa(memsegp->next) : MSEG_NULLPTR_PA; |
|
2404 |
||
2405 |
if (prev_memsegp != &memsegs) { |
|
2406 |
struct memseg *msp; |
|
2407 |
msp = (struct memseg *)((caddr_t)prev_memsegp - |
|
2408 |
offsetof(struct memseg, next)); |
|
2409 |
msp->nextpa = va_to_pa(memsegp); |
|
2410 |
} else { |
|
2411 |
memsegspa = va_to_pa(memsegs); |
|
2412 |
} |
|
2413 |
} |
|
2414 |
} |
|
2415 |
||
2416 |
/* |
|
2417 |
* PSM add_physmem_cb(). US-II and newer processors have some |
|
2418 |
* flavor of the prefetch capability implemented. We exploit |
|
2419 |
* this capability for optimum performance. |
|
2420 |
*/ |
|
2421 |
#define PREFETCH_BYTES 64 |
|
2422 |
||
2423 |
void |
|
2424 |
add_physmem_cb(page_t *pp, pfn_t pnum) |
|
2425 |
{ |
|
2426 |
extern void prefetch_page_w(void *); |
|
2427 |
||
2428 |
pp->p_pagenum = pnum; |
|
2429 |
||
2430 |
/* |
|
2431 |
* Prefetch one more page_t into E$. To prevent future |
|
2432 |
* mishaps with the sizeof(page_t) changing on us, we |
|
2433 |
* catch this on debug kernels if we can't bring in the |
|
2434 |
* entire hpage with 2 PREFETCH_BYTES reads. See |
|
2435 |
* also, sun4u/cpu/cpu_module.c |
|
2436 |
*/ |
|
2437 |
/*LINTED*/ |
|
2438 |
ASSERT(sizeof (page_t) <= 2*PREFETCH_BYTES); |
|
2439 |
prefetch_page_w((char *)pp); |
|
2440 |
} |
|
2441 |
||
2442 |
/* |
|
2443 |
* kphysm_init() tackles the problem of initializing physical memory. |
|
2444 |
* The old startup made some assumptions about the kernel living in |
|
2445 |
* physically contiguous space which is no longer valid. |
|
2446 |
*/ |
|
2447 |
static void |
|
2448 |
kphysm_init(page_t *pp, struct memseg *memsegp, pgcnt_t npages, |
|
2449 |
uintptr_t kpm_pp, pgcnt_t kpm_npages) |
|
2450 |
{ |
|
2451 |
struct memlist *pmem; |
|
2452 |
struct memseg *msp; |
|
2453 |
pfn_t base; |
|
2454 |
pgcnt_t num; |
|
2455 |
pfn_t lastseg_pages_end = 0; |
|
2456 |
pgcnt_t nelem_used = 0; |
|
2457 |
||
2458 |
ASSERT(page_hash != NULL && page_hashsz != 0); |
|
2459 |
||
2460 |
msp = memsegp; |
|
2461 |
for (pmem = phys_avail; pmem && npages; pmem = pmem->next) { |
|
2462 |
||
2463 |
/* |
|
2464 |
* Build the memsegs entry |
|
2465 |
*/ |
|
2466 |
num = btop(pmem->size); |
|
2467 |
if (num > npages) |
|
2468 |
num = npages; |
|
2469 |
npages -= num; |
|
2470 |
base = btop(pmem->address); |
|
2471 |
||
2472 |
msp->pages = pp; |
|
2473 |
msp->epages = pp + num; |
|
2474 |
msp->pages_base = base; |
|
2475 |
msp->pages_end = base + num; |
|
2476 |
||
2477 |
if (kpm_enable) { |
|
2478 |
pfn_t pbase_a; |
|
2479 |
pfn_t pend_a; |
|
2480 |
pfn_t prev_pend_a; |
|
2481 |
pgcnt_t nelem; |
|
2482 |
||
2483 |
msp->pagespa = va_to_pa(pp); |
|
2484 |
msp->epagespa = va_to_pa(pp + num); |
|
2485 |
pbase_a = kpmptop(ptokpmp(base)); |
|
2486 |
pend_a = kpmptop(ptokpmp(base + num - 1)) + kpmpnpgs; |
|
2487 |
nelem = ptokpmp(pend_a - pbase_a); |
|
2488 |
msp->kpm_nkpmpgs = nelem; |
|
2489 |
msp->kpm_pbase = pbase_a; |
|
2490 |
if (lastseg_pages_end) { |
|
2491 |
/* |
|
2492 |
* Assume phys_avail is in ascending order |
|
2493 |
* of physical addresses. |
|
2494 |
*/ |
|
2495 |
ASSERT(base + num > lastseg_pages_end); |
|
2496 |
prev_pend_a = kpmptop( |
|
2497 |
ptokpmp(lastseg_pages_end - 1)) + kpmpnpgs; |
|
2498 |
||
2499 |
if (prev_pend_a > pbase_a) { |
|
2500 |
/* |
|
2501 |
* Overlap, more than one memseg may |
|
2502 |
* point to the same kpm_page range. |
|
2503 |
*/ |
|
2504 |
if (kpm_smallpages == 0) { |
|
2505 |
msp->kpm_pages = |
|
2506 |
(kpm_page_t *)kpm_pp - 1; |
|
2507 |
kpm_pp = (uintptr_t) |
|
2508 |
((kpm_page_t *)kpm_pp |
|
2509 |
+ nelem - 1); |
|
2510 |
} else { |
|
2511 |
msp->kpm_spages = |
|
2512 |
(kpm_spage_t *)kpm_pp - 1; |
|
2513 |
kpm_pp = (uintptr_t) |
|
2514 |
((kpm_spage_t *)kpm_pp |
|
2515 |
+ nelem - 1); |
|
2516 |
} |
|
2517 |
nelem_used += nelem - 1; |
|
2518 |
||
2519 |
} else { |
|
2520 |
if (kpm_smallpages == 0) { |
|
2521 |
msp->kpm_pages = |
|
2522 |
(kpm_page_t *)kpm_pp; |
|
2523 |
kpm_pp = (uintptr_t) |
|
2524 |
((kpm_page_t *)kpm_pp |
|
2525 |
+ nelem); |
|
2526 |
} else { |
|
2527 |
msp->kpm_spages = |
|
2528 |
(kpm_spage_t *)kpm_pp; |
|
2529 |
kpm_pp = (uintptr_t) |
|
2530 |
((kpm_spage_t *) |
|
2531 |
kpm_pp + nelem); |
|
2532 |
} |
|
2533 |
nelem_used += nelem; |
|
2534 |
} |
|
2535 |
||
2536 |
} else { |
|
2537 |
if (kpm_smallpages == 0) { |
|
2538 |
msp->kpm_pages = (kpm_page_t *)kpm_pp; |
|
2539 |
kpm_pp = (uintptr_t) |
|
2540 |
((kpm_page_t *)kpm_pp + nelem); |
|
2541 |
} else { |
|
2542 |
msp->kpm_spages = (kpm_spage_t *)kpm_pp; |
|
2543 |
kpm_pp = (uintptr_t) |
|
2544 |
((kpm_spage_t *)kpm_pp + nelem); |
|
2545 |
} |
|
2546 |
nelem_used = nelem; |
|
2547 |
} |
|
2548 |
||
2549 |
if (nelem_used > kpm_npages) |
|
2550 |
panic("kphysm_init: kpm_pp overflow\n"); |
|
2551 |
||
2552 |
msp->kpm_pagespa = va_to_pa(msp->kpm_pages); |
|
2553 |
lastseg_pages_end = msp->pages_end; |
|
2554 |
} |
|
2555 |
||
2556 |
memseg_list_add(msp); |
|
2557 |
||
2558 |
/* |
|
2559 |
* add_physmem() initializes the PSM part of the page |
|
2560 |
* struct by calling the PSM back with add_physmem_cb(). |
|
2561 |
* In addition it coalesces pages into larger pages as |
|
2562 |
* it initializes them. |
|
2563 |
*/ |
|
2564 |
add_physmem(pp, num, base); |
|
2565 |
pp += num; |
|
2566 |
msp++; |
|
2567 |
} |
|
2568 |
||
2569 |
build_pfn_hash(); |
|
2570 |
} |
|
2571 |
||
2572 |
/* |
|
2573 |
* Kernel VM initialization. |
|
2574 |
* Assumptions about kernel address space ordering: |
|
2575 |
* (1) gap (user space) |
|
2576 |
* (2) kernel text |
|
2577 |
* (3) kernel data/bss |
|
2578 |
* (4) gap |
|
2579 |
* (5) kernel data structures |
|
2580 |
* (6) gap |
|
2581 |
* (7) debugger (optional) |
|
2582 |
* (8) monitor |
|
2583 |
* (9) gap (possibly null) |
|
2584 |
* (10) dvma |
|
2585 |
* (11) devices |
|
2586 |
*/ |
|
2587 |
static void |
|
2588 |
kvm_init(void) |
|
2589 |
{ |
|
2590 |
/* |
|
2591 |
* Put the kernel segments in kernel address space. |
|
2592 |
*/ |
|
2593 |
rw_enter(&kas.a_lock, RW_WRITER); |
|
2594 |
as_avlinit(&kas); |
|
2595 |
||
2596 |
(void) seg_attach(&kas, (caddr_t)KERNELBASE, |
|
2597 |
(size_t)(e_moddata - KERNELBASE), &ktextseg); |
|
2598 |
(void) segkmem_create(&ktextseg); |
|
2599 |
||
2600 |
(void) seg_attach(&kas, (caddr_t)(KERNELBASE + MMU_PAGESIZE4M), |
|
2601 |
(size_t)(MMU_PAGESIZE4M), &ktexthole); |
|
2602 |
(void) segkmem_create(&ktexthole); |
|
2603 |
||
2604 |
(void) seg_attach(&kas, (caddr_t)valloc_base, |
|
2605 |
(size_t)(econtig32 - valloc_base), &kvalloc); |
|
2606 |
(void) segkmem_create(&kvalloc); |
|
2607 |
||
2608 |
if (kmem64_base) { |
|
2609 |
(void) seg_attach(&kas, (caddr_t)kmem64_base, |
|
2610 |
(size_t)(kmem64_end - kmem64_base), &kmem64); |
|
2611 |
(void) segkmem_create(&kmem64); |
|
2612 |
} |
|
2613 |
||
2614 |
/* |
|
2615 |
* We're about to map out /boot. This is the beginning of the |
|
2616 |
* system resource management transition. We can no longer |
|
2617 |
* call into /boot for I/O or memory allocations. |
|
2618 |
*/ |
|
2619 |
(void) seg_attach(&kas, kernelheap, ekernelheap - kernelheap, &kvseg); |
|
2620 |
(void) segkmem_create(&kvseg); |
|
2621 |
hblk_alloc_dynamic = 1; |
|
2622 |
||
2623 |
/* |
|
2624 |
* we need to preallocate pages for DR operations before enabling large |
|
2625 |
* page kernel heap because of memseg_remap_init() hat_unload() hack. |
|
2626 |
*/ |
|
2627 |
memseg_remap_init(); |
|
2628 |
||
2629 |
/* at this point we are ready to use large page heap */ |
|
2630 |
segkmem_heap_lp_init(); |
|
2631 |
||
2632 |
(void) seg_attach(&kas, (caddr_t)SYSBASE32, SYSLIMIT32 - SYSBASE32, |
|
2633 |
&kvseg32); |
|
2634 |
(void) segkmem_create(&kvseg32); |
|
2635 |
||
2636 |
/* |
|
2637 |
* Create a segment for the debugger. |
|
2638 |
*/ |
|
2639 |
(void) seg_attach(&kas, (caddr_t)SEGDEBUGBASE, (size_t)SEGDEBUGSIZE, |
|
2640 |
&kdebugseg); |
|
2641 |
(void) segkmem_create(&kdebugseg); |
|
2642 |
||
2643 |
rw_exit(&kas.a_lock); |
|
2644 |
} |
|
2645 |
||
2646 |
/*
 * Forth source, used as a printf-style format by create_va_to_tte(),
 * that defines the word unix-tte for OBP.  The leading block of
 * "constant" definitions contains one conversion specification per
 * argument supplied by create_va_to_tte(); the colon definitions that
 * follow use those constants to look up a TTE for an address.
 */
char obp_tte_str[] =
	/* Constants filled in by create_va_to_tte(). */
	"h# %x constant MMU_PAGESHIFT "
	"h# %x constant TTE8K "
	"h# %x constant SFHME_SIZE "
	"h# %x constant SFHME_TTE "
	"h# %x constant HMEBLK_TAG "
	"h# %x constant HMEBLK_NEXT "
	"h# %x constant HMEBLK_MISC "
	"h# %x constant HMEBLK_HME1 "
	"h# %x constant NHMENTS "
	"h# %x constant HBLK_SZMASK "
	"h# %x constant HBLK_RANGE_SHIFT "
	"h# %x constant HMEBP_HBLK "
	"h# %x constant HMEBUCKET_SIZE "
	"h# %x constant HTAG_SFMMUPSZ "
	"h# %x constant HTAG_REHASHSZ "
	"h# %x constant mmu_hashcnt "
	"h# %p constant uhme_hash "
	"h# %p constant khme_hash "
	"h# %x constant UHMEHASH_SZ "
	"h# %x constant KHMEHASH_SZ "
	"h# %p constant KHATID "
	"h# %x constant CTX_SIZE "
	"h# %x constant CTX_SFMMU "
	"h# %p constant ctxs "
	"h# %x constant ASI_MEM "

	/* Fetches from a physical address through ASI_MEM. */
	": PHYS-X@ ( phys -- data ) "
	" ASI_MEM spacex@ "
	"; "

	": PHYS-W@ ( phys -- data ) "
	" ASI_MEM spacew@ "
	"; "

	": PHYS-L@ ( phys -- data ) "
	" ASI_MEM spaceL@ "
	"; "

	/* Small helpers on TTE sizes and TTE pointers. */
	": TTE_PAGE_SHIFT ( ttesz -- hmeshift ) "
	" 3 * MMU_PAGESHIFT + "
	"; "

	": TTE_IS_VALID ( ttep -- flag ) "
	" PHYS-X@ 0< "
	"; "

	": HME_HASH_SHIFT ( ttesz -- hmeshift ) "
	" dup TTE8K = if "
	" drop HBLK_RANGE_SHIFT "
	" else "
	" TTE_PAGE_SHIFT "
	" then "
	"; "

	": HME_HASH_BSPAGE ( addr hmeshift -- bspage ) "
	" tuck >> swap MMU_PAGESHIFT - << "
	"; "

	/* Select the user or kernel hash table and compute the bucket. */
	": HME_HASH_FUNCTION ( sfmmup addr hmeshift -- hmebp ) "
	" >> over xor swap ( hash sfmmup ) "
	" KHATID <> if ( hash ) "
	" UHMEHASH_SZ and ( bucket ) "
	" HMEBUCKET_SIZE * uhme_hash + ( hmebp ) "
	" else ( hash ) "
	" KHMEHASH_SZ and ( bucket ) "
	" HMEBUCKET_SIZE * khme_hash + ( hmebp ) "
	" then ( hmebp ) "
	"; "

	/* Walk a bucket's chain looking for a block with a matching tag. */
	": HME_HASH_TABLE_SEARCH "
	" ( sfmmup hmebp hblktag -- sfmmup null | sfmmup hmeblkp ) "
	" >r hmebp_hblk + phys-x@ begin ( sfmmup hmeblkp ) ( r: hblktag ) "
	" dup if ( sfmmup hmeblkp ) ( r: hblktag ) "
	" dup hmeblk_tag + phys-x@ r@ = if ( sfmmup hmeblkp ) "
	" dup hmeblk_tag + 8 + phys-x@ 2 pick = if "
	" true ( sfmmup hmeblkp true ) ( r: hblktag ) "
	" else "
	" hmeblk_next + phys-x@ false "
	" ( sfmmup hmeblkp false ) ( r: hblktag ) "
	" then "
	" else "
	" hmeblk_next + phys-x@ false "
	" ( sfmmup hmeblkp false ) ( r: hblktag ) "
	" then "
	" else "
	" true "
	" then "
	" until r> drop "
	"; "

	": CNUM_TO_SFMMUP ( cnum -- sfmmup ) "
	" CTX_SIZE * ctxs + CTX_SFMMU + "
	"x@ "
	"; "

	": HME_HASH_TAG ( sfmmup rehash addr -- hblktag ) "
	" over HME_HASH_SHIFT HME_HASH_BSPAGE ( sfmmup rehash bspage ) "
	" HTAG_REHASHSZ << or nip ( hblktag ) "
	"; "

	": HBLK_TO_TTEP ( hmeblkp addr -- ttep ) "
	" over HMEBLK_MISC + PHYS-L@ HBLK_SZMASK and ( hmeblkp addr ttesz ) "
	" TTE8K = if ( hmeblkp addr ) "
	" MMU_PAGESHIFT >> NHMENTS 1- and ( hmeblkp hme-index ) "
	" else ( hmeblkp addr ) "
	" drop 0 ( hmeblkp 0 ) "
	" then ( hmeblkp hme-index ) "
	" SFHME_SIZE * + HMEBLK_HME1 + ( hmep ) "
	" SFHME_TTE + ( ttep ) "
	"; "

	/* Entry point: try each rehash value from 1 through mmu_hashcnt. */
	": unix-tte ( addr cnum -- false | tte-data true ) "
	" CNUM_TO_SFMMUP ( addr sfmmup ) "
	" mmu_hashcnt 1+ 1 do ( addr sfmmup ) "
	" 2dup swap i HME_HASH_SHIFT "
	"( addr sfmmup sfmmup addr hmeshift ) "
	" HME_HASH_FUNCTION ( addr sfmmup hmebp ) "
	" over i 4 pick "
	"( addr sfmmup hmebp sfmmup rehash addr ) "
	" HME_HASH_TAG ( addr sfmmup hmebp hblktag ) "
	" HME_HASH_TABLE_SEARCH "
	"( addr sfmmup { null | hmeblkp } ) "
	" ?dup if ( addr sfmmup hmeblkp ) "
	" nip swap HBLK_TO_TTEP ( ttep ) "
	" dup TTE_IS_VALID if ( valid-ttep ) "
	" PHYS-X@ true ( tte-data true ) "
	" else ( invalid-tte ) "
	" drop false ( false ) "
	" then ( false | tte-data true ) "
	" unloop exit ( false | tte-data true ) "
	" then ( addr sfmmup ) "
	" loop ( addr sfmmup ) "
	" 2drop false ( false ) "
	"; "
;
|
2782 |
||
2783 |
void |
|
2784 |
create_va_to_tte(void) |
|
2785 |
{ |
|
2786 |
char *bp; |
|
2787 |
extern int khmehash_num, uhmehash_num; |
|
2788 |
extern struct hmehash_bucket *khme_hash, *uhme_hash; |
|
2789 |
||
2790 |
#define OFFSET(type, field) ((uintptr_t)(&((type *)0)->field)) |
|
2791 |
||
2792 |
bp = (char *)kobj_zalloc(MMU_PAGESIZE, KM_SLEEP); |
|
2793 |
||
2794 |
/* |
|
2795 |
* Teach obp how to parse our sw ttes. |
|
2796 |
*/ |
|
2797 |
(void) sprintf(bp, obp_tte_str, |
|
2798 |
MMU_PAGESHIFT, |
|
2799 |
TTE8K, |
|
2800 |
sizeof (struct sf_hment), |
|
2801 |
OFFSET(struct sf_hment, hme_tte), |
|
2802 |
OFFSET(struct hme_blk, hblk_tag), |
|
2803 |
OFFSET(struct hme_blk, hblk_nextpa), |
|
2804 |
OFFSET(struct hme_blk, hblk_misc), |
|
2805 |
OFFSET(struct hme_blk, hblk_hme), |
|
2806 |
NHMENTS, |
|
2807 |
HBLK_SZMASK, |
|
2808 |
HBLK_RANGE_SHIFT, |
|
2809 |
OFFSET(struct hmehash_bucket, hmeh_nextpa), |
|
2810 |
sizeof (struct hmehash_bucket), |
|
2811 |
HTAG_SFMMUPSZ, |
|
2812 |
HTAG_REHASHSZ, |
|
2813 |
mmu_hashcnt, |
|
2814 |
(caddr_t)va_to_pa((caddr_t)uhme_hash), |
|
2815 |
(caddr_t)va_to_pa((caddr_t)khme_hash), |
|
2816 |
UHMEHASH_SZ, |
|
2817 |
KHMEHASH_SZ, |
|
2818 |
KHATID, |
|
2819 |
sizeof (struct ctx), |
|
2820 |
OFFSET(struct ctx, ctx_sfmmu), |
|
2821 |
ctxs, |
|
2822 |
ASI_MEM); |
|
2823 |
prom_interpret(bp, 0, 0, 0, 0, 0); |
|
2824 |
||
2825 |
kobj_free(bp, MMU_PAGESIZE); |
|
2826 |
} |
|
2827 |
||
2828 |
/*
 * Advise the PROM that it can use unix-tte, the word defined by
 * obp_tte_str, by interpreting the Forth command below.
 */
void
install_va_to_tte(void)
{
	static char use_unix_tte[] = "' unix-tte is va>tte-data";

	prom_interpret(use_unix_tte, 0, 0, 0, 0, 0);
}
|
2836 |
||
2837 |
||
1253 | 2838 |
/*
 * Forth source that creates the /os-io device node under the root
 * device.
 *
 * Because kmdb links prom_stdout_is_framebuffer into its own module,
 * "device-type=display" is added here for the /os-io node, so that
 * prom_stdout_is_framebuffer still works correctly after the /os-io
 * node is registered into OBP.
 */
static char *create_node =
	/* Node creation and properties. */
	"root-device "
	"new-device "
	"\" os-io\" device-name "
	"\" display\" device-type "

	/* Common read/write path through $callback. */
	": cb-r/w ( adr,len method$ -- #read/#written ) "
	" 2>r swap 2 2r> ['] $callback catch if "
	" 2drop 3drop 0 "
	" then "
	"; "

	": read ( adr,len -- #read ) "
	" \" read\" ['] cb-r/w catch if 2drop 2drop -2 exit then "
	" ( retN ... ret1 N ) "
	" ?dup if "
	" swap >r 1- 0 ?do drop loop r> "
	" else "
	" -2 "
	" then l->n "
	"; "

	": write ( adr,len -- #written ) "
	" \" write\" ['] cb-r/w catch if 2drop 2drop 0 exit then "
	" ( retN ... ret1 N ) "
	" ?dup if "
	" swap >r 1- 0 ?do drop loop r> "
	" else "
	" 0 "
	" then "
	"; "

	/* Abort polling hooks. */
	": poll-tty ( -- ) ; "
	": install-abort ( -- ) ['] poll-tty d# 10 alarm ; "
	": remove-abort ( -- ) ['] poll-tty 0 alarm ; "

	/* Input hand-over between OBP and the kernel. */
	": cb-give/take ( $method -- ) "
	" 0 -rot ['] $callback catch ?dup if "
	" >r 2drop 2drop r> throw "
	" else "
	" 0 ?do drop loop "
	" then "
	"; "
	": give ( -- ) \" exit-input\" cb-give/take ; "
	": take ( -- ) \" enter-input\" cb-give/take ; "

	": open ( -- ok? ) true ; "
	": close ( -- ) ; "
	"finish-device "
	"device-end ";
|
2888 |
||
2889 |
/* |
|
1253 | 2890 |
* Create the OBP input/output node (FCode serial driver). |
2891 |
* It is needed for both USB console keyboard and for |
|
2892 |
* the kernel terminal emulator. It is too early to check for a |
|
2893 |
* kernel console compatible framebuffer now, so we create this |
|
2894 |
* so that we're ready if we need to enable kernel terminal emulation. |
|
0 | 2895 |
* |
1253 | 2896 |
* When the USB software takes over the input device at the time |
2897 |
* consconfig runs, OBP's stdin is redirected to this node. |
|
2898 |
* Whenever the FORTH user interface is used after this switch, |
|
2899 |
* the node will call back into the kernel for console input. |
|
2900 |
* If a serial device such as ttya or a UART with a Type 5 keyboard |
|
2901 |
* attached is used, OBP takes over the serial device when the system |
|
2902 |
* goes to the debugger after the system is booted. This sharing |
|
2903 |
* of the relatively simple serial device is difficult but possible. |
|
2904 |
* Sharing the USB host controller is impossible due its complexity. |
|
2905 |
* |
|
2906 |
* Similarly to USB keyboard input redirection, after consconfig_dacf |
|
2907 |
* configures a kernel console framebuffer as the standard output |
|
2908 |
* device, OBP's stdout is switched to to vector through the |
|
2909 |
* /os-io node into the kernel terminal emulator. |
|
0 | 2910 |
*/ |
2911 |
static void |
|
1253 | 2912 |
startup_create_io_node(void) |
0 | 2913 |
{ |
1253 | 2914 |
prom_interpret(create_node, 0, 0, 0, 0, 0); |
0 | 2915 |
} |
2916 |
||
2917 |
||
2918 |
static void |
|
2919 |
do_prom_version_check(void) |
|
2920 |
{ |
|
2921 |
int i; |
|
789 | 2922 |
pnode_t node; |
0 | 2923 |
char buf[64]; |
2924 |
static char drev[] = "Down-rev firmware detected%s\n" |
|
2925 |
"\tPlease upgrade to the following minimum version:\n" |
|
2926 |
"\t\t%s\n"; |
|
2927 |
||
2928 |
i = prom_version_check(buf, sizeof (buf), &node); |
|
2929 |
||
2930 |
if (i == PROM_VER64_OK) |
|
2931 |
return; |
|
2932 |
||
2933 |
if (i == PROM_VER64_UPGRADE) { |
|
2934 |
cmn_err(CE_WARN, drev, "", buf); |
|
2935 |
||
2936 |
#ifdef DEBUG |
|
2937 |
prom_enter_mon(); /* Type 'go' to continue */ |
|
2938 |
cmn_err(CE_WARN, "Booting with down-rev firmware\n"); |
|
2939 |
return; |
|
2940 |
#else |
|
2941 |
halt(0); |
|
2942 |
#endif |
|
2943 |
} |
|
2944 |
||
2945 |
/* |
|
2946 |
* The other possibility is that this is a server running |
|
2947 |
* good firmware, but down-rev firmware was detected on at |
|
2948 |
* least one other cpu board. We just complain if we see |
|
2949 |
* that. |
|
2950 |
*/ |
|
2951 |
cmn_err(CE_WARN, drev, " on one or more CPU boards", buf); |
|
2952 |
} |
|
2953 |
||
2954 |
static void |
|
2955 |
kpm_init() |
|
2956 |
{ |
|
2957 |
kpm_pgshft = (kpm_smallpages == 0) ? MMU_PAGESHIFT4M : MMU_PAGESHIFT; |
|
2958 |
kpm_pgsz = 1ull << kpm_pgshft; |
|
2959 |
kpm_pgoff = kpm_pgsz - 1; |
|
2960 |
kpmp2pshft = kpm_pgshft - PAGESHIFT; |
|
2961 |
kpmpnpgs = 1 << kpmp2pshft; |
|
2962 |
ASSERT(((uintptr_t)kpm_vbase & (kpm_pgsz - 1)) == 0); |
|
2963 |
} |
|
2964 |
||
2965 |
void |
|
2966 |
kpm_npages_setup(int memblocks) |
|
2967 |
{ |
|
2968 |
/* |
|
2969 |
* npages can be scattered in a maximum of 'memblocks' |
|
2970 |
*/ |
|
2971 |
kpm_npages = ptokpmpr(npages) + memblocks; |
|
2972 |
} |
|
2973 |
||
2974 |
/* |
|
2975 |
* Must be defined in platform dependent code. |
|
2976 |
*/ |
|
2977 |
extern caddr_t modtext; |
|
2978 |
extern size_t modtext_sz; |
|
2979 |
extern caddr_t moddata; |
|
2980 |
||
2981 |
/*
 * HEAPTEXT_ARENA(addr) maps an address to a text hole arena index:
 * 0 for an address below KERNELBASE + 2 * MMU_PAGESIZE4M, otherwise
 * 1 plus the number of whole (HEAPTEXT_MAPPED + HEAPTEXT_UNMAPPED)
 * strides between HEAPTEXT_BASE and the address.
 */
#define	HEAPTEXT_ARENA(addr)	\
	((uintptr_t)(addr) >= KERNELBASE + 2 * MMU_PAGESIZE4M ? \
	(((uintptr_t)(addr) - HEAPTEXT_BASE) / \
	(HEAPTEXT_MAPPED + HEAPTEXT_UNMAPPED) + 1) : 0)

/*
 * HEAPTEXT_OVERSIZED(addr) is true when the address lies at or above
 * the start of the oversize region, i.e. the last HEAPTEXT_OVERSIZE
 * bytes of the HEAPTEXT_SIZE bytes that begin at HEAPTEXT_BASE.
 */
#define	HEAPTEXT_OVERSIZED(addr)	\
	(!((uintptr_t)(addr) < HEAPTEXT_BASE + HEAPTEXT_SIZE - HEAPTEXT_OVERSIZE))
|
2988 |
||
2989 |
vmem_t *texthole_source[HEAPTEXT_NARENAS]; |
|
2990 |
vmem_t *texthole_arena[HEAPTEXT_NARENAS]; |
|
2991 |
kmutex_t texthole_lock; |
|
2992 |
||
2993 |
char kern_bootargs[OBP_MAXPATHLEN]; |
|
2994 |
||
2995 |
void |
|
2996 |
kobj_vmem_init(vmem_t **text_arena, vmem_t **data_arena) |
|
2997 |
{ |
|
2998 |
uintptr_t addr, limit; |
|
2999 |
||
3000 |
addr = HEAPTEXT_BASE; |
|
3001 |
limit = addr + HEAPTEXT_SIZE - HEAPTEXT_OVERSIZE; |
|
3002 |
||
3003 |
/* |
|
3004 |
* Before we initialize the text_arena, we want to punch holes in the |
|
3005 |
* underlying heaptext_arena. This guarantees that for any text |
|
3006 |
* address we can find a text hole less than HEAPTEXT_MAPPED away. |
|
3007 |
*/ |
|
3008 |
for (; addr + HEAPTEXT_UNMAPPED <= limit; |
|
3009 |
addr += HEAPTEXT_MAPPED + HEAPTEXT_UNMAPPED) { |
|
3010 |
(void) vmem_xalloc(heaptext_arena, HEAPTEXT_UNMAPPED, PAGESIZE, |
|
3011 |
0, 0, (void *)addr, (void *)(addr + HEAPTEXT_UNMAPPED), |
|
3012 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |
|
3013 |
} |
|
3014 |
||
3015 |
/* |
|
3016 |
* Allocate one page at the oversize to break up the text region |
|
3017 |
* from the oversized region. |
|
3018 |
*/ |
|
3019 |
(void) vmem_xalloc(heaptext_arena, PAGESIZE, PAGESIZE, 0, 0, |
|
3020 |
(void *)limit, (void *)(limit + PAGESIZE), |
|
3021 |
VM_NOSLEEP | VM_BESTFIT | VM_PANIC); |
|
3022 |
||
3023 |
*text_arena = vmem_create("module_text", modtext, modtext_sz, |
|
3024 |
sizeof (uintptr_t), segkmem_alloc, segkmem_free, |
|
3025 |
heaptext_arena, 0, VM_SLEEP); |
|
3026 |
*data_arena = vmem_create("module_data", moddata, MODDATA, 1, |
|
3027 |
segkmem_alloc, segkmem_free, heap32_arena, 0, VM_SLEEP); |
|
3028 |
} |
|
3029 |
||
3030 |
caddr_t |
|
3031 |
kobj_text_alloc(vmem_t *arena, size_t size) |
|
3032 |
{ |
|
3033 |
caddr_t rval, better; |
|
3034 |
||
3035 |
/* |
|
3036 |
* First, try a sleeping allocation. |
|
3037 |
*/ |
|
3038 |
rval = vmem_alloc(arena, size, VM_SLEEP | VM_BESTFIT); |
|
3039 |
||
3040 |
if (size >= HEAPTEXT_MAPPED || !HEAPTEXT_OVERSIZED(rval)) |
|
3041 |
return (rval); |
|
3042 |
||
3043 |
/* |
|
3044 |
* We didn't get the area that we wanted. We're going to try to do an |
|
3045 |
* allocation with explicit constraints. |
|
3046 |
*/ |
|
3047 |
better = vmem_xalloc(arena, size, sizeof (uintptr_t), 0, 0, NULL, |
|
3048 |
(void *)(HEAPTEXT_BASE + HEAPTEXT_SIZE - HEAPTEXT_OVERSIZE), |
|
3049 |
VM_NOSLEEP | VM_BESTFIT); |
|
3050 |
||
3051 |
if (better != NULL) { |
|
3052 |
/* |
|
3053 |
* That worked. Free our first attempt and return. |
|
3054 |
*/ |
|
3055 |
vmem_free(arena, rval, size); |
|
3056 |
return (better); |
|
3057 |
} |
|
3058 |
||
3059 |
/* |
|
3060 |
* That didn't work; we'll have to return our first attempt. |
|
3061 |
*/ |
|
3062 |
return (rval); |
|
3063 |
} |
|
3064 |
||
3065 |
caddr_t |
|
3066 |
kobj_texthole_alloc(caddr_t addr, size_t size) |
|
3067 |
{ |
|
3068 |
int arena = HEAPTEXT_ARENA(addr); |
|
3069 |
char c[30]; |
|
3070 |
uintptr_t base; |
|
3071 |
||
3072 |
if (HEAPTEXT_OVERSIZED(addr)) { |
|
3073 |
/* |
|
3074 |
* If this is an oversized allocation, there is no text hole |
|
3075 |
* available for it; return NULL. |
|
3076 |
*/ |
|
3077 |
return (NULL); |
|
3078 |
} |
|
3079 |
||
3080 |
mutex_enter(&texthole_lock); |
|
3081 |
||
3082 |
if (texthole_arena[arena] == NULL) { |
|
3083 |
ASSERT(texthole_source[arena] == NULL); |
|
3084 |
||
3085 |
if (arena == 0) { |
|
3086 |
texthole_source[0] = vmem_create("module_text_holesrc", |
|
3087 |
(void *)(KERNELBASE + MMU_PAGESIZE4M), |
|
3088 |
MMU_PAGESIZE4M, PAGESIZE, NULL, NULL, NULL, |
|
3089 |
0, VM_SLEEP); |
|
3090 |
} else { |
|
3091 |
base = HEAPTEXT_BASE + |
|
3092 |
(arena - 1) * (HEAPTEXT_MAPPED + HEAPTEXT_UNMAPPED); |
|
3093 |
||
3094 |
(void) snprintf(c, sizeof (c), |
|
3095 |
"heaptext_holesrc_%d", arena); |
|
3096 |
||
3097 |
texthole_source[arena] = vmem_create(c, (void *)base, |
|
3098 |
HEAPTEXT_UNMAPPED, PAGESIZE, NULL, NULL, NULL, |
|
3099 |
0, VM_SLEEP); |
|
3100 |
} |
|
3101 |
||
3102 |
(void) snprintf(c, sizeof (c), "heaptext_hole_%d", arena); |
|
3103 |
||
3104 |
texthole_arena[arena] = vmem_create(c, NULL, 0, |
|
3105 |
sizeof (uint32_t), segkmem_alloc_permanent, segkmem_free, |
|
3106 |
texthole_source[arena], 0, VM_SLEEP); |
|
3107 |
} |
|
3108 |
||
3109 |
mutex_exit(&texthole_lock); |
|
3110 |
||
3111 |
ASSERT(texthole_arena[arena] != NULL); |
|
3112 |
ASSERT(arena >= 0 && arena < HEAPTEXT_NARENAS); |
|
3113 |
return (vmem_alloc(texthole_arena[arena], size, |
|
3114 |
VM_BESTFIT | VM_NOSLEEP)); |
|
3115 |
} |
|
3116 |
||
3117 |
void |
|
3118 |
kobj_texthole_free(caddr_t addr, size_t size) |
|
3119 |
{ |
|
3120 |
int arena = HEAPTEXT_ARENA(addr); |
|
3121 |
||
3122 |
ASSERT(arena >= 0 && arena < HEAPTEXT_NARENAS); |
|
3123 |
ASSERT(texthole_arena[arena] != NULL); |
|
3124 |
vmem_free(texthole_arena[arena], addr, size); |
|
3125 |
} |