2080 cpr doesn't even nearly work if built with gcc
author	Richard Lowe <richlowe@richlowe.net>
Sat, 17 Mar 2012 01:05:45 +0000
changeset	13646:8f5bd7d387bc
parent		13645:1cef92b663d9
child		13647:395e51435a78
2080 cpr doesn't even nearly work if built with gcc
2425 don't pretend to use Sun as on amd64
Reviewed by: Garrett D'Amore <[email protected]>
Reviewed by: Joshua M. Clulow <[email protected]>
Reviewed by: Albert Lee <[email protected]>
Approved by: Gordon Ross <[email protected]>
usr/src/uts/i86pc/ml/cpr_wakecode.s
usr/src/uts/i86pc/ml/mpcore.s
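Most of what follows is a mechanical conversion: immediate expressions that only the Sun assembler accepts, such as $[CR0_PE|CR0_WP|CR0_AM] and $-1!CR4_PGE, are rewritten with the _CONST() and _BITNOT() wrappers so the same source assembles under GNU as. As a rough sketch of why that works (paraphrased, not a verbatim copy of any illumos header), the wrappers amount to nothing more than grouping and complement, selected on the same __GNUC_AS__ conditional that mpcore.s already tests:

	/* sketch only -- paraphrased, not the actual header text */
	#if defined(__GNUC_AS__)
	#define	_CONST(const)	const		/* bare expression; GNU as parses it */
	#define	_BITNOT(bit)	~_CONST(bit)	/* ~x is the complement directly */
	#else
	#define	_CONST(const)	[const]		/* Sun as groups immediates with [] */
	#define	_BITNOT(bit)	-1!_CONST(bit)	/* Sun as spelling of the complement */
	#endif

So, for example, $_CONST(COM1+LCR) expands back to the old $[COM1+LCR] form under Sun as and to the bare $COM1+LCR expression under GNU as.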
--- a/usr/src/uts/i86pc/ml/cpr_wakecode.s	Wed Jun 22 15:50:16 2011 -0700
+++ b/usr/src/uts/i86pc/ml/cpr_wakecode.s	Sat Mar 17 01:05:45 2012 +0000
@@ -84,12 +84,6 @@
 
 #else	/* lint */
 
-#if defined(__GNU_AS__)
-
-	NOTHING AT ALL YET!
-
-#else	/* !defined(__GNU_AS__) */
-
 #if defined(__amd64)
 
 	ENTRY_NP(wc_save_context)
@@ -230,8 +224,6 @@
 
 #endif	/* __amd64 */
 
-#endif	/* __GNU_AS__ */
-
 #endif /* lint */
 
 
@@ -263,20 +255,18 @@
 
 #else	/* lint */
 
-#if defined(__GNU_AS__)
-
-	NOTHING AT ALL YET!
-
-#else	/* __GNU_AS__ */
-
 #if defined(__amd64)
 
 	ENTRY_NP(wc_rm_start)
 
 	/*
-	 * For vulcan as we need to do a .code32 and mentally invert the
-	 * meaning of the addr16 and data16 prefixes to get 32-bit access when
-	 * generating code to be executed in 16-bit mode (sigh...)
+	 * For the Sun Studio 10 assembler we needed to do a .code32 and
+	 * mentally invert the meaning of the addr16 and data16 prefixes to
+	 * get 32-bit access when generating code to be executed in 16-bit
+	 * mode (sigh...)
+	 *
+	 * This code, despite always being built with GNU as, has inherited
+	 * the conceptual damage.
 	 */
 
 	.code32
@@ -309,7 +299,7 @@
 	 * %cr0 has already been initialsed to zero
 	 */
 	movl		%cr0, %eax
-	D16 orl		$[CR0_PE|CR0_WP|CR0_AM], %eax
+	D16 orl		$_CONST(CR0_PE|CR0_WP|CR0_AM), %eax
 	movl		%eax, %cr0
 
 	/*
@@ -494,45 +484,45 @@
  */
 
 / select COM1
-	D16 movl	$[COM1+LCR], %edx
+	D16 movl	$_CONST(COM1+LCR), %edx
 	D16 movb	$DLAB, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+DLL], %edx	/ divisor latch lsb
+	D16 movl	$_CONST(COM1+DLL), %edx	/ divisor latch lsb
 	D16 movb	$B9600L, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+DLH], %edx	/ divisor latch hsb
+	D16 movl	$_CONST(COM1+DLH), %edx	/ divisor latch hsb
 	D16 movb	$B9600H, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+LCR], %edx	/ select COM1
-	D16 movb	$[STOP1|BITS8], %al	/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM1+LCR), %edx	/ select COM1
+	D16 movb	$_CONST(STOP1|BITS8), %al	/ 1 stop bit, 8bit word len
 	outb	(%dx)
 
-	D16 movl	$[COM1+MCR], %edx	/ select COM1
-	D16 movb	$[RTS|DTR], %al		/ data term ready & req to send
+	D16 movl	$_CONST(COM1+MCR), %edx	/ select COM1
+	D16 movb	$_CONST(RTS|DTR), %al		/ data term ready & req to send
 	outb	(%dx)
 
 / select COM2
-	D16 movl	$[COM2+LCR], %edx
+	D16 movl	$_CONST(COM2+LCR), %edx
 	D16 movb	$DLAB, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+DLL], %edx	/ divisor latch lsb
+	D16 movl	$_CONST(COM2+DLL), %edx	/ divisor latch lsb
 	D16 movb	$B9600L, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+DLH], %edx	/ divisor latch hsb
+	D16 movl	$_CONST(COM2+DLH), %edx	/ divisor latch hsb
 	D16 movb	$B9600H, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+LCR], %edx	/ select COM1
-	D16 movb	$[STOP1|BITS8], %al	/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM2+LCR), %edx	/ select COM1
+	D16 movb	$_CONST(STOP1|BITS8), %al	/ 1 stop bit, 8bit word len
 	outb	(%dx)
 
-	D16 movl	$[COM2+MCR], %edx	/ select COM1
-	D16 movb	$[RTS|DTR], %al		/ data term ready & req to send
+	D16 movl	$_CONST(COM2+MCR), %edx	/ select COM1
+	D16 movb	$_CONST(RTS|DTR), %al		/ data term ready & req to send
 	outb	(%dx)
 #endif	/*	DEBUG	*/
 
@@ -949,7 +939,7 @@
 #endif
 
 	D16 A16 movl	%cs:WC_CR4(%ebx), %eax	/ restore cr4
-	D16 andl	$-1!CR4_PGE, %eax	/ don't set Global Enable yet
+	D16 andl	$_BITNOT(CR4_PGE), %eax / don't set Global Enable yet
 	movl		%eax, %cr4
 
 #if     LED
@@ -1015,45 +1005,45 @@
  */
 
 / select COM1
-	D16 movl	$[COM1+LCR], %edx
+	D16 movl	$_CONST(COM1+LCR), %edx
 	D16 movb	$DLAB, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+DLL], %edx	/ divisor latch lsb
+	D16 movl	$_CONST(COM1+DLL), %edx	/ divisor latch lsb
 	D16 movb	$B9600L, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+DLH], %edx	/ divisor latch hsb
+	D16 movl	$_CONST(COM1+DLH), %edx	/ divisor latch hsb
 	D16 movb	$B9600H, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM1+LCR], %edx	/ select COM1
-	D16 movb	$[STOP1|BITS8], %al	/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM1+LCR), %edx	/ select COM1
+	D16 movb	$_CONST(STOP1|BITS8), %al	/ 1 stop bit, 8bit word len
 	outb	(%dx)
 
-	D16 movl	$[COM1+MCR], %edx	/ select COM1
-	D16 movb	$[RTS|DTR], %al		/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM1+MCR), %edx	/ select COM1
+	D16 movb	$_CONST(RTS|DTR), %al		/ 1 stop bit, 8bit word len
 	outb	(%dx)
 
 / select COM2
-	D16 movl	$[COM2+LCR], %edx
+	D16 movl	$_CONST(COM2+LCR), %edx
 	D16 movb	$DLAB, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+DLL], %edx	/ divisor latch lsb
+	D16 movl	$_CONST(COM2+DLL), %edx	/ divisor latch lsb
 	D16 movb	$B9600L, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+DLH], %edx	/ divisor latch hsb
+	D16 movl	$_CONST(COM2+DLH), %edx	/ divisor latch hsb
 	D16 movb	$B9600H, %al		/ divisor latch
 	outb	(%dx)
 
-	D16 movl	$[COM2+LCR], %edx	/ select COM1
-	D16 movb	$[STOP1|BITS8], %al	/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM2+LCR), %edx	/ select COM1
+	D16 movb	$_CONST(STOP1|BITS8), %al	/ 1 stop bit, 8bit word len
 	outb	(%dx)
 
-	D16 movl	$[COM2+MCR], %edx	/ select COM1
-	D16 movb	$[RTS|DTR], %al		/ 1 stop bit, 8bit word len
+	D16 movl	$_CONST(COM2+MCR), %edx	/ select COM1
+	D16 movb	$_CONST(RTS|DTR), %al		/ 1 stop bit, 8bit word len
 	outb	(%dx)
 #endif	/*	DEBUG	*/
 
@@ -1106,7 +1096,7 @@
 
 	movzwl	WC_TR(%ebx), %eax	/ clear TSS busy bit
 	addl	WC_GDT+2(%ebx), %eax
-	andl	$-1!0x200, 4(%eax)
+	andl	$_BITNOT(0x200), 4(%eax)
 	ltr	WC_TR(%ebx)		/ $UTSS_SEL
 
 	movw	WC_SS(%ebx), %ss	/ restore segment registers
@@ -1172,7 +1162,5 @@
 
 #endif	/* defined(__amd64) */
 
-#endif	/* !defined(__GNU_AS__) */
-
 #endif /* lint */
 
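The _BITNOT() conversions are the least obvious part of the cpr_wakecode.s hunks: the Sun assembler evaluates -1!CR4_PGE to the bitwise complement of CR4_PGE, and the old code leaned on that idiom to mask a bit off, while GNU as either rejects the expression or gives ! a different meaning. The same mask is therefore spelled through the wrapper. Side by side, with both lines lifted from the hunk above:

	/* Sun-as-only idiom: the immediate is ~CR4_PGE */
	D16 andl	$-1!CR4_PGE, %eax	/ don't set Global Enable yet

	/* equivalent instruction in the GNU-as-safe spelling */
	D16 andl	$_BITNOT(CR4_PGE), %eax / don't set Global Enable yet
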
--- a/usr/src/uts/i86pc/ml/mpcore.s	Wed Jun 22 15:50:16 2011 -0700
+++ b/usr/src/uts/i86pc/ml/mpcore.s	Sat Mar 17 01:05:45 2012 +0000
@@ -78,260 +78,6 @@
 
 	ENTRY_NP(real_mode_start_cpu)
 
-#if !defined(__GNUC_AS__)
-
-	/*
-	 * For vulcan as we need to do a .code32 and mentally invert the
-	 * meaning of the addr16 and data16 prefixes to get 32-bit access when
-	 * generating code to be executed in 16-bit mode (sigh...)
-	 */
-	.code32
-	cli
-	movw		%cs, %ax
-	movw		%ax, %ds	/* load cs into ds */
-	movw		%ax, %ss	/* and into ss */
-
-	/*
-	 * Helps in debugging by giving us the fault address.
-	 *
-	 * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
-	 */
-	D16 movl	$0xffc, %esp
-	movl		%cr0, %eax
-
-	/*
-	 * Enable protected-mode, write protect, and alignment mask
-	 */
-	D16 orl		$[CR0_PE|CR0_WP|CR0_AM], %eax
-	movl		%eax, %cr0
-
-	/*
-	 * Do a jmp immediately after writing to cr0 when enabling protected
-	 * mode to clear the real mode prefetch queue (per Intel's docs)
-	 */
-	jmp		pestart
-
-pestart:
-	/*
- 	 * 16-bit protected mode is now active, so prepare to turn on long
-	 * mode.
-	 *
-	 * Note that we currently assume that if we're attempting to run a
-	 * kernel compiled with (__amd64) #defined, the target CPU has long
-	 * mode support.
-	 */
-
-#if 0
-	/*
-	 * If there's a chance this might not be true, the following test should
-	 * be done, with the no_long_mode branch then doing something
-	 * appropriate:
-	 */
-
-	D16 movl	$0x80000000, %eax	/* get largest extended CPUID */
-	cpuid
-	D16 cmpl	$0x80000000, %eax	/* check if > 0x80000000 */
-	jbe		no_long_mode		/* nope, no long mode */
-	D16 movl	$0x80000001, %eax	
-	cpuid					/* get extended feature flags */
-	btl		$29, %edx		/* check for long mode */
-	jnc		no_long_mode		/* long mode not supported */
-#endif
-
-	/*
- 	 * Add any initial cr4 bits
-	 */
-	movl		%cr4, %eax
-	A16 D16 orl	CR4OFF, %eax
-
-	/*
-	 * Enable PAE mode (CR4.PAE)
-	 */
-	D16 orl		$CR4_PAE, %eax
-	movl		%eax, %cr4
-
-	/*
-	 * Point cr3 to the 64-bit long mode page tables.
-	 *
-	 * Note that these MUST exist in 32-bit space, as we don't have
-	 * a way to load %cr3 with a 64-bit base address for the page tables
-	 * until the CPU is actually executing in 64-bit long mode.
-	 */
-	A16 D16 movl	CR3OFF, %eax
-	movl		%eax, %cr3
-
-	/*
-	 * Set long mode enable in EFER (EFER.LME = 1)
-	 */
-	D16 movl	$MSR_AMD_EFER, %ecx
-	rdmsr
-	D16 orl		$AMD_EFER_LME, %eax
-	wrmsr
-
-	/*
-	 * Finally, turn on paging (CR0.PG = 1) to activate long mode.
-	 */
-	movl		%cr0, %eax
-	D16 orl		$CR0_PG, %eax
-	movl		%eax, %cr0
-
-	/*
-	 * The instruction after enabling paging in CR0 MUST be a branch.
-	 */
-	jmp		long_mode_active
-
-long_mode_active:
-	/*
-	 * Long mode is now active but since we're still running with the
-	 * original 16-bit CS we're actually in 16-bit compatability mode.
-	 *
-	 * We have to load an intermediate GDT and IDT here that we know are
-	 * in 32-bit space before we can use the kernel's GDT and IDT, which
-	 * may be in the 64-bit address space, and since we're in compatability
-	 * mode, we only have access to 16 and 32-bit instructions at the
-	 * moment.
-	 */
-	A16 D16 lgdt	TEMPGDTOFF	/* load temporary GDT */
-	A16 D16 lidt	TEMPIDTOFF	/* load temporary IDT */
-
-	/*
- 	 * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
-	 * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
-	 * to the real mode platter address of long_mode 64 as until the 64-bit
-	 * CS is in place we don't have access to 64-bit instructions and thus
-	 * can't reference a 64-bit %rip.
-	 */
-	D16 	pushl 	$TEMP_CS64_SEL
-	A16 D16 pushl	LM64OFF
-	D16 lret
-
-	.globl	long_mode_64
-long_mode_64:
-	.code64
-	/*
-	 * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
-	 * CS.L=1) so we now have access to 64-bit instructions.
-	 *
-	 * First, set the 64-bit GDT base.
-	 */
-	.globl	rm_platter_pa
-	movl	rm_platter_pa, %eax
-
-	lgdtq	GDTROFF(%rax)		/* load 64-bit GDT */
-
-	/*
-	 * Save the CPU number in %r11; get the value here since it's saved in
-	 * the real mode platter.
-	 */
-	movl	CPUNOFF(%rax), %r11d
-
-	/*
-	 * Add rm_platter_pa to %rsp to point it to the same location as seen
-	 * from 64-bit mode.
-	 */
-	addq	%rax, %rsp
-
-	/*
-	 * Now do an lretq to load CS with the appropriate selector for the
-	 * kernel's 64-bit GDT and to start executing 64-bit setup code at the
-	 * virtual address where boot originally loaded this code rather than
-	 * the copy in the real mode platter's rm_code array as we've been
-	 * doing so far.
-	 */
-	pushq	$KCS_SEL
-	pushq	$kernel_cs_code
-	lretq
-	.globl real_mode_start_cpu_end
-real_mode_start_cpu_end:
-	nop
-
-kernel_cs_code:
-	/*
-	 * Complete the balance of the setup we need to before executing
-	 * 64-bit kernel code (namely init rsp, TSS, LGDT, FS and GS).
-	 */
-	.globl	rm_platter_va
-	movq	rm_platter_va, %rax
-
-	lidtq	IDTROFF(%rax)
-
-	movw	$KDS_SEL, %ax
-	movw	%ax, %ds
-	movw	%ax, %es
-	movw	%ax, %ss
-
-	movw	$KTSS_SEL, %ax		/* setup kernel TSS */
-	ltr	%ax
-
-	xorw	%ax, %ax		/* clear LDTR */
-	lldt	%ax
-
-	/*
-	 * Set GS to the address of the per-cpu structure as contained in
-	 * cpu[cpu_number].
-	 *
-	 * Unfortunately there's no way to set the 64-bit gsbase with a mov,
-	 * so we have to stuff the low 32 bits in %eax and the high 32 bits in
-	 * %edx, then call wrmsr.
-	 */
-	leaq	cpu(%rip), %rdi
-	movl	(%rdi, %r11, 8), %eax
-	movl	4(%rdi, %r11, 8), %edx
-	movl	$MSR_AMD_GSBASE, %ecx
-	wrmsr
-
-	/*
-	 * Init FS and KernelGSBase.
-	 *
-	 * Based on code in mlsetup(), set them both to 8G (which shouldn't be
-	 * valid until some 64-bit processes run); this will then cause an
-	 * exception in any code that tries to index off them before they are
-	 * properly setup.
-	 */
-	xorl	%eax, %eax		/* low 32 bits = 0 */
-	movl	$2, %edx		/* high 32 bits = 2 */
-	movl	$MSR_AMD_FSBASE, %ecx
-	wrmsr
-
-	movl	$MSR_AMD_KGSBASE, %ecx
-	wrmsr
-
-	/*
-	 * Init %rsp to the exception stack set in tss_ist1 and create a legal
-	 * AMD64 ABI stack frame
-	 */
-	movq	%gs:CPU_TSS, %rax
-	movq	TSS_IST1(%rax), %rsp
-	pushq	$0		/* null return address */
-	pushq	$0		/* null frame pointer terminates stack trace */
-	movq	%rsp, %rbp	/* stack aligned on 16-byte boundary */
-
-	movq	%cr0, %rax
-	andq    $-1![CR0_TS|CR0_EM], %rax	/* clr emulate math chip bit */
-	orq     $[CR0_MP|CR0_NE], %rax
-	movq    %rax, %cr0			/* set machine status word */
-
-	/*
-	 * Before going any further, enable usage of page table NX bit if 
-	 * that's how our page tables are set up.
-	 */
-	bt	$X86FSET_NX, x86_featureset(%rip)
-	jnc	1f
-	movl	$MSR_AMD_EFER, %ecx
-	rdmsr
-	orl	$AMD_EFER_NXE, %eax
-	wrmsr
-1:
-
-	/*
-	 * Complete the rest of the setup and call mp_startup().
-	 */
-	movq	%gs:CPU_THREAD, %rax	/* get thread ptr */
-	call	*T_PC(%rax)		/* call mp_startup */
-	/* not reached */
-	int	$20			/* whoops, returned somehow! */
-#else	/* __GNUC_AS__ */
-
 	/*
 	 * NOTE:  The GNU assembler automatically does the right thing to
 	 *	  generate data size operand prefixes based on the code size
@@ -583,7 +329,6 @@
 	call	*T_PC(%rax)		/* call mp_startup */
 	/* not reached */
 	int	$20			/* whoops, returned somehow! */
-#endif	/* !__GNUC_AS__ */
 
 	SET_SIZE(real_mode_start_cpu)
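
The comment retained at the top of the GNU path ("the GNU assembler automatically does the right thing to generate data size operand prefixes based on the code size") is the key to the large deletion above: under .code16, GNU as emits the operand- and address-size prefixes itself, so the hand-placed D16/A16 markers and the inverted .code32 view that the Sun assembler required are unnecessary. A minimal illustration of the two styles (a hypothetical fragment for contrast, not lines from the source) for an instruction that executes in 16-bit mode but must operate on 32-bit data:

	/* Sun-as style: assemble as .code32, then force the 0x66 prefix by
	   hand (D16) so the operation is 32-bit when run in 16-bit mode */
	.code32
	D16 orl		$_CONST(CR0_PE|CR0_WP|CR0_AM), %eax

	/* GNU-as style: declare the real code size and let the assembler
	   generate the prefix itself */
	.code16
	orl		$(CR0_PE|CR0_WP|CR0_AM), %eax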