PSARC/2007/307 Victoria Falls CPU/memory FMA
6532872 Incorrect fault name reported for store buffer
6536478 anchored page retire for T5140/T5240
6536482 diagnose FBR and FBU errors to branch
6545057 on T5140/T5240, diagnose mem UE as L2 cache data UE if C2C bit is set
6545604 Enhance CPU/Mem DE to support T2plus
6545632 add US-T2plus support to CPU/Mem error injector
--- a/usr/src/cmd/fm/dicts/SUN4V.dict Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/dicts/SUN4V.dict Mon Jul 30 12:41:05 2007 -0700
@@ -66,3 +66,19 @@
fault.io.n2.crossbar=36
fault.io.fire.fw-epkt fault.io.fire.sw-epkt fault.io.fire.sw-fw-mismatch=37
fault.io.vf.ncx=38
+fault.memory.link-f=39
+fault.cpu.ultraSPARC-T2plus.ireg=40
+fault.cpu.ultraSPARC-T2plus.freg=41
+fault.cpu.ultraSPARC-T2plus.misc_reg=42
+fault.cpu.ultraSPARC-T2plus.itlb=43
+fault.cpu.ultraSPARC-T2plus.dtlb=44
+fault.cpu.ultraSPARC-T2plus.icache=45
+fault.cpu.ultraSPARC-T2plus.dcache=46
+fault.cpu.ultraSPARC-T2plus.mau=47
+fault.cpu.ultraSPARC-T2plus.l2data-c=48
+fault.cpu.ultraSPARC-T2plus.l2cachetag=49
+fault.cpu.ultraSPARC-T2plus.l2cachectl=50
+fault.cpu.ultraSPARC-T2plus.l2data-u=51
+fault.cpu.ultraSPARC-T2plus.lfu-f=52
+fault.cpu.ultraSPARC-T2plus.lfu-p=53
+fault.cpu.ultraSPARC-T2plus.lfu-u=54
--- a/usr/src/cmd/fm/dicts/SUN4V.po Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/dicts/SUN4V.po Mon Jul 30 12:41:05 2007 -0700
@@ -633,3 +633,259 @@
msgstr "Loss of services provided by the device\ninstances associated with this fault\n"
msgid "SUN4V-8001-64.action"
msgstr "Schedule a repair procedure to replace the affected\ndevice if necessary, or contact Sun for support.\n"
+#
+# code: SUN4V-8001-7R
+# keys: fault.memory.link-f
+#
+msgid "SUN4V-8001-7R.type"
+msgstr "Fault"
+msgid "SUN4V-8001-7R.severity"
+msgstr "Major"
+msgid "SUN4V-8001-7R.description"
+msgstr "A problem was detected in the interconnect between a memory DIMM module and\nits memory controller. A lane failover has taken place.\n Refer to %s for more information."
+msgid "SUN4V-8001-7R.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-7R.impact"
+msgstr "System performance may be impacted.\n"
+msgid "SUN4V-8001-7R.action"
+msgstr "At convenient time, try reseating the memory module(s). If problem persists,\ncontact Sun to schedule part replacement.\n"
+#
+# code: SUN4V-8001-8H
+# keys: fault.cpu.ultraSPARC-T2plus.ireg
+#
+msgid "SUN4V-8001-8H.type"
+msgstr "Fault"
+msgid "SUN4V-8001-8H.severity"
+msgstr "Minor"
+msgid "SUN4V-8001-8H.description"
+msgstr "The number of integer register errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-8H.response"
+msgstr "The fault manager will attempt to remove the affected thread\nfrom service.\n"
+msgid "SUN4V-8001-8H.impact"
+msgstr "System performance may be affected. \n"
+msgid "SUN4V-8001-8H.action"
+msgstr "Schedule a repair procedure to replace the affected CPU, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-9D
+# keys: fault.cpu.ultraSPARC-T2plus.freg
+#
+msgid "SUN4V-8001-9D.type"
+msgstr "Fault"
+msgid "SUN4V-8001-9D.severity"
+msgstr "Minor"
+msgid "SUN4V-8001-9D.description"
+msgstr "The number of floating register errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-9D.response"
+msgstr "The fault manager will attempt to remove the affected thread\nfrom service.\n"
+msgid "SUN4V-8001-9D.impact"
+msgstr "System performance may be affected. \n"
+msgid "SUN4V-8001-9D.action"
+msgstr "Schedule a repair procedure to replace the affected CPU, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-AY
+# keys: fault.cpu.ultraSPARC-T2plus.misc_reg
+#
+msgid "SUN4V-8001-AY.type"
+msgstr "Fault"
+msgid "SUN4V-8001-AY.severity"
+msgstr "Minor"
+msgid "SUN4V-8001-AY.description"
+msgstr "The number of ancillary register errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-AY.response"
+msgstr "The fault manager will attempt to remove the affected thread\nfrom service.\n"
+msgid "SUN4V-8001-AY.impact"
+msgstr "System performance may be affected. \n"
+msgid "SUN4V-8001-AY.action"
+msgstr "Schedule a repair procedure to replace the affected CPU, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-C3
+# keys: fault.cpu.ultraSPARC-T2plus.itlb
+#
+msgid "SUN4V-8001-C3.type"
+msgstr "Fault"
+msgid "SUN4V-8001-C3.severity"
+msgstr "Major"
+msgid "SUN4V-8001-C3.description"
+msgstr "The number of ITLB errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-C3.response"
+msgstr "The fault manager will attempt to remove all threads associated with\nthis resource from service.\n"
+msgid "SUN4V-8001-C3.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-C3.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-DS
+# keys: fault.cpu.ultraSPARC-T2plus.dtlb
+#
+msgid "SUN4V-8001-DS.type"
+msgstr "Fault"
+msgid "SUN4V-8001-DS.severity"
+msgstr "Major"
+msgid "SUN4V-8001-DS.description"
+msgstr "The number of DTLB errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-DS.response"
+msgstr "The fault manager will attempt to remove all threads associated with\nthis resource from service.\n"
+msgid "SUN4V-8001-DS.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-DS.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-E5
+# keys: fault.cpu.ultraSPARC-T2plus.icache
+#
+msgid "SUN4V-8001-E5.type"
+msgstr "Fault"
+msgid "SUN4V-8001-E5.severity"
+msgstr "Major"
+msgid "SUN4V-8001-E5.description"
+msgstr "The number of I-cache errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-E5.response"
+msgstr "The fault manager will attempt to remove all threads associated with\nthis resource from service.\n"
+msgid "SUN4V-8001-E5.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-E5.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-FP
+# keys: fault.cpu.ultraSPARC-T2plus.dcache
+#
+msgid "SUN4V-8001-FP.type"
+msgstr "Fault"
+msgid "SUN4V-8001-FP.severity"
+msgstr "Major"
+msgid "SUN4V-8001-FP.description"
+msgstr "The number of D-cache errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-FP.response"
+msgstr "The fault manager will attempt to remove all threads associated with\nthis resource from service.\n"
+msgid "SUN4V-8001-FP.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-FP.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-GA
+# keys: fault.cpu.ultraSPARC-T2plus.mau
+#
+msgid "SUN4V-8001-GA.type"
+msgstr "Fault"
+msgid "SUN4V-8001-GA.severity"
+msgstr "Major"
+msgid "SUN4V-8001-GA.description"
+msgstr "The number of modular arithmetic unit errors associated with this thread has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-GA.response"
+msgstr "Cryptographic software will not use this modular arithmetic unit.\n\n"
+msgid "SUN4V-8001-GA.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-GA.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-HJ
+# keys: fault.cpu.ultraSPARC-T2plus.l2data-c
+#
+msgid "SUN4V-8001-HJ.type"
+msgstr "Fault"
+msgid "SUN4V-8001-HJ.severity"
+msgstr "Critical"
+msgid "SUN4V-8001-HJ.description"
+msgstr "The number of level 2 cache correctable data errors has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-HJ.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-HJ.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-HJ.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-JE
+# keys: fault.cpu.ultraSPARC-T2plus.l2cachetag
+#
+msgid "SUN4V-8001-JE.type"
+msgstr "Fault"
+msgid "SUN4V-8001-JE.severity"
+msgstr "Critical"
+msgid "SUN4V-8001-JE.description"
+msgstr "The number of level 2 cache tag errors has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-JE.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-JE.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-JE.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-KX
+# keys: fault.cpu.ultraSPARC-T2plus.l2cachectl
+#
+msgid "SUN4V-8001-KX.type"
+msgstr "Fault"
+msgid "SUN4V-8001-KX.severity"
+msgstr "Critical"
+msgid "SUN4V-8001-KX.description"
+msgstr "The number of level 2 cache control errors has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-KX.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-KX.impact"
+msgstr "System performance is likely to be affected. System may be unstable.\n"
+msgid "SUN4V-8001-KX.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-L2
+# keys: fault.cpu.ultraSPARC-T2plus.l2data-u
+#
+msgid "SUN4V-8001-L2.type"
+msgstr "Fault"
+msgid "SUN4V-8001-L2.severity"
+msgstr "Critical"
+msgid "SUN4V-8001-L2.description"
+msgstr "The number of level 2 cache uncorrectable data errors has exceeded acceptable levels.\n Refer to %s for more information."
+msgid "SUN4V-8001-L2.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-L2.impact"
+msgstr "System performance is likely to be affected. \n"
+msgid "SUN4V-8001-L2.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-MR
+# keys: fault.cpu.ultraSPARC-T2plus.lfu-f
+#
+msgid "SUN4V-8001-MR.type"
+msgstr "Fault"
+msgid "SUN4V-8001-MR.severity"
+msgstr "Major"
+msgid "SUN4V-8001-MR.description"
+msgstr "A CPU chip's Link Framing Unit has stopped using a bad lane.\n Refer to %s for more information."
+msgid "SUN4V-8001-MR.response"
+msgstr "No other automated response.\n"
+msgid "SUN4V-8001-MR.impact"
+msgstr "The system's capacity to correct transmission errors between CPU chips has been reduced.\n"
+msgid "SUN4V-8001-MR.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-N4
+# keys: fault.cpu.ultraSPARC-T2plus.lfu-p
+#
+msgid "SUN4V-8001-N4.type"
+msgstr "Fault"
+msgid "SUN4V-8001-N4.severity"
+msgstr "Major"
+msgid "SUN4V-8001-N4.description"
+msgstr "A CPU chip's Link Framing Unit has encountered a protocol error.\n Refer to %s for more information."
+msgid "SUN4V-8001-N4.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-N4.impact"
+msgstr "The system has most likely taken a fatal reset.\n"
+msgid "SUN4V-8001-N4.action"
+msgstr "Schedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
+#
+# code: SUN4V-8001-PQ
+# keys: fault.cpu.ultraSPARC-T2plus.lfu-u
+#
+msgid "SUN4V-8001-PQ.type"
+msgstr "Fault"
+msgid "SUN4V-8001-PQ.severity"
+msgstr "Major"
+msgid "SUN4V-8001-PQ.description"
+msgstr "A CPU chip's Link Framing Unit has encountered an unrecoverable lane failure.\n Refer to %s for more information."
+msgid "SUN4V-8001-PQ.response"
+msgstr "No automated response.\n"
+msgid "SUN4V-8001-PQ.impact"
+msgstr "The system's integrity is seriously compromised.\n"
+msgid "SUN4V-8001-PQ.action"
+msgstr "Do not rely on this system for mission-critical tasks.\nSchedule a repair procedure to replace the affected resource, the identity of which can be determined using fmdump -v -u <EVENT_ID>.\n"
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd.h Mon Jul 30 12:41:05 2007 -0700
@@ -158,6 +158,9 @@
fmd_stat_t dp_ignored_ue; /* # of UEs ignored due to DP fault */
fmd_stat_t dp_deferred_ue; /* # of UEs deferred due to DP error */
#endif
+#ifdef sun4v
+ fmd_stat_t branch_creat; /* # of branch state structs created */
+#endif
} cmd_stat_t;
typedef struct cmd_serd {
@@ -195,6 +198,9 @@
#ifdef sun4u
uint16_t cmd_dp_flag; /* datapath error in progress if set */
#endif
+#ifdef sun4v
+ cmd_list_t cmd_branches; /* List of branch state structures */
+#endif
} cmd_t;
extern cmd_t cmd;
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.c Mon Jul 30 12:41:05 2007 -0700
@@ -70,7 +70,8 @@
"ultraSPARC-IIIiplus",
"ultraSPARC-T1",
"SPARC64-VI",
- "ultraSPARC-T2"
+ "ultraSPARC-T2",
+ "ultraSPARC-T2plus"
};
/*
@@ -179,6 +180,7 @@
return;
}
case CPU_ULTRASPARC_T2:
+ case CPU_ULTRASPARC_T2plus:
switch (level) {
case CMD_CPU_LEVEL_CORE:
*cpuinit = core * UST2_CPUS_PER_CORE;
@@ -238,6 +240,7 @@
return (cpuid);
}
case CPU_ULTRASPARC_T2:
+ case CPU_ULTRASPARC_T2plus:
switch (level) {
case CMD_CPU_LEVEL_CORE:
return (cpuid/UST2_CPUS_PER_CORE);
@@ -2016,6 +2019,9 @@
cpu_case_restore(hdl, cpu, &cpu->cpu_misc_regs, cp,
"misc_regs");
break;
+ case CMD_PTR_CPU_LFU:
+ cpu_case_restore(hdl, cpu, &cpu->cpu_lfu, cp, "lfu");
+ break;
#ifdef sun4u
case CMD_PTR_CPU_INV_SFSR:
cpu_case_restore(hdl, cpu, &cpu->cpu_opl_invsfsr, cp,
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpu.h Mon Jul 30 12:41:05 2007 -0700
@@ -100,7 +100,8 @@
CPU_ULTRASPARC_IIIiplus,
CPU_ULTRASPARC_T1,
CPU_SPARC64_VI,
- CPU_ULTRASPARC_T2
+ CPU_ULTRASPARC_T2,
+ CPU_ULTRASPARC_T2plus
} cmd_cpu_type_t;
typedef struct cmd_cpu_cases {
@@ -122,6 +123,7 @@
/* Tick compare (TC) */
/* Store buffer (SBD) */
/* Trap stack array errors (TSA) */
+ cmd_case_t cpuc_lfu; /* Coherency link error (LFU) */
#ifdef sun4u
cmd_case_t cpuc_opl_invsfsr; /* Olympus-C cpu inv-sfsr errors */
cmd_case_t cpuc_oplue_detcpu; /* Olympus-C cpu det. ue (eid=CPU) */
@@ -424,6 +426,7 @@
#define cpu_mau cpu_cases.cpuc_mau
#define cpu_l2ctl cpu_cases.cpuc_l2ctl
#define cpu_misc_regs cpu_cases.cpuc_misc_regs
+#define cpu_lfu cpu_cases.cpuc_lfu
#ifdef sun4u
#define cpu_opl_invsfsr cpu_cases.cpuc_opl_invsfsr
#define cpu_oplue_detcpu cpu_cases.cpuc_oplue_detcpu
@@ -656,6 +659,37 @@
const char *, cmd_errcl_t);
/*
+ * Type Fault
+ * ---------------------------------------------------------------------
+ * LFU-RTF uncorrectable link retrain fail error fault.cpu.T2plus.lfu-u
+ * LFU-TTO uncorrectable training timeout error
+ * LFU-CTO uncorrectable config timeout error
+ * LFU-MLF uncorrectable multi lanes link fail error
+ * LFU-SLF correctable single lane failover fault.cpu.T2plus.lfu-f
+ *
+ * The expected resolution of lfu faults is the repair of the indicated CPU.
+ */
+extern cmd_evdisp_t cmd_lfu_ue(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+extern cmd_evdisp_t cmd_lfu_ce(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+/*
+ * Type Fault
+ * ---------------------------------------------------------------------
+ * Coherency link protocol errors
+ * to Transaction timed out fault.cpu.T2plus.lfu-p
+ * frack Invalid or redundant request ack
+ * fsr Invalid or redundant snoop response
+ * fdr Invalid or redundant data return
+ * snptyp Invalid snoop type received from
+ * coherency link
+ *
+ * The expected resolution of lfu faults is the repair of the indicated CPU.
+ */
+extern cmd_evdisp_t cmd_lfu_pe(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+
+/*
* CPUs are described by FMRIs. This routine will retrieve the CPU state
* structure (creating a new one if necessary) described by the detector
* FMRI in the passed ereport.
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_cpuerr.c Mon Jul 30 12:41:05 2007 -0700
@@ -128,7 +128,7 @@
CMD_CPU_SIMPLEHANDLER(frc, freg, CMD_PTR_CPU_FREG, "freg", "freg")
CMD_CPU_SIMPLEHANDLER(mau, mau, CMD_PTR_CPU_MAU, "mau", "mau")
CMD_CPU_SIMPLEHANDLER(miscregs_ce, misc_regs, CMD_PTR_CPU_MISC_REGS,
- "misc_regs", "misc_regs")
+ "misc_regs", "misc_reg")
CMD_CPU_SIMPLEHANDLER(l2c, l2data, CMD_PTR_CPU_L2DATA, "l2data", "l2data-c")
CMD_CPU_SIMPLEHANDLER(fpu, fpu, CMD_PTR_CPU_FPU, "", "fpu")
@@ -136,8 +136,11 @@
CMD_CPU_SIMPLEHANDLER(iru, ireg, CMD_PTR_CPU_IREG, "", "ireg")
CMD_CPU_SIMPLEHANDLER(fru, freg, CMD_PTR_CPU_FREG, "", "freg")
CMD_CPU_SIMPLEHANDLER(miscregs_ue, misc_regs, CMD_PTR_CPU_MISC_REGS,
- "", "misc_regs")
+ "", "misc_reg")
CMD_CPU_SIMPLEHANDLER(l2u, l2data, CMD_PTR_CPU_L2DATA, "", "l2data-u")
+CMD_CPU_SIMPLEHANDLER(lfu_ue, lfu, CMD_PTR_CPU_LFU, "", "lfu-u")
+CMD_CPU_SIMPLEHANDLER(lfu_ce, lfu, CMD_PTR_CPU_LFU, "", "lfu-f")
+CMD_CPU_SIMPLEHANDLER(lfu_pe, lfu, CMD_PTR_CPU_LFU, "", "lfu-p")
#ifdef sun4u
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_dimm.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_dimm.c Mon Jul 30 12:41:05 2007 -0700
@@ -45,6 +45,7 @@
#include <sys/nvpair.h>
#ifdef sun4v
#include <cmd_hc_sun4v.h>
+#include <cmd_branch.h>
#endif /* sun4v */
/*
@@ -99,6 +100,9 @@
{
cmd_case_t *cc = &dimm->dimm_case;
+#ifdef sun4v
+ cmd_branch_t *branch;
+#endif
if (cc->cc_cp != NULL) {
cmd_case_fini(hdl, cc->cc_cp, destroy);
if (cc->cc_serdnm != NULL) {
@@ -111,6 +115,11 @@
if (dimm->dimm_bank != NULL)
cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
+#ifdef sun4v
+ branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
+ if (branch != NULL)
+ cmd_branch_remove_dimm(hdl, branch, dimm);
+#endif
cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_main.c Mon Jul 30 12:41:05 2007 -0700
@@ -308,8 +308,8 @@
CMD_CPU_LEVEL_CHIP },
{ "ereport.cpu.*.lvu", cmd_l2ctl },
{ "ereport.cpu.*.lru", cmd_l2ctl },
- { "ereport.cpu.*.fbr", cmd_nop },
- { "ereport.cpu.*.fbu", cmd_ue, CMD_ERRCL_DAU },
+ { "ereport.cpu.*.fbr", cmd_fb },
+ { "ereport.cpu.*.fbu", cmd_fb },
{ "ereport.cpu.*.dac", cmd_ce, CMD_ERRCL_DAC },
{ "ereport.cpu.*.dsc", cmd_ce, CMD_ERRCL_DSC },
{ "ereport.cpu.*.dau", cmd_ue, CMD_ERRCL_DAU },
@@ -342,6 +342,19 @@
CMD_ERRCL_SBDPC | CMD_CPU_LEVEL_THREAD },
{ "ereport.cpu.*.tsau", cmd_miscregs_ue,
CMD_CPU_LEVEL_THREAD },
+ { "ereport.cpu.*.cbce", cmd_xxc, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.dce", cmd_nop },
+ { "ereport.cpu.*.wbue", cmd_nop },
+ { "ereport.cpu.*.lfu-slf", cmd_lfu_ce, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.lfu-rtf", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.lfu-tto", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.lfu-cto", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.lfu-mlf", cmd_lfu_ue, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.frack", cmd_lfu_pe, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.fsr", cmd_lfu_pe, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.fdr", cmd_lfu_pe, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.to", cmd_lfu_pe, CMD_CPU_LEVEL_CHIP },
+ { "ereport.cpu.*.snptyp", cmd_lfu_pe, CMD_CPU_LEVEL_CHIP },
#endif /* sun4u */
{ "ereport.cpu.*.fpu.hwcopy", cmd_fpu },
{ NULL, NULL }
@@ -456,6 +469,9 @@
{ "dp_deferred_ue", FMD_TYPE_UINT64,
"memory UEs deferred due to DP error" },
#endif
+#ifdef sun4v
+ { "branch_creat", FMD_TYPE_UINT64, "created new mem branch structure" },
+#endif
};
static const fmd_prop_t fmd_props[] = {
@@ -507,6 +523,10 @@
{ "thresh_abs_badrw", FMD_TYPE_UINT64, "128" },
{ "max_perm_ce_dimm", FMD_TYPE_UINT32, "128" },
{ "miscregs_trdelay", FMD_TYPE_TIME, "45s"},
+#ifdef sun4v
+ { "fbr_n", FMD_TYPE_UINT32, "14" },
+ { "fbr_t", FMD_TYPE_TIME, "30min"},
+#endif
{ NULL, 0, NULL }
};
@@ -588,6 +608,7 @@
fmd_hdl_subscribe(hdl, "ereport.cpu.ultraSPARC-IV.*");
fmd_hdl_subscribe(hdl, "ereport.cpu.ultraSPARC-IVplus.*");
fmd_hdl_subscribe(hdl, "ereport.cpu.ultraSPARC-T2.*");
+ fmd_hdl_subscribe(hdl, "ereport.cpu.ultraSPARC-T2plus.*");
fmd_hdl_subscribe(hdl, "ereport.cpu.ultraSPARC-T1.*");
fmd_hdl_subscribe(hdl, "ereport.io.tom.ecc.drce");
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.c Mon Jul 30 12:41:05 2007 -0700
@@ -36,6 +36,9 @@
#ifdef sun4u
#include <cmd_dp.h>
#endif
+#ifdef sun4v
+#include <cmd_branch.h>
+#endif
#include <errno.h>
#include <strings.h>
@@ -253,6 +256,9 @@
{
cmd_dimm_gc(hdl);
cmd_bank_gc(hdl);
+#ifdef sun4v
+ cmd_branch_gc(hdl);
+#endif
}
void
@@ -262,6 +268,9 @@
cmd_dimm_fini(hdl);
cmd_bank_fini(hdl);
+#ifdef sun4v
+ cmd_branch_fini(hdl);
+#endif
while ((rf = cmd_list_next(&cmd.cmd_iorxefrx)) != NULL)
cmd_iorxefrx_free(hdl, rf);
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_mem.h Mon Jul 30 12:41:05 2007 -0700
@@ -78,6 +78,9 @@
typedef struct cmd_dimm cmd_dimm_t;
typedef struct cmd_bank cmd_bank_t;
+#ifdef sun4v
+typedef struct cmd_branch cmd_branch_t;
+#endif
/*
* Correctable and Uncorrectable memory errors
@@ -129,6 +132,11 @@
uint16_t, uint8_t, cmd_cpu_t *);
extern void cmd_dimm_close(fmd_hdl_t *, void *);
extern void cmd_bank_close(fmd_hdl_t *, void *);
+#ifdef sun4v
+extern void cmd_branch_close(fmd_hdl_t *, void *);
+extern cmd_evdisp_t cmd_fb(fmd_hdl_t *, fmd_event_t *, nvlist_t *,
+ const char *, cmd_errcl_t);
+#endif
/*
* US-IIIi I/O, Remote and Foreign Read memory errors
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_state.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_state.c Mon Jul 30 12:41:05 2007 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,6 +40,9 @@
#endif
#include <cmd_bank.h>
#include <cmd.h>
+#ifdef sun4v
+#include <cmd_branch.h>
+#endif
#include <string.h>
#include <fm/fmd_api.h>
@@ -98,7 +101,13 @@
cmd_cpuerr_close, /* CMD_PTR_CPU_UGESR_DAE */
cmd_cpuerr_close, /* CMD_PTR_CPU_UGESR_IAE */
cmd_cpuerr_close, /* CMD_PTR_CPU_UGESR_UGE */
- cmd_cpuerr_close /* CMD_PTR_CPU_MISC_REGS */
+ cmd_cpuerr_close, /* CMD_PTR_CPU_MISC_REGS */
+ cmd_cpuerr_close, /* CMD_PTR_CPU_LFU */
+#ifdef sun4v
+ cmd_branch_close /* CMD_PTR_BRANCH_CASE */
+#else
+ NULL
+#endif
};
fmd_case_t *
@@ -171,6 +180,9 @@
#ifdef sun4u
cmd_dp_restore /* CMD_NT_DP */
#endif
+#ifdef sun4v
+ cmd_branch_restore /* CMD_NT_BRANCH */
+#endif
};
int
@@ -222,6 +234,9 @@
cmd_dp_validate(hdl);
#endif
cmd_page_validate(hdl);
+#ifdef sun4v
+ cmd_branch_validate(hdl);
+#endif
return (0);
}
--- a/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_state.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_state.h Mon Jul 30 12:41:05 2007 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -71,7 +71,12 @@
CMD_NT_DIMM,
CMD_NT_BANK,
CMD_NT_PAGE,
+#ifdef sun4u
CMD_NT_DP
+#endif
+#ifdef sun4v
+ CMD_NT_BRANCH
+#endif
} cmd_nodetype_t;
/*
@@ -123,7 +128,9 @@
CMD_PTR_CPU_UGESR_DAE,
CMD_PTR_CPU_UGESR_IAE,
CMD_PTR_CPU_UGESR_UGE,
- CMD_PTR_CPU_MISC_REGS
+ CMD_PTR_CPU_MISC_REGS,
+ CMD_PTR_CPU_LFU,
+ CMD_PTR_BRANCH_CASE
} cmd_ptrsubtype_t;
/*
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/Makefile Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/Makefile Mon Jul 30 12:41:05 2007 -0700
@@ -26,7 +26,8 @@
ARCH = sun4v
-sun4v_SRCS = cmd_hc_sun4v.c
+sun4v_SRCS = cmd_hc_sun4v.c \
+ cmd_branch.c
INCDIRS = $(SRC)/uts/sun4v \
$(ROOT)/usr/platform/sun4v/include
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_branch.c Mon Jul 30 12:41:05 2007 -0700
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <cmd_mem.h>
+#include <cmd_branch.h>
+#include <cmd_dimm.h>
+#include <cmd.h>
+#include <cmd_hc_sun4v.h>
+
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <fm/fmd_api.h>
+#include <sys/fm/protocol.h>
+#include <sys/mem.h>
+#include <sys/nvpair.h>
+
+#define BUF_SIZE 120
+
+void
+cmd_branch_add_dimm(fmd_hdl_t *hdl, cmd_branch_t *branch, cmd_dimm_t *dimm)
+{
+	cmd_branch_memb_t *memb;	/* list node wrapping the dimm */
+
+	if (dimm != NULL) {
+		fmd_hdl_debug(hdl, "Attaching dimm %s to branch %s\n",
+		    dimm->dimm_unum, branch->branch_unum);
+		/* Zeroed node; only the dimm pointer is filled in. */
+		memb = fmd_hdl_zalloc(hdl, sizeof (*memb), FMD_SLEEP);
+		memb->dimm = dimm;
+		cmd_list_append(&branch->branch_dimms, memb);
+	}
+}
+
+void
+cmd_branch_remove_dimm(fmd_hdl_t *hdl, cmd_branch_t *branch, cmd_dimm_t *dimm)
+{
+	cmd_branch_memb_t *memb;
+
+	fmd_hdl_debug(hdl, "Detaching dimm %s from branch %s\n",
+	    dimm->dimm_unum, branch->branch_unum);
+
+	/* Walk the membership list until the node wrapping this dimm. */
+	memb = cmd_list_next(&branch->branch_dimms);
+	while (memb != NULL && memb->dimm != dimm)
+		memb = cmd_list_next(memb);
+	if (memb != NULL) {
+		/* Found: unlink the node and release it. */
+		cmd_list_delete(&branch->branch_dimms, memb);
+		fmd_hdl_free(hdl, memb, sizeof (cmd_branch_memb_t));
+		return;
+	}
+	fmd_hdl_abort(hdl,
+	    "Attempt to disconnect dimm from non-parent branch\n");
+}
+
+static cmd_dimm_t *
+branch_dimm_create(fmd_hdl_t *hdl, char *dimm_unum)
+{
+	cmd_dimm_t *dimm = NULL;
+	nvlist_t *nvl = cmd_mem_fmri_create(dimm_unum);
+
+	/*
+	 * Only a mem FMRI that was built and expanded successfully is
+	 * handed to cmd_dimm_create(); any failure leaves dimm NULL.
+	 */
+	if (nvl != NULL && fmd_nvl_fmri_expand(hdl, nvl) == 0)
+		dimm = cmd_dimm_create(hdl, nvl);
+
+	/* Single exit: nvlist_free() runs whatever the outcome was. */
+	nvlist_free(nvl);
+
+	/* NULL when no dimm state could be created for this unum */
+	return (dimm);
+}
+
+/*
+ * The cmd_dimm_t structure created for a DIMM in a branch never has a
+ * Jxxx in its unum; the cmd_dimm_t structure created for a DIMM containing
+ * a page, or in a bank (i.e. for ECC errors), always has a Jxxx in its
+ * unum. Therefore the set of cmd_dimm_t's created for a branch is always
+ * disjoint from the set of cmd_dimm_t's created for pages and/or banks, so
+ * cmd_dimm_create() will never link a 'branch' cmd_dimm_t into a bank.
+ * Faulting a DIMM for ECC will not prevent subsequent faulting of the
+ * "same" DIMM for FBR/FBU, and vice versa.
+ */
+static int
+branch_dimmlist_create(fmd_hdl_t *hdl, cmd_branch_t *branch)
+{
+	char unum[BUF_SIZE];	/* scratch for "<branch>/CH<c>/D<d>" */
+	cmd_dimm_t *dimm;
+	int ch, slot;
+	int attached = 0;	/* dimms actually linked to the branch */
+
+	/* Try every channel/DIMM position under this branch's unum. */
+	for (ch = 0; ch < MAX_CHANNELS_ON_CHIP; ch++) {
+		for (slot = 0; slot < MAX_DIMMS_IN_CHANNEL; slot++) {
+			(void) snprintf(unum, sizeof (unum), "%s/CH%1d/D%1d",
+			    branch->branch_unum, ch, slot);
+			if ((dimm = branch_dimm_create(hdl, unum)) == NULL)
+				continue;	/* no dimm created */
+			cmd_branch_add_dimm(hdl, branch, dimm);
+			attached++;
+		}
+	}
+	return (attached);
+}
+
+void
+cmd_branch_create_fault(fmd_hdl_t *hdl, cmd_branch_t *branch,
+    const char *fltnm, nvlist_t *asru)
+{
+	fmd_case_t *cp = branch->branch_case.cc_cp;
+	cmd_branch_memb_t *memb;
+	nvlist_t *nvl;
+	uint_t dimm_cert;
+	int ndimms;
+
+	/* Populate branch_dimms; the count decides each dimm's share. */
+	ndimms = branch_dimmlist_create(hdl, branch);
+	dimm_cert = (ndimms != 0) ? (100 - CMD_MBFAULT_CERT) / ndimms : 0;
+
+	/* The motherboard is always a suspect, at CMD_MBFAULT_CERT. */
+	nvl = cmd_motherboard_create_fault(hdl, asru, fltnm, CMD_MBFAULT_CERT);
+	fmd_case_add_suspect(hdl, cp, nvl);
+
+	/* Every attached dimm is flagged as faulting and named a suspect. */
+	memb = cmd_list_next(&branch->branch_dimms);
+	while (memb != NULL) {
+		cmd_dimm_t *dimm = memb->dimm;
+
+		if (dimm != NULL) {
+			dimm->dimm_flags |= CMD_MEM_F_FAULTING;
+			cmd_dimm_dirty(hdl, dimm);
+			nvl = cmd_dimm_create_fault(hdl, dimm, fltnm,
+			    dimm_cert);
+			fmd_case_add_suspect(hdl, cp, nvl);
+		}
+		memb = cmd_list_next(memb);
+	}
+}
+
+cmd_branch_t *
+cmd_branch_create(fmd_hdl_t *hdl, nvlist_t *asru)
+{
+	const char *unum = cmd_fmri_get_unum(asru);
+	cmd_branch_t *br;
+
+	if (unum == NULL) {
+		/* No unum could be obtained from the ASRU. */
+		CMD_STAT_BUMP(bad_mem_asru);
+		return (NULL);
+	}
+
+	fmd_hdl_debug(hdl, "branch_create: creating new branch %s\n", unum);
+	CMD_STAT_BUMP(branch_creat);
+
+	br = fmd_hdl_zalloc(hdl, sizeof (*br), FMD_SLEEP);
+	br->branch_version = CMD_BRANCH_VERSION;
+	br->branch_nodetype = CMD_NT_BRANCH;
+	cmd_bufname(br->branch_bufname, sizeof (br->branch_bufname),
+	    "branch_%s", unum);
+	cmd_fmri_init(hdl, &br->branch_asru, asru, "branch_asru_%s", unum);
+
+	/* Point branch_unum at the unum string stored in the branch ASRU. */
+	(void) nvlist_lookup_string(br->branch_asru_nvl, FM_FMRI_MEM_UNUM,
+	    (char **)&br->branch_unum);
+
+	/* Add the new state to the global list, then cmd_branch_dirty() it. */
+	cmd_list_append(&cmd.cmd_branches, br);
+	cmd_branch_dirty(hdl, br);
+	return (br);
+}
+
+cmd_branch_t *
+cmd_branch_lookup_by_unum(fmd_hdl_t *hdl, const char *unum)
+{
+	cmd_branch_t *br;
+
+	fmd_hdl_debug(hdl, "branch_lookup: dimm_unum %s", unum);
+	/* Unums of FBR/FBU (branch) dimms never contain a J number. */
+	if (strchr(unum, 'J') != NULL)
+		return (NULL);
+
+	/* Branches are matched on the first BRANCH_UNUM_LEN bytes only. */
+	br = cmd_list_next(&cmd.cmd_branches);
+	while (br != NULL &&
+	    strncmp(br->branch_unum, unum, BRANCH_UNUM_LEN) != 0)
+		br = cmd_list_next(br);
+
+	if (br == NULL)
+		fmd_hdl_debug(hdl,
+		    "branch_lookup_by_unum: no branch is found\n");
+	return (br);
+}
+
+cmd_branch_t *
+cmd_branch_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
+{
+	cmd_branch_t *branch;
+	const char *unum;
+
+	/* Without a unum from the ASRU there is nothing to match against. */
+	if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
+		CMD_STAT_BUMP(bad_mem_asru);
+		return (NULL);
+	}
+	/* Branches are matched on the first BRANCH_UNUM_LEN bytes only. */
+	for (branch = cmd_list_next(&cmd.cmd_branches); branch != NULL;
+	    branch = cmd_list_next(branch)) {
+		if (strncmp(branch->branch_unum, unum, BRANCH_UNUM_LEN) == 0)
+			return (branch);
+	}
+	fmd_hdl_debug(hdl, "cmd_branch_lookup: no branch is found\n");
+	return (NULL);
+}
+
+static cmd_branch_t *
+branch_wrapv0(fmd_hdl_t *hdl, cmd_branch_pers_t *pers, size_t psz)
+{
+	cmd_branch_t *branch;
+
+	if (psz != sizeof (cmd_branch_pers_t)) {
+		fmd_hdl_abort(hdl, "size of state doesn't match size of "
+		    "version 0 state (%lu bytes).\n",
+		    (unsigned long)sizeof (cmd_branch_pers_t));
+	}
+	/* Copy the persistent part into a zeroed struct; free the buffer. */
+	branch = fmd_hdl_zalloc(hdl, sizeof (cmd_branch_t), FMD_SLEEP);
+	bcopy(pers, branch, sizeof (cmd_branch_pers_t));
+	fmd_hdl_free(hdl, pers, psz);
+	return (branch);
+}
+
+/*
+ * Restore the branch referenced by a case pointer and re-attach the case.
+ *
+ * If a branch whose buffer name equals ptr->ptr_name is already on
+ * cmd.cmd_branches it is reused.  Otherwise the persistent buffer is read,
+ * checked for size and version, converted to an in-core cmd_branch_t, its
+ * ASRU is restored, branch_unum is relinked to the unum string held in the
+ * ASRU nvlist, and the branch is appended to cmd.cmd_branches.  Any
+ * inconsistency in the saved state aborts the module.
+ *
+ * Finally the case itself is restored according to ptr->ptr_subtype; only
+ * CMD_PTR_BRANCH_CASE is accepted.  Returns the branch.
+ */
+void *
+cmd_branch_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
+{
+ cmd_branch_t *branch;
+ size_t branchsz;
+
+
+ /* Has this branch already been restored on behalf of another case? */
+ for (branch = cmd_list_next(&cmd.cmd_branches); branch != NULL;
+ branch = cmd_list_next(branch)) {
+ if (strcmp(branch->branch_bufname, ptr->ptr_name) == 0)
+ break;
+ }
+
+ if (branch == NULL) {
+ fmd_hdl_debug(hdl, "restoring branch from %s\n", ptr->ptr_name);
+
+ /* The saved buffer must exist and have a plausible size. */
+ if ((branchsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
+ fmd_hdl_abort(hdl, "branch referenced by case %s does "
+ "not exist in saved state\n",
+ fmd_case_uuid(hdl, cp));
+ } else if (branchsz > CMD_BRANCH_MAXSIZE ||
+ branchsz < CMD_BRANCH_MINSIZE) {
+ fmd_hdl_abort(hdl,
+ "branch buffer referenced by case %s "
+ "is out of bounds (is %u bytes, max %u, min %u)\n",
+ fmd_case_uuid(hdl, cp), branchsz,
+ CMD_BRANCH_MAXSIZE, CMD_BRANCH_MINSIZE);
+ }
+
+ /*
+ * What is read here is only the persistent image; it is viewed
+ * through a cmd_branch_t pointer just long enough to reach the
+ * version field.
+ */
+ if ((branch = cmd_buf_read(hdl, NULL, ptr->ptr_name,
+ branchsz)) == NULL) {
+ fmd_hdl_abort(hdl, "failed to read branch buf %s",
+ ptr->ptr_name);
+ }
+
+ fmd_hdl_debug(hdl, "found %d in version field\n",
+ branch->branch_version);
+
+ /* Version 0 is the only persistent layout understood here. */
+ switch (branch->branch_version) {
+ case CMD_BRANCH_VERSION_0:
+ branch = branch_wrapv0(hdl,
+ (cmd_branch_pers_t *)branch, branchsz);
+ break;
+ default:
+ fmd_hdl_abort(hdl, "unknown version (found %d) "
+ "for branch state referenced by case %s.\n",
+ branch->branch_version, fmd_case_uuid(hdl,
+ cp));
+ break;
+ }
+
+ cmd_fmri_restore(hdl, &branch->branch_asru);
+
+ /* branch_unum is not persisted; point it back into the ASRU. */
+ if ((errno = nvlist_lookup_string(branch->branch_asru_nvl,
+ FM_FMRI_MEM_UNUM, (char **)&branch->branch_unum)) != 0)
+ fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
+
+
+ cmd_list_append(&cmd.cmd_branches, branch);
+ }
+
+ /* Re-attach the case to the branch. */
+ switch (ptr->ptr_subtype) {
+ case CMD_PTR_BRANCH_CASE:
+ cmd_mem_case_restore(hdl, &branch->branch_case, cp, "branch",
+ branch->branch_unum);
+ break;
+ default:
+ fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
+ ptr->ptr_name, ptr->ptr_subtype);
+ }
+
+ return (branch);
+}
+
+/*
+ * Write the persistent part of a branch to its buffer.  If a buffer of a
+ * different size is currently stored under the branch's buffer name it is
+ * destroyed first.
+ */
+void
+cmd_branch_dirty(fmd_hdl_t *hdl, cmd_branch_t *branch)
+{
+	const char *bufname = branch->branch_bufname;
+	size_t cursz = fmd_buf_size(hdl, NULL, bufname);
+
+	if (cursz != sizeof (cmd_branch_pers_t))
+		fmd_buf_destroy(hdl, NULL, bufname);
+
+	/* No need to rewrite the FMRIs in the branch - they don't change */
+	fmd_buf_write(hdl, NULL, bufname, &branch->branch_pers,
+	    sizeof (cmd_branch_pers_t));
+}
+
+/*
+ * Empty a branch's DIMM membership list, unlinking and freeing each
+ * cmd_branch_memb_t node in turn.
+ */
+static void
+branch_dimmlist_free(fmd_hdl_t *hdl, cmd_branch_t *branch)
+{
+	cmd_branch_memb_t *memb;
+
+	for (;;) {
+		memb = cmd_list_next(&branch->branch_dimms);
+		if (memb == NULL)
+			break;
+
+		cmd_list_delete(&branch->branch_dimms, memb);
+		fmd_hdl_free(hdl, memb, sizeof (cmd_branch_memb_t));
+	}
+}
+
+/*
+ * Release a branch and take it off cmd.cmd_branches.
+ *
+ * When destroy is set the persistent state goes too: the case's SERD engine
+ * and the branch buffer are destroyed, and destroy is passed on to
+ * cmd_case_fini() and cmd_fmri_fini().  When destroy is clear only the
+ * in-core state is released.
+ *
+ * The SERD engine name string is freed in both cases.  The branch structure
+ * that holds the only pointer to it is freed below, so skipping the strfree
+ * on the non-destroy path would leak the string.
+ */
+static void
+branch_free(fmd_hdl_t *hdl, cmd_branch_t *branch, int destroy)
+{
+	cmd_case_t *cc = &branch->branch_case;
+
+	fmd_hdl_debug(hdl, "Free branch %s\n", branch->branch_unum);
+	if (cc->cc_cp != NULL) {
+		if (cc->cc_serdnm != NULL) {
+			/* Only a destroy takes the SERD engine with it. */
+			if (destroy)
+				fmd_serd_destroy(hdl, cc->cc_serdnm);
+			fmd_hdl_strfree(hdl, cc->cc_serdnm);
+			cc->cc_serdnm = NULL;
+		}
+		cmd_case_fini(hdl, cc->cc_cp, destroy);
+	}
+
+	branch_dimmlist_free(hdl, branch);
+	cmd_fmri_fini(hdl, &branch->branch_asru, destroy);
+
+	if (destroy)
+		fmd_buf_destroy(hdl, NULL, branch->branch_bufname);
+	cmd_list_delete(&cmd.cmd_branches, branch);
+	fmd_hdl_free(hdl, branch, sizeof (cmd_branch_t));
+}
+
+/*
+ * Free a branch with the destroy flag set (see branch_free()).
+ */
+void
+cmd_branch_destroy(fmd_hdl_t *hdl, cmd_branch_t *branch)
+{
+ branch_free(hdl, branch, FMD_B_TRUE);
+}
+
+/*
+ * Decide whether a branch still has at least one DIMM.  Every
+ * "<branch unum>/CH<c>/D<d>" combination within MAX_CHANNELS_ON_CHIP and
+ * MAX_DIMMS_IN_CHANNEL is turned into a mem FMRI; the branch is reported as
+ * existing (1) as soon as one of those FMRIs can be expanded.  Returns 0
+ * when none can.
+ */
+int
+branch_exist(fmd_hdl_t *hdl, cmd_branch_t *branch)
+{
+	char dimm_unum[BUF_SIZE];
+	nvlist_t *fmri;
+	int ch, slot, found;
+
+	fmd_hdl_debug(hdl, "branch_exist");
+	for (ch = 0; ch < MAX_CHANNELS_ON_CHIP; ch++) {
+		for (slot = 0; slot < MAX_DIMMS_IN_CHANNEL; slot++) {
+			(void) snprintf(dimm_unum, BUF_SIZE, "%s/CH%1d/D%1d",
+			    branch->branch_unum, ch, slot);
+
+			fmri = cmd_mem_fmri_create(dimm_unum);
+			found = (fmri != NULL &&
+			    fmd_nvl_fmri_expand(hdl, fmri) == 0);
+			nvlist_free(fmri);
+
+			if (found)
+				return (1);
+		}
+	}
+	fmd_hdl_debug(hdl, "branch %s does not exist\n", branch->branch_unum);
+	return (0);
+}
+
+/*
+ * Walk all branches and destroy the ones that are no longer valid.
+ *
+ * If the case has been solved, there is no need to check the dimmlist.
+ * If the case has not been solved, the branch is valid if there is at least
+ * one existing dimm in the branch.
+ */
+void
+cmd_branch_validate(fmd_hdl_t *hdl)
+{
+	cmd_branch_t *branch, *next;
+	int solved;
+
+	fmd_hdl_debug(hdl, "cmd_branch_validate\n");
+
+	branch = cmd_list_next(&cmd.cmd_branches);
+	while (branch != NULL) {
+		/* Fetch the successor first; branch may be freed below. */
+		next = cmd_list_next(branch);
+
+		solved = (branch->branch_case.cc_cp != NULL &&
+		    fmd_case_solved(hdl, branch->branch_case.cc_cp));
+		if (!solved && !branch_exist(hdl, branch))
+			cmd_branch_destroy(hdl, branch);
+
+		branch = next;
+	}
+}
+
+/*
+ * Garbage-collect branches; this simply runs cmd_branch_validate().
+ */
+void
+cmd_branch_gc(fmd_hdl_t *hdl)
+{
+ fmd_hdl_debug(hdl, "cmd_branch_gc\n");
+ cmd_branch_validate(hdl);
+}
+
+/*
+ * Release every branch on cmd.cmd_branches without destroying its
+ * persistent state (branch_free() is called with FMD_B_FALSE).
+ */
+void
+cmd_branch_fini(fmd_hdl_t *hdl)
+{
+	cmd_branch_t *head;
+
+	fmd_hdl_debug(hdl, "cmd_branch_fini\n");
+
+	/* branch_free() unlinks the branch, so keep taking the list head. */
+	for (head = cmd_list_next(&cmd.cmd_branches); head != NULL;
+	    head = cmd_list_next(&cmd.cmd_branches))
+		branch_free(hdl, head, FMD_B_FALSE);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_branch.h Mon Jul 30 12:41:05 2007 -0700
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _CMD_BRANCH_H
+#define _CMD_BRANCH_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Memory branches are described by the cmd_branch general-purpose state
+ * structure. This structure is used to track FBR and FBU errors.
+ *
+ * Data structures:
+ *
+ * ,--------. ,--------.
+ * |branch | <---- |case_ptr| (CMD_PTR_BRANCH_CASE)
+ * | | `--------'
+ * |,-------| ,-------------.
+ * ,->||asru_t | ----> |packed nvlist|
+ * | |`-------| `-------------'
+ * `--| |
+ * | dimms | ----> cmd_branch_memb_t -----> cmd_branch_memb_t -----> ...
+ * `--------' | |
+ * cmd_dimm_t cmd_dimm_t
+ *
+ * Data structure P? Case? Notes
+ * ---------------- --- ----- ----------------------------------------------
+ * cmd_branch_pers_t Yes No Name is derived from the unum ("branch_%s")
+ * cmd_case_ptr_t Yes Yes Name is case's UUID
+ * branch_asru Yes No Name is derived from the unum
+ * ("branch_asru_%s")
+ * branch_unum No No Pointer into ASRU - relinked during restore
+ * branch_dimms No No Recreated during restore
+ */
+
+#include <cmd_mem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Upper bounds on the channel (CH) and DIMM (D) numbers that branch_exist()
+ * probes beneath a branch unum.
+ */
+#define MAX_CHANNELS_ON_CHIP 4
+#define MAX_DIMMS_IN_CHANNEL 4
+/* Number of leading unum characters compared when looking up a branch. */
+#define BRANCH_UNUM_LEN 11
+/* NOTE(review): presumably the certainty for motherboard faults - confirm. */
+#define CMD_MBFAULT_CERT 30
+
+/*
+ * One entry on a branch's branch_dimms list; each entry refers to a
+ * cmd_dimm_t.
+ */
+typedef struct cmd_branch_memb {
+ cmd_list_t bm_list;
+ cmd_dimm_t *dimm;
+} cmd_branch_memb_t;
+
+#define CMD_BRANCH_VERSION_0 0
+#define CMD_BRANCH_VERSION CMD_BRANCH_VERSION_0
+
+/*
+ * The persistent part of a branch.  cmd_branch_dirty() writes exactly
+ * sizeof (cmd_branch_pers_t) bytes of this to the branch's buffer.
+ */
+typedef struct cmd_branch_pers {
+ cmd_header_t branchp_header; /* Nodetype must be CMD_NT_BRANCH */
+ uint_t branchp_version;
+ cmd_fmri_t branchp_asru; /* ASRU for this BRANCH */
+ uint_t branchp_flags; /* CMD_MEM_F_* */
+} cmd_branch_pers_t;
+
+/*
+ * In-core branch state.  branch_pers must remain the first member:
+ * branch_wrapv0() copies a persistent image over the start of this
+ * structure.
+ */
+struct cmd_branch {
+ cmd_branch_pers_t branch_pers;
+ const char *branch_unum; /* This BRANCH's name */
+ cmd_case_t branch_case; /* Open link errors case against */
+ /* this BRANCH */
+ cmd_list_t branch_dimms; /* This BRANCH's dimms */
+};
+
+#define CMD_BRANCH_MAXSIZE sizeof (cmd_branch_pers_t)
+#define CMD_BRANCH_MINSIZE sizeof (cmd_branch_pers_t)
+
+#define branch_header branch_pers.branchp_header
+#define branch_nodetype branch_pers.branchp_header.hdr_nodetype
+#define branch_bufname branch_pers.branchp_header.hdr_bufname
+#define branch_version branch_pers.branchp_version
+#define branch_asru branch_pers.branchp_asru
+#define branch_asru_nvl branch_pers.branchp_asru.fmri_nvl
+#define branch_flags branch_pers.branchp_flags
+
+extern cmd_branch_t *cmd_branch_lookup(fmd_hdl_t *, nvlist_t *);
+extern cmd_branch_t *cmd_branch_create(fmd_hdl_t *, nvlist_t *);
+extern cmd_branch_t *cmd_branch_lookup_by_unum(fmd_hdl_t *, const char *);
+
+extern void cmd_branch_create_fault(fmd_hdl_t *, cmd_branch_t *,
+ const char *, nvlist_t *);
+extern void cmd_branch_add_dimm(fmd_hdl_t *, cmd_branch_t *, cmd_dimm_t *);
+extern void cmd_branch_remove_dimm(fmd_hdl_t *, cmd_branch_t *, cmd_dimm_t *);
+
+
+extern void *cmd_branch_restore(fmd_hdl_t *, fmd_case_t *, cmd_case_ptr_t *);
+extern void cmd_branch_destroy(fmd_hdl_t *, cmd_branch_t *);
+extern void cmd_branch_validate(fmd_hdl_t *);
+extern void cmd_branch_gc(fmd_hdl_t *);
+extern void cmd_branch_fini(fmd_hdl_t *);
+extern void cmd_branch_dirty(fmd_hdl_t *, cmd_branch_t *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CMD_BRANCH_H */
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_hc_sun4v.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_hc_sun4v.c Mon Jul 30 12:41:05 2007 -0700
@@ -39,3 +39,60 @@
fmd_hdl_error(hdl, "unable to alloc location for fault\n");
return (flt);
}
+
+/*
+ * Build an hc-scheme FRU FMRI for "motherboard=0".  The serial id and part
+ * strings are carried over from the supplied asru when it has them.
+ * Returns the new nvlist, or NULL if an allocation or nvlist add fails.
+ */
+nvlist_t *
+cmd_motherboard_fru_create(fmd_hdl_t *hdl, nvlist_t *asru)
+{
+	nvlist_t *fru, *mb;
+	char *serial, *part;
+	int err;
+
+	/* Both strings are optional. */
+	if (nvlist_lookup_string(asru, FM_FMRI_HC_SERIAL_ID, &serial) != 0)
+		serial = NULL;
+	if (nvlist_lookup_string(asru, FM_FMRI_HC_PART, &part) != 0)
+		part = NULL;
+
+	/* The single hc-list element: motherboard=0. */
+	if (nvlist_alloc(&mb, NV_UNIQUE_NAME, 0) != 0)
+		return (NULL);
+
+	err = nvlist_add_string(mb, FM_FMRI_HC_NAME, "motherboard");
+	if (err == 0)
+		err = nvlist_add_string(mb, FM_FMRI_HC_ID, "0");
+	if (err != 0) {
+		nvlist_free(mb);
+		return (NULL);
+	}
+
+	if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) {
+		fmd_hdl_debug(hdl, "Failed to allocate memory");
+		nvlist_free(mb);
+		return (NULL);
+	}
+
+	/* Populate the FMRI, stopping at the first add that fails. */
+	err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION);
+	if (err == 0)
+		err = nvlist_add_string(fru, FM_FMRI_SCHEME,
+		    FM_FMRI_SCHEME_HC);
+	if (err == 0)
+		err = nvlist_add_string(fru, FM_FMRI_HC_ROOT, "/");
+	if (err == 0)
+		err = nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1);
+	if (err == 0)
+		err = nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &mb, 1);
+	if (err == 0 && serial != NULL)
+		err = nvlist_add_string(fru, FM_FMRI_HC_SERIAL_ID, serial);
+	if (err == 0 && part != NULL)
+		err = nvlist_add_string(fru, FM_FMRI_HC_PART, part);
+
+	/* The standalone element is released on success and on failure. */
+	nvlist_free(mb);
+	if (err != 0) {
+		nvlist_free(fru);
+		return (NULL);
+	}
+	return (fru);
+}
+
+/*
+ * Create a fault named fltnm with certainty cert against the motherboard.
+ * The motherboard FRU FMRI built from asru is passed to
+ * fmd_nvl_create_fault() twice and the location "MB" is attached.  The
+ * local FRU nvlist is freed before the fault is returned.
+ */
+nvlist_t *
+cmd_motherboard_create_fault(fmd_hdl_t *hdl, nvlist_t *asru, const char *fltnm,
+    uint_t cert)
+{
+	nvlist_t *mb = cmd_motherboard_fru_create(hdl, asru);
+	nvlist_t *fault;
+
+	fault = cmd_fault_add_location(hdl,
+	    fmd_nvl_create_fault(hdl, fltnm, cert, mb, mb, NULL), "MB");
+
+	if (mb != NULL)
+		nvlist_free(mb);
+
+	return (fault);
+}
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_hc_sun4v.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_hc_sun4v.h Mon Jul 30 12:41:05 2007 -0700
@@ -36,6 +36,9 @@
#endif
extern nvlist_t *cmd_fault_add_location(fmd_hdl_t *, nvlist_t *, const char *);
+extern nvlist_t *cmd_motherboard_fru_create(fmd_hdl_t *, nvlist_t *);
+extern nvlist_t *cmd_motherboard_create_fault(fmd_hdl_t *, nvlist_t *,
+ const char *, uint_t);
#ifdef __cplusplus
}
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c Mon Jul 30 12:41:05 2007 -0700
@@ -34,6 +34,8 @@
#include <cmd_bank.h>
#include <cmd_page.h>
#include <cmd_cpu.h>
+#include <cmd_branch.h>
+#include <cmd_state.h>
#include <cmd.h>
#include <assert.h>
@@ -53,6 +55,11 @@
#include <sys/fm/ldom.h>
#include <ctype.h>
+/* cmd_fb() treats a TS3_FCR payload value other than this as lane failover. */
+#define VF_TS3_FCR 0x000000000000FFFFULL
+/* Bit tested in the L2_ESYR payload; when set the error goes to cmd_c2c(). */
+#define VF_L2ESYR_C2C 0x8000000000000000ULL
+/* Number of consecutive cpuids scanned per chip by cpu_present(). */
+#define UTS2_CPUS_PER_CHIP 64
+/* Final component of the ereport class that cmd_fb() handles as fbr. */
+#define FBR_ERROR ".fbr"
+
extern ldom_hdl_t *cpumem_diagnosis_lhp;
static fmd_hdl_t *cpumem_hdl = NULL;
@@ -96,6 +103,73 @@
return (CMD_EVD_OK);
}
+/*
+ * Scan UTS2_CPUS_PER_CHIP consecutive cpuids, starting at *cpuid, for one
+ * whose FMRI is present and not unusable.  The candidates are built on a
+ * private copy of asru with only the cpuid member replaced.  On success
+ * *cpuid is set to the id that was found and 0 is returned; -1 is returned
+ * if the copy cannot be made or no cpuid qualifies.
+ */
+static int
+cpu_present(fmd_hdl_t *hdl, nvlist_t *asru, uint32_t *cpuid)
+{
+	nvlist_t *probe;
+	uint32_t id, end;
+	int rc = -1;
+
+	if (nvlist_dup(asru, &probe, 0) != 0) {
+		fmd_hdl_debug(hdl, "unable to alloc asru for thread\n");
+		return (-1);
+	}
+
+	end = *cpuid + UTS2_CPUS_PER_CHIP;
+	for (id = *cpuid; id < end; id++) {
+		(void) nvlist_remove_all(probe, FM_FMRI_CPU_ID);
+
+		/* A candidate that cannot be built is simply skipped. */
+		if (nvlist_add_uint32(probe, FM_FMRI_CPU_ID, id) != 0)
+			continue;
+
+		if (fmd_nvl_fmri_present(hdl, probe) &&
+		    !fmd_nvl_fmri_unusable(hdl, probe)) {
+			*cpuid = id;
+			rc = 0;
+			break;
+		}
+	}
+
+	nvlist_free(probe);
+	return (rc);
+}
+
+/*
+ * Handle an error whose C2C bit is set by faulting the L2 cache of the
+ * other chip instead of memory.
+ *
+ * The detector's cpuid selects the starting cpuid on the opposite chip, and
+ * cpu_present() then picks a present, usable cpu there.  The detector's
+ * cpuid is rewritten to that cpu and the event is handed to cmd_l2u() with
+ * CMD_CPU_LEVEL_CHIP added to clcode.
+ *
+ * Returns CMD_EVD_BAD if the ereport has no detector or detector cpuid, if
+ * no cpu on the other chip qualifies, or if the detector cannot be
+ * rewritten; otherwise the result of cmd_l2u().
+ */
+/*ARGSUSED*/
+cmd_evdisp_t
+cmd_c2c(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
+    cmd_errcl_t clcode)
+{
+	uint32_t cpuid;
+	nvlist_t *det;
+
+	/*
+	 * det is only valid if the detector lookup succeeded, so it must not
+	 * be dereferenced otherwise.
+	 */
+	if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) == 0 &&
+	    nvlist_lookup_uint32(det, FM_FMRI_CPU_ID, &cpuid) == 0) {
+		/*
+		 * If the c2c bit is set, the sending cache of the
+		 * cpu must be faulted instead of the memory.
+		 * If the detector is chip0, the cache of the chip1
+		 * is faulted and vice versa.
+		 */
+		if (cpuid < UTS2_CPUS_PER_CHIP)
+			cpuid = UTS2_CPUS_PER_CHIP;
+		else
+			cpuid = 0;
+
+		if (cpu_present(hdl, det, &cpuid) != -1) {
+			(void) nvlist_remove(det, FM_FMRI_CPU_ID,
+			    DATA_TYPE_UINT32);
+			if (nvlist_add_uint32(det,
+			    FM_FMRI_CPU_ID, cpuid) == 0) {
+				clcode |= CMD_CPU_LEVEL_CHIP;
+				return (cmd_l2u(hdl, ep, nvl, class, clcode));
+			}
+		}
+	}
+	fmd_hdl_debug(hdl, "cmd_c2c: no cpuid discarding C2C error");
+	return (CMD_EVD_BAD);
+}
+
/*
* sun4v's xe_common routine has an extra argument, clcode, compared
* to routine of same name in sun4u.
@@ -106,7 +180,7 @@
const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr)
{
uint64_t afar, l2_afar, dram_afar;
- uint64_t l2_afsr, dram_afsr;
+ uint64_t l2_afsr, dram_afsr, l2_esyr;
uint16_t synd;
uint8_t afar_status, synd_status;
nvlist_t *rsrc;
@@ -176,6 +250,13 @@
afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ?
AFLT_STAT_VALID : AFLT_STAT_INVALID;
synd_status = AFLT_STAT_VALID;
+
+ if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESYR,
+ &l2_esyr) == 0) {
+ if (l2_esyr & VF_L2ESYR_C2C) {
+ return (cmd_c2c(hdl, ep, nvl, class, clcode));
+ }
+ }
break;
case CMD_ERRCL_DSU:
afar = dram_afar;
@@ -191,6 +272,7 @@
synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
}
+
/*ARGSUSED*/
cmd_evdisp_t
cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
@@ -215,6 +297,110 @@
return (CMD_EVD_UNUSED);
}
+
+/*
+ * Diagnose an fbr or fbu ereport against the branch named by the ereport's
+ * resource.
+ *
+ * The branch and its case are looked up, or created if they do not exist.
+ * An ereport whose class ends in FBR_ERROR either produces
+ * fault.memory.link-f at once (the TS3_FCR payload is present and differs
+ * from VF_TS3_FCR) or is recorded in the branch's SERD engine, which
+ * produces fault.memory.link-c when it fires.  Any other class is handled
+ * as fbu and produces fault.memory.link-u.  Once a fault has been created
+ * the SERD engine is torn down, the case is solved and the branch is
+ * written back.
+ *
+ * Returns CMD_EVD_BAD if the resource or detector is missing or the
+ * detector cannot be expanded, CMD_EVD_UNUSED if no branch could be
+ * created, CMD_EVD_REDUND if the branch's case is already solved, and
+ * CMD_EVD_OK otherwise (including when the SERD engine has not fired yet).
+ */
+/*ARGSUSED*/
+cmd_evdisp_t
+cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
+    cmd_errcl_t clcode)
+{
+	cmd_branch_t *branch;
+	const char *uuid;
+	nvlist_t *asru, *det;
+	uint64_t ts3_fcr;
+
+	/*
+	 * The nvlist lookup routines return 0 or a positive errno value, so
+	 * failure has to be detected with != 0; a < 0 test never triggers
+	 * and would let asru/det be used uninitialized.
+	 */
+	if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &asru) != 0) {
+		CMD_STAT_BUMP(bad_mem_asru);
+		return (CMD_EVD_BAD);
+	}
+
+	if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) != 0) {
+		CMD_STAT_BUMP(bad_mem_asru);
+		return (CMD_EVD_BAD);
+	}
+
+	if (fmd_nvl_fmri_expand(hdl, det) < 0) {
+		fmd_hdl_debug(hdl, "Failed to expand detector");
+		return (CMD_EVD_BAD);
+	}
+
+	branch = cmd_branch_lookup(hdl, asru);
+	if (branch == NULL) {
+		if ((branch = cmd_branch_create(hdl, asru)) == NULL)
+			return (CMD_EVD_UNUSED);
+	}
+
+	if (branch->branch_case.cc_cp != NULL &&
+	    fmd_case_solved(hdl, branch->branch_case.cc_cp)) {
+		fmd_hdl_debug(hdl, "Case solved\n");
+		return (CMD_EVD_REDUND);
+	}
+
+	if (branch->branch_case.cc_cp == NULL) {
+		branch->branch_case.cc_cp = cmd_case_create(hdl,
+		    &branch->branch_header, CMD_PTR_BRANCH_CASE, &uuid);
+	}
+
+	if (strcmp(strrchr(class, '.'), FBR_ERROR) == 0) {
+		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_TS3_FCR,
+		    &ts3_fcr) == 0 && (ts3_fcr != VF_TS3_FCR)) {
+			fmd_hdl_debug(hdl,
+			    "Processing fbr with lane failover\n");
+			cmd_branch_create_fault(hdl, branch,
+			    "fault.memory.link-f", det);
+
+		} else {
+			fmd_hdl_debug(hdl, "Adding fbr event to serd engine\n");
+
+			/* The SERD engine is created on first use. */
+			if (branch->branch_case.cc_serdnm == NULL) {
+				branch->branch_case.cc_serdnm =
+				    cmd_mem_serdnm_create(hdl,
+				    "branch", branch->branch_unum);
+
+				fmd_serd_create(hdl,
+				    branch->branch_case.cc_serdnm,
+				    fmd_prop_get_int32(hdl, "fbr_n"),
+				    fmd_prop_get_int64(hdl, "fbr_t"));
+			}
+
+			if (fmd_serd_record(hdl,
+			    branch->branch_case.cc_serdnm, ep) == FMD_B_FALSE)
+				return (CMD_EVD_OK); /* engine hasn't fired */
+
+			fmd_hdl_debug(hdl, "fbr serd fired\n");
+
+			fmd_case_add_serd(hdl, branch->branch_case.cc_cp,
+			    branch->branch_case.cc_serdnm);
+
+			cmd_branch_create_fault(hdl, branch,
+			    "fault.memory.link-c", det);
+		}
+	} else {
+		fmd_hdl_debug(hdl, "Processing fbu event");
+		cmd_branch_create_fault(hdl, branch, "fault.memory.link-u",
+		    det);
+	}
+
+	branch->branch_flags |= CMD_MEM_F_FAULTING;
+
+	/* A fault has been created, so the SERD engine is finished with. */
+	if (branch->branch_case.cc_serdnm != NULL) {
+		fmd_serd_destroy(hdl, branch->branch_case.cc_serdnm);
+		fmd_hdl_strfree(hdl, branch->branch_case.cc_serdnm);
+		branch->branch_case.cc_serdnm = NULL;
+	}
+
+	fmd_case_add_ereport(hdl, branch->branch_case.cc_cp, ep);
+	fmd_case_solve(hdl, branch->branch_case.cc_cp);
+	cmd_branch_dirty(hdl, branch);
+
+	return (CMD_EVD_OK);
+}
+
+/*
+ * Close handler for a branch: arg is the branch, which is destroyed via
+ * cmd_branch_destroy().
+ */
+void
+cmd_branch_close(fmd_hdl_t *hdl, void *arg)
+{
+ cmd_branch_destroy(hdl, arg);
+}
+
+
/*ARGSUSED*/
ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
@@ -250,10 +436,10 @@
}
listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) *
- num_nodes);
+ num_nodes);
nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
- md_find_name(mdp, "mblock"),
- md_find_name(mdp, "fwd"), listp);
+ md_find_name(mdp, "mblock"),
+ md_find_name(mdp, "fwd"), listp);
for (i = 0; i < nmblocks; i++) {
if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) {
physmem = 0;
@@ -429,7 +615,7 @@
namebuf[namelen] = '\0';
if ((j = map_name(namebuf)) < 0)
- continue; /* skip names that don't map */
+ continue; /* skip names that don't map */
if (instlen == 0) {
(void) strncpy(instbuf, "0", 2);
--- a/usr/src/cmd/fm/modules/sun4v/cpumem-retire/cma_main.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/modules/sun4v/cpumem-retire/cma_main.c Mon Jul 30 12:41:05 2007 -0700
@@ -78,6 +78,12 @@
NULL },
{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
NULL },
+ { "fault.memory.link-c", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
+ NULL },
+ { "fault.memory.link-u", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
+ NULL },
+ { "fault.memory.link-f", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
+ NULL },
/*
* The following ultraSPARC-T1/T2 faults do NOT retire a cpu thread,
@@ -96,6 +102,12 @@
FM_CPU_SCHEME_VERSION, NULL },
{ "fault.cpu.*.mau", FM_FMRI_SCHEME_CPU,
FM_CPU_SCHEME_VERSION, NULL },
+ { "fault.cpu.*.lfu-u", FM_FMRI_SCHEME_CPU,
+ FM_CPU_SCHEME_VERSION, NULL },
+ { "fault.cpu.*.lfu-f", FM_FMRI_SCHEME_CPU,
+ FM_CPU_SCHEME_VERSION, NULL },
+ { "fault.cpu.*.lfu-p", FM_FMRI_SCHEME_CPU,
+ FM_CPU_SCHEME_VERSION, NULL },
{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
cma_cpu_retire },
{ NULL, NULL, 0, NULL }
--- a/usr/src/cmd/fm/schemes/mem/i386/mem_disc.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/schemes/mem/i386/mem_disc.c Mon Jul 30 12:41:05 2007 -0700
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -21,12 +20,15 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <mem.h>
+#include <fm/fmd_fmri.h>
+
/*
* We do not yet support DIMM enumeration in the x86 mem scheme because our
* diagnosis is using the new libtopo functionality and hopefully won't need
@@ -37,3 +39,21 @@
{
return (0);
}
+
+/*
+ * The following two routines are stubs for corresponding SPARC-only code.
+ */
+
+/*
+ * x86 stub: serial id lookup is not available here.  Always fails with
+ * errno set to ENOTSUP; none of the arguments is used.
+ */
+/*ARGSUSED*/
+int
+mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
+{
+ errno = ENOTSUP;
+ return (-1);
+}
+
+/*
+ * x86 stub: does nothing; none of the arguments is used.
+ */
+/*ARGSUSED*/
+void
+mem_expand_opt(nvlist_t *nvl, char *unum, char **serids)
+{
+}
--- a/usr/src/cmd/fm/schemes/mem/mem.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/schemes/mem/mem.c Mon Jul 30 12:41:05 2007 -0700
@@ -29,11 +29,8 @@
#include <mem.h>
#include <fm/fmd_fmri.h>
-#include <fcntl.h>
-#include <unistd.h>
#include <string.h>
#include <strings.h>
-#include <time.h>
#include <sys/mem.h>
#ifdef sparc
@@ -43,358 +40,6 @@
mem_t mem;
-#ifdef sparc
-
-extern int mem_update_mdesc(void);
-
-/*
- * Retry values for handling the case where the kernel is not yet ready
- * to provide DIMM serial ids. Some platforms acquire DIMM serial id
- * information from their System Controller via a mailbox interface.
- * The values chosen are for 10 retries 3 seconds apart to approximate the
- * possible 30 second timeout length of a mailbox message request.
- */
-#define MAX_MEM_SID_RETRIES 10
-#define MEM_SID_RETRY_WAIT 3
-
-static mem_dimm_map_t *
-dm_lookup(const char *name)
-{
- mem_dimm_map_t *dm;
-
- for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
- if (strcmp(name, dm->dm_label) == 0)
- return (dm);
- }
-
- return (NULL);
-}
-
-/*
- * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If
- * the unum (or a component of same) wasn't found, -1 is returned with errno
- * set to ENOENT. If the kernel doesn't have support for serial numbers,
- * -1 is returned with errno set to ENOTSUP.
- */
-static int
-mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
-{
- char **dimms, **serids;
- size_t ndimms, nserids;
- int i, rc = 0;
- int fd;
- int retries = MAX_MEM_SID_RETRIES;
- mem_name_t mn;
- struct timespec rqt;
-
- if ((fd = open("/dev/mem", O_RDONLY)) < 0)
- return (-1);
-
- if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
- (void) close(fd);
- return (-1); /* errno is set for us */
- }
-
- serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
- nserids = ndimms;
-
- bzero(&mn, sizeof (mn));
-
- for (i = 0; i < ndimms; i++) {
- mn.m_namelen = strlen(dimms[i]) + 1;
- mn.m_sidlen = MEM_SERID_MAXLEN;
-
- mn.m_name = fmd_fmri_alloc(mn.m_namelen);
- mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
-
- (void) strcpy(mn.m_name, dimms[i]);
-
- do {
- rc = ioctl(fd, MEM_SID, &mn);
-
- if (rc >= 0 || errno != EAGAIN)
- break;
-
- if (retries == 0) {
- errno = ETIMEDOUT;
- break;
- }
-
- /*
- * EAGAIN indicates the kernel is
- * not ready to provide DIMM serial
- * ids. Sleep MEM_SID_RETRY_WAIT seconds
- * and try again.
- * nanosleep() is used instead of sleep()
- * to avoid interfering with fmd timers.
- */
- rqt.tv_sec = MEM_SID_RETRY_WAIT;
- rqt.tv_nsec = 0;
- (void) nanosleep(&rqt, NULL);
-
- } while (retries--);
-
- if (rc < 0) {
- /*
- * ENXIO can happen if the kernel memory driver
- * doesn't have the MEM_SID ioctl (e.g. if the
- * kernel hasn't been patched to provide the
- * support).
- *
- * If the MEM_SID ioctl is available but the
- * particular platform doesn't support providing
- * serial ids, ENOTSUP will be returned by the ioctl.
- */
- if (errno == ENXIO)
- errno = ENOTSUP;
- fmd_fmri_free(mn.m_name, mn.m_namelen);
- fmd_fmri_free(mn.m_sid, mn.m_sidlen);
- mem_strarray_free(serids, nserids);
- mem_strarray_free(dimms, ndimms);
- (void) close(fd);
- return (-1);
- }
-
- serids[i] = fmd_fmri_strdup(mn.m_sid);
-
- fmd_fmri_free(mn.m_name, mn.m_namelen);
- fmd_fmri_free(mn.m_sid, mn.m_sidlen);
- }
-
- mem_strarray_free(dimms, ndimms);
-
- (void) close(fd);
-
- *seridsp = serids;
- *nseridsp = nserids;
-
- return (0);
-}
-
-/*
- * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If
- * the unum (or a component of same) wasn't found, -1 is returned with errno
- * set to ENOENT.
- */
-static int
-mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
-{
- uint64_t drgen = fmd_fmri_get_drgen();
- char **dimms, **serids;
- size_t ndimms, nserids;
- mem_dimm_map_t *dm;
- int i, rc = 0;
-
- if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
- return (-1); /* errno is set for us */
-
- serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
- nserids = ndimms;
-
- for (i = 0; i < ndimms; i++) {
- if ((dm = dm_lookup(dimms[i])) == NULL) {
- rc = fmd_fmri_set_errno(EINVAL);
- break;
- }
-
- if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) {
- /*
- * We don't have a cached copy, or the copy we've got is
- * out of date. Look it up again.
- */
- if (mem_get_serid(dm->dm_device, dm->dm_serid,
- sizeof (dm->dm_serid)) < 0) {
- rc = -1; /* errno is set for us */
- break;
- }
-
- dm->dm_drgen = drgen;
- }
-
- serids[i] = fmd_fmri_strdup(dm->dm_serid);
- }
-
- mem_strarray_free(dimms, ndimms);
-
- if (rc == 0) {
- *seridsp = serids;
- *nseridsp = nserids;
- } else {
- mem_strarray_free(serids, nserids);
- }
-
- return (rc);
-}
-
-/*
- * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If
- * the unum (or a component of same) wasn't found, -1 is returned with errno
- * set to ENOENT.
- */
-static int
-mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp)
-{
- uint64_t drgen = fmd_fmri_get_drgen();
- char **dimms, **serids;
- size_t ndimms, nserids;
- mem_dimm_map_t *dm;
- int i, rc = 0;
-
- if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
- return (-1); /* errno is set for us */
-
- serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
- nserids = ndimms;
-
- /*
- * first go through dimms and see if dm_drgen entries are outdated
- */
- for (i = 0; i < ndimms; i++) {
- if ((dm = dm_lookup(dimms[i])) == NULL ||
- dm->dm_drgen != drgen)
- break;
- }
-
- if (i < ndimms && mem_update_mdesc() != 0) {
- mem_strarray_free(dimms, ndimms);
- return (-1);
- }
-
- /*
- * get to this point if an up-to-date mdesc (and corresponding
- * entries in the global mem list) exists
- */
- for (i = 0; i < ndimms; i++) {
- if ((dm = dm_lookup(dimms[i])) == NULL) {
- rc = fmd_fmri_set_errno(EINVAL);
- break;
- }
-
- if (dm->dm_drgen != drgen)
- dm->dm_drgen = drgen;
-
- /*
- * mdesc and dm entry was updated by an earlier call to
- * mem_update_mdesc, so we go ahead and dup the serid
- */
- serids[i] = fmd_fmri_strdup(dm->dm_serid);
- }
-
- mem_strarray_free(dimms, ndimms);
-
- if (rc == 0) {
- *seridsp = serids;
- *nseridsp = nserids;
- } else {
- mem_strarray_free(serids, nserids);
- }
-
- return (rc);
-}
-
-/*
- * Returns 0 with part numbers if found, returns -1 for errors.
- */
-static int
-mem_get_parts_from_mdesc(const char *unum, char ***partsp, size_t *npartsp)
-{
- uint64_t drgen = fmd_fmri_get_drgen();
- char **dimms, **parts;
- size_t ndimms, nparts;
- mem_dimm_map_t *dm;
- int i, rc = 0;
-
- if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
- return (-1); /* errno is set for us */
-
- parts = fmd_fmri_zalloc(sizeof (char *) * ndimms);
- nparts = ndimms;
-
- /*
- * first go through dimms and see if dm_drgen entries are outdated
- */
- for (i = 0; i < ndimms; i++) {
- if ((dm = dm_lookup(dimms[i])) == NULL ||
- dm->dm_drgen != drgen)
- break;
- }
-
- if (i < ndimms && mem_update_mdesc() != 0) {
- mem_strarray_free(dimms, ndimms);
- mem_strarray_free(parts, nparts);
- return (-1);
- }
-
- /*
- * get to this point if an up-to-date mdesc (and corresponding
- * entries in the global mem list) exists
- */
- for (i = 0; i < ndimms; i++) {
- if ((dm = dm_lookup(dimms[i])) == NULL) {
- rc = fmd_fmri_set_errno(EINVAL);
- break;
- }
-
- if (dm->dm_drgen != drgen)
- dm->dm_drgen = drgen;
-
- /*
- * mdesc and dm entry was updated by an earlier call to
- * mem_update_mdesc, so we go ahead and dup the part
- */
- if (dm->dm_part == NULL) {
- rc = -1;
- break;
- }
- parts[i] = fmd_fmri_strdup(dm->dm_part);
- }
-
- mem_strarray_free(dimms, ndimms);
-
- if (rc == 0) {
- *partsp = parts;
- *npartsp = nparts;
- } else {
- mem_strarray_free(parts, nparts);
- }
-
- return (rc);
-}
-
-static int
-mem_get_parts_by_unum(const char *unum, char ***partp, size_t *npartp)
-{
- if (mem.mem_dm == NULL)
- return (-1);
- else
- return (mem_get_parts_from_mdesc(unum, partp, npartp));
-}
-
-#endif /* sparc */
-
-/*ARGSUSED*/
-
-static int
-mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
-{
- /*
- * Some platforms do not support the caching of serial ids by the
- * mem scheme plugin but instead support making serial ids available
- * via the kernel.
- */
-#ifdef sparc
- if (mem.mem_dm == NULL)
- return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));
- else if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) == 0)
- return (0);
- else
- return (mem_get_serids_from_cache(unum, seridsp, nseridsp));
-#else
- errno = ENOTSUP;
- return (-1);
-#endif /* sparc */
-}
-
static int
mem_fmri_get_unum(nvlist_t *nvl, char **unump)
{
@@ -492,19 +137,16 @@
char *unum, **serids;
uint_t nnvlserids;
size_t nserids;
-#ifdef sparc
- char **parts;
- size_t nparts;
-#endif
int rc;
if ((mem_fmri_get_unum(nvl, &unum) < 0) || (*unum == '\0'))
return (fmd_fmri_set_errno(EINVAL));
if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
- &serids, &nnvlserids)) == 0)
- return (0); /* fmri is already expanded */
- else if (rc != ENOENT)
+ &serids, &nnvlserids)) == 0) { /* already have serial #s */
+ mem_expand_opt(nvl, unum, serids);
+ return (0);
+ } else if (rc != ENOENT)
return (fmd_fmri_set_errno(EINVAL));
if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
@@ -517,24 +159,14 @@
rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
nserids);
+ mem_expand_opt(nvl, unum, serids);
mem_strarray_free(serids, nserids);
if (rc != 0)
return (fmd_fmri_set_errno(EINVAL));
-
-#ifdef sparc
- /*
- * Continue with the process if there are no part numbers.
- */
- if (mem_get_parts_by_unum(unum, &parts, &nparts) < 0)
+ else
return (0);
-
- rc = nvlist_add_string_array(nvl, FM_FMRI_HC_PART, parts, nparts);
-
- mem_strarray_free(parts, nparts);
-#endif
- return (0);
}
static int
@@ -726,13 +358,19 @@
fmd_fmri_fini(void)
{
mem_dimm_map_t *dm, *em;
+ mem_seg_map_t *sm, *tm;
for (dm = mem.mem_dm; dm != NULL; dm = em) {
em = dm->dm_next;
fmd_fmri_strfree(dm->dm_label);
+ fmd_fmri_strfree(dm->dm_part);
fmd_fmri_strfree(dm->dm_device);
fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
}
+ for (sm = mem.mem_seg; sm != NULL; sm = tm) {
+ tm = sm->sm_next;
+ fmd_fmri_free(sm, sizeof (mem_seg_map_t));
+ }
#ifdef sparc
ldom_fini(mem_scheme_lhp);
#endif /* sparc */
--- a/usr/src/cmd/fm/schemes/mem/mem.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/schemes/mem/mem.h Mon Jul 30 12:41:05 2007 -0700
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -80,9 +80,8 @@
* generated. No attempt is made to determine whether or not the named
* item is still present in the system.
*
- * - expand: At the time of this writing, no platforms include bank or DIMM
- * serial numbers in their ereports. As such, the serial number(s) must
- * be added by the diagnosis engine. This entry point will read the
+ * - expand: For platforms which do not include bank or DIMM
+ * serial numbers in their ereports, this entry point will read the
* serial number(s) for the named item, and will add it/them to the passed
* FMRI. Errors will be returned if the FMRI (unum) was unparseable, or if
* the serial number could not be retrieved.
@@ -108,6 +107,15 @@
*/
#define MEM_SERID_MAXLEN 64
+typedef struct mem_seg_map {
+ struct mem_seg_map *sm_next; /* next segment map on the mem_seg list */
+ uint64_t sm_base; /* base address for this segment */
+ uint64_t sm_size; /* size for this segment */
+ uint64_t sm_mask; /* physical address bits that select the DIMM(s) */
+ uint64_t sm_match; /* value of the sm_mask bits selecting these DIMMs */
+ uint16_t sm_shift; /* dimms-per-reference shift applied to an offset */
+} mem_seg_map_t;
+
typedef struct mem_dimm_map {
struct mem_dimm_map *dm_next; /* The next DIMM map */
char *dm_label; /* The UNUM for this DIMM */
@@ -115,18 +123,19 @@
char dm_serid[MEM_SERID_MAXLEN]; /* Cached serial number */
char *dm_part; /* DIMM part number */
uint64_t dm_drgen; /* DR gen count for cached S/N */
+ mem_seg_map_t *dm_seg; /* segment for this DIMM */
} mem_dimm_map_t;
typedef struct mem {
mem_dimm_map_t *mem_dm; /* List supported DIMMs */
uint64_t mem_memconfig; /* HV memory-configuration-id# */
- uint64_t mem_rank_mask; /* "rank" bit */
- int mem_ch_shift; /* # bits for "CH" */
- const char *mem_rank_str; /* string denoting "rank" */
+ mem_seg_map_t *mem_seg; /* list of defined segments */
} mem_t;
extern int mem_discover(void);
extern int mem_get_serid(const char *, char *, size_t);
+extern int mem_get_serids_by_unum(const char *, char ***, size_t *);
+extern void mem_expand_opt(nvlist_t *, char *, char **);
extern int mem_unum_burst(const char *, char ***, size_t *);
extern int mem_unum_contains(const char *, const char *);
--- a/usr/src/cmd/fm/schemes/mem/mem_unum.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/schemes/mem/mem_unum.c Mon Jul 30 12:41:05 2007 -0700
@@ -95,7 +95,9 @@
{ "/MBU_B/MEMB%*d/%n%nMEM%*d%*1c%n", " MEM%*d%*1c%n" },
{ "/CMU%*2d/%n%nMEM%*2d%*1c%n", " MEM%*2d%*1c%n" },
{ "MB/CMP%*d/BR%*d%n:%n%n", " CH%*d/D%*d/J%*4d%n", "/" },
- { "MB/CMP%*d/BR%*d%n%n%n", "/CH%*d/D%*d/J%*4d%n" },
+ { "%n%nMB/CMP%*d/BR%*d/CH%*d/D%*d/J%*4d%n",
+ "MB/CMP%*d/BR%*d/CH%*d/D%*d/J%*4d%n" },
+ { "%n%nMB/CMP%*d/BR%*d/CH%*d/D%*d%n", "MB/CMP%*d/BR%*d/CH%*d/D%*d%n" },
{ NULL }
};
@@ -179,7 +181,6 @@
(void) sscanf(pat, bd->bd_pat, &replace, &start, &matched);
if (matched == -1)
continue;
-
(void) strlcpy(dimmname, pat, sizeof (dimmname));
if (bd->bd_subst != NULL) {
(void) strlcpy(dimmname+replace, bd->bd_subst,
--- a/usr/src/cmd/fm/schemes/mem/sparc/mem_disc.c Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/cmd/fm/schemes/mem/sparc/mem_disc.c Mon Jul 30 12:41:05 2007 -0700
@@ -50,11 +50,15 @@
#include <mem.h>
#include <fm/fmd_fmri.h>
+#include <fcntl.h>
+#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
+#include <time.h>
+#include <sys/mem.h>
#include <sys/fm/ldom.h>
extern ldom_hdl_t *mem_scheme_lhp;
@@ -344,144 +348,106 @@
}
}
-int
-mem_discover_mdesc(md_t *mdp, size_t mdbufsz)
+uint16_t
+mem_log2(uint64_t v)
{
- mde_cookie_t *listp;
- int num_nodes, idx, mdesc_dimm_count, unique_ch;
- mem_dimm_map_t *dm;
- uint64_t sysmem_size, i, drgen = fmd_fmri_get_drgen();
- char curr_ch;
- int num_comps = 0;
- char *unum, *serial, *part, *dash;
-
- num_nodes = md_node_count(mdp);
- listp = fmd_fmri_alloc(sizeof (mde_cookie_t) * num_nodes);
-
- num_comps = md_scan_dag(mdp,
- MDE_INVAL_ELEM_COOKIE,
- md_find_name(mdp, "component"),
- md_find_name(mdp, "fwd"),
- listp);
- if (num_comps == 0) {
-
- /*
- * Find first 'memory' node -- there should only be one.
- * Extract 'memory-generation-id#' value from it.
- */
- mdesc_dimm_count = md_scan_dag(mdp,
- MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "memory"),
- md_find_name(mdp, "fwd"), listp);
-
- if (md_get_prop_val(mdp, listp[0], "memory-generation-id#",
- &mem.mem_memconfig))
- mem.mem_memconfig = 0;
-
- mdesc_dimm_count = md_scan_dag(mdp,
- MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "dimm_data"),
- md_find_name(mdp, "fwd"), listp);
-
- for (idx = 0; idx < mdesc_dimm_count; idx++) {
-
- if (md_get_prop_str(mdp, listp[idx], "nac", &unum) < 0)
- unum = "";
- if (md_get_prop_str(mdp, listp[idx], "serial#",
- &serial) < 0)
- serial = "";
- if (md_get_prop_str(mdp, listp[idx], "part#",
- &part) < 0)
- part = "";
-
- dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
- dm->dm_label = fmd_fmri_strdup(unum);
- (void) strncpy(dm->dm_serid, serial,
- MEM_SERID_MAXLEN - 1);
- dm->dm_part = fmd_fmri_strdup(part);
- dm->dm_drgen = drgen;
+ uint16_t i;
+ for (i = 0; v > 1; i++) {
+ v = v >> 1;
+ }
+ return (i);
+}
- dm->dm_next = mem.mem_dm;
- mem.mem_dm = dm;
- }
- } else {
- char *type, *sp, *jnum, *nac;
- size_t ss;
- for (idx = 0; idx < num_comps; idx++) {
- if (md_get_prop_str(mdp, listp[idx], "type", &type) < 0)
- continue;
- if (strcmp(type, "dimm") == 0) {
- if (md_get_prop_str(mdp, listp[idx], "nac",
- &nac) < 0)
- nac = "";
- if (md_get_prop_str(mdp, listp[idx], "label",
- &jnum) < 0)
- jnum = "";
- if (md_get_prop_str(mdp, listp[idx],
- "serial_number", &serial) < 0)
- serial = "";
- if (md_get_prop_str(mdp, listp[idx],
- "part_number", &part) < 0)
- part = "";
- if (md_get_prop_str(mdp, listp[idx],
- "dash_number", &dash) < 0)
- dash = "";
+static mem_dimm_map_t *
+get_dimm_by_sn(char *sn)
+{
+ mem_dimm_map_t *dp;
- ss = strlen(part) + strlen(dash) + 1;
- sp = fmd_fmri_alloc(ss);
- sp = strcpy(sp, part);
- sp = strncat(sp, dash, strlen(dash) + 1);
-
- dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
-
- if ((strcmp(nac, "") != 0) &&
- (strcmp(jnum, "") != 0)) {
- ss = strlen(nac) + strlen(jnum) + 2;
- unum = fmd_fmri_alloc(ss);
- (void) snprintf(unum, ss, "%s/%s", nac,
- jnum);
- dm->dm_label = unum;
- } else {
- unum = "";
- dm->dm_label = fmd_fmri_strdup(unum);
- }
-
- (void) strncpy(dm->dm_serid, serial,
- MEM_SERID_MAXLEN - 1);
- dm->dm_part = sp;
- dm->dm_drgen = drgen;
-
- dm->dm_next = mem.mem_dm;
- mem.mem_dm = dm;
- }
- }
+ for (dp = mem.mem_dm; dp != NULL; dp = dp->dm_next) {
+ if (strcmp(sn, dp->dm_serid) == 0)
+ return (dp);
}
- if (strstr(mem.mem_dm->dm_label, "BR") != NULL) { /* N2 */
- mem.mem_rank_str = "CH";
- } else { /* Niagara-1 */
- mem.mem_rank_str = "/R";
+ return (NULL);
+}
+
+#define MEM_BYTES_PER_CACHELINE 64
+
+static void
+mdesc_init_n1(md_t *mdp, mde_cookie_t *listp)
+{
+ int idx, mdesc_dimm_count;
+ mem_dimm_map_t *dm, *d;
+ uint64_t sysmem_size, i, drgen = fmd_fmri_get_drgen();
+ int dimms, min_chan, max_chan, min_rank, max_rank;
+ int chan, rank, dimm, chans, chan_step;
+ uint64_t mask, chan_mask, chan_value;
+ uint64_t rank_mask, rank_value;
+ char *unum, *serial, *part;
+ mem_seg_map_t *seg;
+ char s[20];
+
+ /*
+ * Find first 'memory' node -- there should only be one.
+ * Extract 'memory-generation-id#' value from it.
+ */
+ mdesc_dimm_count = md_scan_dag(mdp,
+ MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "memory"),
+ md_find_name(mdp, "fwd"), listp);
+
+ if (md_get_prop_val(mdp, listp[0], "memory-generation-id#",
+ &mem.mem_memconfig))
+ mem.mem_memconfig = 0;
+
+ mdesc_dimm_count = md_scan_dag(mdp,
+ MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "dimm_data"),
+ md_find_name(mdp, "fwd"), listp);
+
+ for (idx = 0; idx < mdesc_dimm_count; idx++) {
+
+ if (md_get_prop_str(mdp, listp[idx], "nac", &unum) < 0)
+ unum = "";
+ if (md_get_prop_str(mdp, listp[idx], "serial#",
+ &serial) < 0)
+ serial = "";
+ if (md_get_prop_str(mdp, listp[idx], "part#",
+ &part) < 0)
+ part = "";
+
+ dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
+ dm->dm_label = fmd_fmri_strdup(unum);
+ (void) strncpy(dm->dm_serid, serial,
+ MEM_SERID_MAXLEN - 1);
+ dm->dm_part = fmd_fmri_strdup(part);
+ dm->dm_drgen = drgen;
+
+ dm->dm_next = mem.mem_dm;
+ mem.mem_dm = dm;
+ }
+ /* N1 (MD) specific segment initialization */
+
+ dimms = 0;
+ min_chan = 99;
+ max_chan = -1;
+ min_rank = 99;
+ max_rank = -1;
+
+ for (d = mem.mem_dm; d != NULL; d = d->dm_next) {
+ if (sscanf(d->dm_label, "MB/CMP0/CH%d/R%d/D%d",
+ &chan, &rank, &dimm) != 3) /* didn't scan all 3 values */
+ return;
+ min_chan = MIN(min_chan, chan);
+ max_chan = MAX(max_chan, chan);
+ min_rank = MIN(min_rank, rank);
+ max_rank = MAX(max_rank, rank);
+ dimms++;
}
- curr_ch = '\0';
- unique_ch = 0;
- for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
- char my_ch;
- if (mem.mem_rank_str == "CH")
- my_ch = *(strstr(dm->dm_label, "BR") + 2);
- else my_ch = *(strstr(dm->dm_label, "CH") + 2);
- if (curr_ch != my_ch) {
- unique_ch++;
- curr_ch = my_ch;
- }
- }
-
- if (unique_ch == 1) mem.mem_ch_shift = 0;
- else if (unique_ch == 2) mem.mem_ch_shift = 1;
- else mem.mem_ch_shift = 2;
-
mdesc_dimm_count = md_scan_dag(mdp,
- MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "mblock"),
- md_find_name(mdp, "fwd"), listp);
-
+ MDE_INVAL_ELEM_COOKIE,
+ md_find_name(mdp, "mblock"),
+ md_find_name(mdp, "fwd"),
+ listp);
sysmem_size = 0;
for (idx = 0; idx < mdesc_dimm_count; idx++) {
uint64_t size = 0;
@@ -489,8 +455,180 @@
sysmem_size += size;
}
- for (i = 1 << 30; i < sysmem_size; i <<= 1); /* round up to 2^i */
- mem.mem_rank_mask = i >> 1; /* PA high order bit */
+ for (i = 1 << 30; i < sysmem_size; i = i << 1)
+ ;
+ if (max_rank > min_rank) {
+ chans = dimms/4;
+ rank_mask = i >> 1;
+ } else {
+ chans = dimms/2;
+ rank_mask = 0;
+ }
+
+ chan_mask = (uint64_t)((chans - 1) * MEM_BYTES_PER_CACHELINE);
+ mask = rank_mask | chan_mask;
+
+ if (chans > 2)
+ chan_step = 1;
+ else
+ chan_step = max_chan - min_chan;
+
+ for (rank = min_rank, rank_value = 0;
+ rank <= max_rank;
+ rank++, rank_value += rank_mask) {
+ for (chan = min_chan, chan_value = 0;
+ chan <= max_chan;
+ chan += chan_step,
+ chan_value += MEM_BYTES_PER_CACHELINE) {
+ seg = fmd_fmri_zalloc(sizeof (mem_seg_map_t));
+ seg->sm_next = mem.mem_seg;
+ mem.mem_seg = seg;
+ seg->sm_base = 0;
+ seg->sm_size = sysmem_size;
+ seg->sm_mask = mask;
+ seg->sm_match = chan_value | rank_value;
+ seg->sm_shift = 1;
+ (void) sprintf(s, "MB/CMP0/CH%1d/R%1d", chan, rank);
+ for (d = mem.mem_dm; d != NULL; d = d->dm_next) {
+ if (strncmp(s, d->dm_label, strlen(s)) == 0)
+ d->dm_seg = seg;
+ }
+ }
+ }
+}
+
+/*
+ * Build the DIMM list and the segment list from a machine description
+ * that contains "component" nodes.  On entry listp holds the num_comps
+ * component cookies found by the caller; it is reused for the bank scan.
+ */
+static void
+mdesc_init_n2(md_t *mdp, mde_cookie_t *listp, int num_comps)
+{
+	mde_cookie_t *dl, t;
+	int idx, i, n, mdesc_bank_count;
+	mem_dimm_map_t *dm, *dp;
+	uint64_t drgen = fmd_fmri_get_drgen();
+	uint64_t mask, match, base, size;
+	char *unum, *serial, *part, *dash;
+	mem_seg_map_t *smp;
+	char *type, *sp, *jnum, *nac;
+	size_t ss, dlsz;
+
+	for (idx = 0; idx < num_comps; idx++) {
+		if (md_get_prop_str(mdp, listp[idx], "type", &type) < 0 ||
+		    strcmp(type, "dimm") != 0)
+			continue;
+
+		if (md_get_prop_str(mdp, listp[idx], "nac", &nac) < 0)
+			nac = "";
+		if (md_get_prop_str(mdp, listp[idx], "label", &jnum) < 0)
+			jnum = "";
+		if (md_get_prop_str(mdp, listp[idx], "serial_number",
+		    &serial) < 0)
+			serial = "";
+		if (md_get_prop_str(mdp, listp[idx], "part_number", &part) < 0)
+			part = "";
+		if (md_get_prop_str(mdp, listp[idx], "dash_number", &dash) < 0)
+			dash = "";
+
+		/* dm_part is the part number with the dash number appended */
+		ss = strlen(part) + strlen(dash) + 1;
+		sp = fmd_fmri_alloc(ss);
+		sp = strcpy(sp, part);
+		sp = strncat(sp, dash, strlen(dash) + 1);
+
+		dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
+		/* the label is "<nac>/<label>", or "" if either is missing */
+		if ((strcmp(nac, "") != 0) && (strcmp(jnum, "") != 0)) {
+			ss = strlen(nac) + strlen(jnum) + 2;
+			unum = fmd_fmri_alloc(ss);
+			(void) snprintf(unum, ss, "%s/%s", nac, jnum);
+			dm->dm_label = unum;
+		} else {
+			unum = "";
+			dm->dm_label = fmd_fmri_strdup(unum);
+		}
+
+		(void) strncpy(dm->dm_serid, serial, MEM_SERID_MAXLEN - 1);
+		dm->dm_part = sp;
+		dm->dm_drgen = drgen;
+
+		dm->dm_next = mem.mem_dm;
+		mem.mem_dm = dm;
+	}
+
+	/* N2 (PRI) specific segment initialization occurs here */
+	mdesc_bank_count = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
+	    md_find_name(mdp, "memory-bank"),
+	    md_find_name(mdp, "fwd"),
+	    listp);
+
+	/* a bank cannot reach more than num_comps "component" nodes */
+	dlsz = (num_comps > 0 ? num_comps : 0) * sizeof (mde_cookie_t);
+	dl = fmd_fmri_zalloc(dlsz);
+
+	for (idx = 0; idx < mdesc_bank_count; idx++) {
+		if (md_get_prop_val(mdp, listp[idx], "mask", &mask) < 0)
+			mask = 0;
+		if (md_get_prop_val(mdp, listp[idx], "match", &match) < 0)
+			match = 0;
+		n = md_scan_dag(mdp, listp[idx],
+		    md_find_name(mdp, "memory-segment"),
+		    md_find_name(mdp, "back"),
+		    &t);	/* only 1 "back" arc, so n is expected to be 1 */
+		/* t is only set when the scan found the owning segment */
+		if (n < 1 || md_get_prop_val(mdp, t, "base", &base) < 0)
+			base = 0;
+		if (n < 1 || md_get_prop_val(mdp, t, "size", &size) < 0)
+			size = 0;
+		smp = fmd_fmri_zalloc(sizeof (mem_seg_map_t));
+		smp->sm_next = mem.mem_seg;
+		mem.mem_seg = smp;
+		smp->sm_base = base;
+		smp->sm_size = size;
+		smp->sm_mask = mask;
+		smp->sm_match = match;
+
+		n = md_scan_dag(mdp, listp[idx],
+		    md_find_name(mdp, "component"),
+		    md_find_name(mdp, "fwd"),
+		    dl);
+		if (n < 0)	/* failed scan: treat the bank as having no DIMMs */
+			n = 0;
+		smp->sm_shift = mem_log2(n);
+
+		for (i = 0; i < n; i++) {
+			if (md_get_prop_str(mdp, dl[i],
+			    "serial_number", &serial) < 0)
+				continue;
+			if ((dp = get_dimm_by_sn(serial)) == NULL)
+				continue;
+			dp->dm_seg = smp;
+		}
+	}
+	fmd_fmri_free(dl, dlsz);
+}
+
+int
+mem_discover_mdesc(md_t *mdp, size_t mdbufsz)
+{
+ mde_cookie_t *listp;
+ int num_nodes;
+ int num_comps = 0;
+
+ num_nodes = md_node_count(mdp);
+ listp = fmd_fmri_alloc(sizeof (mde_cookie_t) * num_nodes);
+
+ num_comps = md_scan_dag(mdp,
+ MDE_INVAL_ELEM_COOKIE,
+ md_find_name(mdp, "component"),
+ md_find_name(mdp, "fwd"),
+ listp);
+ if (num_comps == 0)
+ mdesc_init_n1(mdp, listp);
+ else
+ mdesc_init_n2(mdp, listp, num_comps);
fmd_fmri_free(listp, sizeof (mde_cookie_t) * num_nodes);
fmd_fmri_free(*mdp, mdbufsz);
@@ -593,6 +731,7 @@
for (dm = mem.mem_dm; dm != NULL; dm = next) {
next = dm->dm_next;
fmd_fmri_strfree(dm->dm_label);
+ fmd_fmri_strfree(dm->dm_part);
fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
}
mem.mem_dm = NULL;
@@ -600,3 +739,461 @@
return (mem_discover_mdesc(mdp, mdbufsz));
}
}
+
+/*
+ * Retry values for handling the case where the kernel is not yet ready
+ * to provide DIMM serial ids. Some platforms acquire DIMM serial id
+ * information from their System Controller via a mailbox interface.
+ * The values chosen are for 10 retries 3 seconds apart to approximate the
+ * possible 30 second timeout length of a mailbox message request.
+ */
+#define MAX_MEM_SID_RETRIES 10
+#define MEM_SID_RETRY_WAIT 3
+
+/*
+ * Return the first cached DIMM whose label starts with 'name' (only the
+ * first strlen(name) characters are compared), or NULL if none does.
+ */
+static mem_dimm_map_t *
+dm_lookup(const char *name)
+{
+	mem_dimm_map_t *cur = mem.mem_dm;
+
+	while (cur != NULL &&
+	    strncmp(name, cur->dm_label, strlen(name)) != 0)
+		cur = cur->dm_next;
+
+	/* cur is either the first match or NULL at the end of the list */
+	return (cur);
+}
+
+/*
+ * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If
+ * the unum (or a component of same) wasn't found, -1 is returned with errno
+ * set to ENOENT. If the kernel doesn't have support for serial numbers,
+ * -1 is returned with errno set to ENOTSUP. If the kernel keeps answering
+ * EAGAIN until the retries run out, errno is set to ETIMEDOUT instead.
+ */
+static int
+mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
+{
+ char **dimms, **serids;
+ size_t ndimms, nserids;
+ int i, rc = 0;
+ int fd;
+ int retries = MAX_MEM_SID_RETRIES;
+ mem_name_t mn;
+ struct timespec rqt;
+
+ if ((fd = open("/dev/mem", O_RDONLY)) < 0)
+ return (-1);
+
+ if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
+ (void) close(fd);
+ return (-1); /* errno is set for us */
+ }
+
+ serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
+ nserids = ndimms;
+
+ bzero(&mn, sizeof (mn));
+ /* NOTE: retries is not reset per DIMM; the budget is shared by all */
+ for (i = 0; i < ndimms; i++) {
+ mn.m_namelen = strlen(dimms[i]) + 1;
+ mn.m_sidlen = MEM_SERID_MAXLEN;
+
+ mn.m_name = fmd_fmri_alloc(mn.m_namelen);
+ mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
+
+ (void) strcpy(mn.m_name, dimms[i]);
+
+ do {
+ rc = ioctl(fd, MEM_SID, &mn);
+ /* stop on success or on any failure other than EAGAIN */
+ if (rc >= 0 || errno != EAGAIN)
+ break;
+
+ if (retries == 0) {
+ errno = ETIMEDOUT;
+ break;
+ }
+
+ /*
+ * EAGAIN indicates the kernel is
+ * not ready to provide DIMM serial
+ * ids. Sleep MEM_SID_RETRY_WAIT seconds
+ * and try again.
+ * nanosleep() is used instead of sleep()
+ * to avoid interfering with fmd timers.
+ */
+ rqt.tv_sec = MEM_SID_RETRY_WAIT;
+ rqt.tv_nsec = 0;
+ (void) nanosleep(&rqt, NULL);
+ } while (retries--);
+
+ if (rc < 0) {
+ /*
+ * ENXIO can happen if the kernel memory driver
+ * doesn't have the MEM_SID ioctl (e.g. if the
+ * kernel hasn't been patched to provide the
+ * support).
+ *
+ * If the MEM_SID ioctl is available but the
+ * particular platform doesn't support providing
+ * serial ids, ENOTSUP will be returned by the ioctl.
+ */
+ if (errno == ENXIO)
+ errno = ENOTSUP;
+ fmd_fmri_free(mn.m_name, mn.m_namelen);
+ fmd_fmri_free(mn.m_sid, mn.m_sidlen);
+ mem_strarray_free(serids, nserids);
+ mem_strarray_free(dimms, ndimms);
+ (void) close(fd);
+ return (-1);
+ }
+ /* keep a private copy; the ioctl buffers are freed just below */
+ serids[i] = fmd_fmri_strdup(mn.m_sid);
+
+ fmd_fmri_free(mn.m_name, mn.m_namelen);
+ fmd_fmri_free(mn.m_sid, mn.m_sidlen);
+ }
+
+ mem_strarray_free(dimms, ndimms);
+
+ (void) close(fd);
+
+ *seridsp = serids;
+ *nseridsp = nserids;
+
+ return (0);
+}
+
+/*
+ * Collect the serial ids for every DIMM named by 'unum' from the cached
+ * DIMM list, looking up again any entry that is empty or was cached under
+ * a different DR generation.  Returns 0 and fills in seridsp/nseridsp on
+ * success; any failure returns nonzero and leaves them untouched.
+ */
+static int
+mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
+{
+	uint64_t curgen = fmd_fmri_get_drgen();
+	char **names, **sids;
+	size_t nnames, nsids;
+	mem_dimm_map_t *ent;
+	int n, status = 0;
+
+	if (mem_unum_burst(unum, &names, &nnames) < 0)
+		return (-1);	/* errno is set for us */
+
+	nsids = nnames;
+	sids = fmd_fmri_zalloc(sizeof (char *) * nnames);
+
+	for (n = 0; n < nnames; n++) {
+		ent = dm_lookup(names[n]);
+		if (ent == NULL) {
+			status = fmd_fmri_set_errno(EINVAL);
+			break;
+		}
+
+		/* a cached copy from the current generation is used as is */
+		if (ent->dm_serid[0] != '\0' && ent->dm_drgen == curgen) {
+			sids[n] = fmd_fmri_strdup(ent->dm_serid);
+			continue;
+		}
+
+		/* missing or out of date: look the serial id up again */
+		if (mem_get_serid(ent->dm_device, ent->dm_serid,
+		    sizeof (ent->dm_serid)) < 0) {
+			status = -1;	/* errno is set for us */
+			break;
+		}
+		ent->dm_drgen = curgen;
+		sids[n] = fmd_fmri_strdup(ent->dm_serid);
+	}
+
+	mem_strarray_free(names, nnames);
+
+	if (status != 0) {
+		mem_strarray_free(sids, nsids);
+		return (status);
+	}
+	*seridsp = sids;
+	*nseridsp = nsids;
+	return (0);
+}
+
+/*
+ * Returns 0 with serial numbers if found, -1 (with errno set) for errors.
+ * If any DIMM named by the unum is missing from the DIMM list or carries
+ * an old DR generation, mem_update_mdesc() is called first to refresh the
+ * list.  A DIMM that still cannot be found fails the lookup with errno set
+ * to EINVAL, and nothing is returned through seridsp/nseridsp.
+ */
+static int
+mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp)
+{
+	uint64_t drgen = fmd_fmri_get_drgen();
+	char **dimms, **serids;
+	size_t ndimms, nserids;
+	mem_dimm_map_t *dm;
+	int i, rc = 0;
+
+	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
+		return (-1);	/* errno is set for us */
+
+	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
+	nserids = ndimms;
+
+	/*
+	 * first go through dimms and see if dm_drgen entries are outdated
+	 */
+	for (i = 0; i < ndimms; i++) {
+		if ((dm = dm_lookup(dimms[i])) == NULL ||
+		    dm->dm_drgen != drgen)
+			break;
+	}
+
+	if (i < ndimms && mem_update_mdesc() != 0) {
+		/* release both arrays; serids used to be leaked here */
+		mem_strarray_free(dimms, ndimms);
+		mem_strarray_free(serids, nserids);
+		return (-1);
+	}
+
+	/*
+	 * get to this point if an up-to-date mdesc (and corresponding
+	 * entries in the global mem list) exists
+	 */
+	for (i = 0; i < ndimms; i++) {
+		if ((dm = dm_lookup(dimms[i])) == NULL) {
+			rc = fmd_fmri_set_errno(EINVAL);
+			break;
+		}
+
+		/* stamp the entry with the current DR generation */
+		dm->dm_drgen = drgen;
+
+		serids[i] = fmd_fmri_strdup(dm->dm_serid);
+	}
+
+	mem_strarray_free(dimms, ndimms);
+
+	if (rc == 0) {
+		*seridsp = serids;
+		*nseridsp = nserids;
+	} else {
+		mem_strarray_free(serids, nserids);
+	}
+
+	return (rc);
+}
+
+/*
+ * Returns 0 with part numbers if found, nonzero for errors: the unum
+ * could not be burst, the DIMM list could not be refreshed, a DIMM is
+ * not in the list (EINVAL), or a DIMM has no part number recorded.
+ */
+static int
+mem_get_parts_from_mdesc(const char *unum, char ***partsp, uint_t *npartsp)
+{
+	uint64_t curgen = fmd_fmri_get_drgen();
+	char **names, **pnums;
+	size_t nnames, npnums;
+	mem_dimm_map_t *ent;
+	int n, stale = 0, status = 0;
+
+	if (mem_unum_burst(unum, &names, &nnames) < 0)
+		return (-1);	/* errno is set for us */
+
+	npnums = nnames;
+	pnums = fmd_fmri_zalloc(sizeof (char *) * nnames);
+
+	/*
+	 * Scan until the first DIMM that is unknown or was recorded under
+	 * a different DR generation; one such DIMM calls for a refresh.
+	 */
+	for (n = 0; n < nnames && !stale; n++) {
+		ent = dm_lookup(names[n]);
+		stale = (ent == NULL || ent->dm_drgen != curgen);
+	}
+
+	if (stale && mem_update_mdesc() != 0) {
+		mem_strarray_free(pnums, npnums);
+		mem_strarray_free(names, nnames);
+		return (-1);
+	}
+
+	/*
+	 * From here on the DIMM list is taken to be up to date, either
+	 * because nothing was stale or because the refresh succeeded.
+	 */
+	for (n = 0; n < nnames; n++) {
+		ent = dm_lookup(names[n]);
+		if (ent == NULL) {
+			status = fmd_fmri_set_errno(EINVAL);
+			break;
+		}
+
+		/* stamp the entry with the current DR generation */
+		ent->dm_drgen = curgen;
+
+		/* an entry without a part number fails the whole lookup */
+		if (ent->dm_part == NULL) {
+			status = -1;
+			break;
+		}
+		pnums[n] = fmd_fmri_strdup(ent->dm_part);
+	}
+
+	mem_strarray_free(names, nnames);
+
+	if (status != 0) {
+		mem_strarray_free(pnums, npnums);
+		return (status);
+	}
+
+	*partsp = pnums;
+	*npartsp = npnums;
+	return (0);
+}
+
+static int
+mem_get_parts_by_unum(const char *unum, char ***partp, uint_t *npartp)
+{
+	/* part numbers are only looked up when a DIMM list exists */
+	if (mem.mem_dm != NULL)
+		return (mem_get_parts_from_mdesc(unum, partp, npartp));
+	return (-1);
+}
+
+static int
+get_seg_by_sn(char *sn, mem_seg_map_t **segmap)
+{
+	mem_dimm_map_t *cur = mem.mem_dm;
+
+	/* walk to the first DIMM whose cached serial id equals sn */
+	while (cur != NULL && strcmp(sn, cur->dm_serid) != 0)
+		cur = cur->dm_next;
+	if (cur == NULL)
+		return (-1);
+	*segmap = cur->dm_seg;	/* whatever segment the DIMM has recorded */
+	return (0);
+}
+
+/*
+ * Niagara-1, Niagara-2, and Victoria Falls all have physical address
+ * spaces of 40 bits.
+ */
+
+#define MEM_PHYS_ADDRESS_LIMIT 0x10000000000ULL
+
+/*
+ * Squeeze the DIMM-selection bits out of a physical address.  Bits of
+ * 'paddr' whose position is set in 'mask' are dropped, and the remaining
+ * bits are packed together toward bit 0 in their original order.  Bit
+ * positions up to and including MEM_PHYS_ADDRESS_LIMIT are examined.  The
+ * packed value is the 'address' within the DIMM or set of DIMMs.
+ */
+
+static uint64_t
+extract_bits(uint64_t paddr, uint64_t mask)
+{
+	uint64_t src = 1;	/* bit being examined in paddr */
+	uint64_t dst = 1;	/* next free bit in the packed result */
+	uint64_t packed = 0;
+
+	while (src <= MEM_PHYS_ADDRESS_LIMIT) {
+		if ((src & mask) == 0) {
+			packed |= ((src & paddr) != 0) ? dst : 0;
+			dst <<= 1;
+		}
+		src <<= 1;
+	}
+	return (packed);
+}
+
+/*
+ * Inverse of extract_bits: rebuild a physical address from an 'offset'
+ * within a DIMM set.  Walking up from bit 0, every position set in
+ * 'mask' takes its bit from 'value', and every other position takes the
+ * next unused low-order bit of 'offset'.
+ */
+static uint64_t
+insert_bits(uint64_t offset, uint64_t mask, uint64_t value)
+{
+	uint64_t src = 1;	/* next bit to consume from offset */
+	uint64_t dst = 1;	/* bit being produced in the address */
+	uint64_t paddr = 0;
+
+	while (dst <= MEM_PHYS_ADDRESS_LIMIT) {
+		if ((dst & mask) != 0) {
+			paddr |= dst & value;
+		} else {
+			if ((offset & src) != 0)
+				paddr |= dst;
+			src <<= 1;
+		}
+		dst <<= 1;
+	}
+	return (paddr);
+}
+
+int
+mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
+{
+	/*
+	 * Without a DIMM list the plugin has nothing cached, so the
+	 * serial ids have to come from the kernel.
+	 */
+	if (mem.mem_dm == NULL)
+		return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));
+
+	/* prefer the machine description, then fall back to the cache */
+	if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) != 0)
+		return (mem_get_serids_from_cache(unum, seridsp, nseridsp));
+	return (0);
+}
+
+/*
+ * Optional expansions of a mem FMRI: convert between offset and physaddr
+ * and add the part numbers.  A value that cannot be retrieved or added
+ * does NOT fail fmd_fmri_expand; every optional expansion is attempted.
+ */
+void
+mem_expand_opt(nvlist_t *nvl, char *unum, char **serids)
+{
+	mem_seg_map_t *seg = NULL;
+	uint64_t offset, physaddr;
+	char **parts;
+	uint_t nparts;
+
+	/* a DIMM can be listed without a segment, so test seg itself too */
+	if ((mem.mem_seg != NULL) &&
+	    (get_seg_by_sn(*serids, &seg) == 0) && (seg != NULL)) {
+		if (nvlist_lookup_uint64(nvl,
+		    FM_FMRI_MEM_OFFSET, &offset) == 0) {
+			physaddr = insert_bits((offset<<seg->sm_shift),
+			    seg->sm_mask, seg->sm_match);
+			(void) nvlist_add_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
+			    physaddr); /* displaces any previous physaddr */
+		} else if (nvlist_lookup_uint64(nvl,
+		    FM_FMRI_MEM_PHYSADDR, &physaddr) == 0) {
+			offset = extract_bits(physaddr,
+			    seg->sm_mask) >> seg->sm_shift;
+			(void) (nvlist_add_uint64(nvl, FM_FMRI_MEM_OFFSET,
+			    offset));
+		}
+	}
+
+	/*
+	 * libnvpair lookups return 0 or a positive errno, never < 0.
+	 */
+	if ((nvlist_lookup_string_array(nvl, FM_FMRI_HC_PART,
+	    &parts, &nparts) != 0) &&
+	    (mem_get_parts_by_unum(unum, &parts, &nparts) == 0)) {
+		(void) nvlist_add_string_array(nvl,
+		    FM_FMRI_HC_PART, parts, nparts);
+		mem_strarray_free(parts, nparts);
+	}
+}
--- a/usr/src/pkgdefs/SUNWonmtst.v/prototype_sparc Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/pkgdefs/SUNWonmtst.v/prototype_sparc Mon Jul 30 12:41:05 2007 -0700
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -61,4 +61,5 @@
f none usr/platform/sun4v/include/sys/memtestio_v.h 644 root bin
f none usr/platform/sun4v/include/sys/memtestio_ni.h 644 root bin
f none usr/platform/sun4v/include/sys/memtestio_n2.h 644 root bin
+f none usr/platform/sun4v/include/sys/memtestio_vf.h 644 root bin
f none usr/bin/mtst 555 root bin
--- a/usr/src/uts/sparc/sys/fm/cpu/UltraSPARC-T1.h Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/uts/sparc/sys/fm/cpu/UltraSPARC-T1.h Mon Jul 30 12:41:05 2007 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,6 +42,7 @@
#define FM_EREPORT_PAYLOAD_NAME_L2_SYND "l2-synd"
#define FM_EREPORT_PAYLOAD_NAME_L2_ESR "l2-esr"
#define FM_EREPORT_PAYLOAD_NAME_L2_EAR "l2-ear"
+#define FM_EREPORT_PAYLOAD_NAME_L2_ESYR "l2-esyr"
#define FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR "dram-afsr"
#define FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR "dram-afar"
@@ -50,6 +51,8 @@
#define FM_EREPORT_PAYLOAD_NAME_DRAM_ESR "dram-esr"
#define FM_EREPORT_PAYLOAD_NAME_DRAM_EAR "dram-ear"
+#define FM_EREPORT_PAYLOAD_NAME_TS3_FCR "ts3-fcr"
+
#define FM_EREPORT_CPU_UST1_DAU "dau"
#define FM_EREPORT_CPU_UST1_DAC "dac"
#define FM_EREPORT_CPU_UST1_DSU "dsu"
--- a/usr/src/uts/sun4v/Makefile.files Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/uts/sun4v/Makefile.files Mon Jul 30 12:41:05 2007 -0700
@@ -131,7 +131,8 @@
MEMTEST_OBJS += memtest.o memtest_asm.o \
memtest_v.o memtest_v_asm.o \
memtest_ni.o memtest_ni_asm.o \
- memtest_n2.o memtest_n2_asm.o
+ memtest_n2.o memtest_n2_asm.o \
+ memtest_vf.o
#
# sun4v virtual devices
--- a/usr/src/uts/sun4v/sys/Makefile Mon Jul 30 11:30:55 2007 -0700
+++ b/usr/src/uts/sun4v/sys/Makefile Mon Jul 30 12:41:05 2007 -0700
@@ -96,7 +96,8 @@
CLOSED_HDRS= \
memtestio_ni.h \
memtestio_n2.h \
- memtestio_v.h
+ memtestio_v.h \
+ memtestio_vf.h
ROOTHDRS= $(HDRS:%=$(USR_PSM_ISYS_DIR)/%)
$(CLOSED_BUILD)ROOTHDRS += $(CLOSED_HDRS:%=$(USR_PSM_ISYS_DIR)/%)