/* * Linux Machine check exception handler parser. * (c) 2002 Dave Jones * http://www.codemonkey.org.uk/parsemce/ * * TODO: * - Isolate Intel/AMD differences * - Work around various errata * Intel P3 E24: MC2_STATUS MSR has error codes reversed * Athlon: MSR 408H returns crap in upper 32 bits * * 0.0.1 - 0.0.4 Ancient history, initial program. * 0.0.5 Command line parsing courtesy of Randy Dunlap. * 0.0.6 parse_input and 'multishot' courtesy of Ingo Oeser. * 0.0.7 Fix incorrect parsing of certain flags. * 0.0.8 Minor fixes. * */ /* * Sample kernel output.. * Sep 4 21:43:41 hamlet kernel: CPU 0: Machine Check Exception: 0000000000000004 * Sep 4 21:43:41 hamlet kernel: Bank 1: f600200000000152 at 7600200000000152 * Sep 4 21:43:41 hamlet kernel: Bank 2: d40040000000017a at 540040000000017a * Sep 4 21:43:41 hamlet kernel: Kernel panic: CPU context corrupt */ #include #include #include #include #include #include #include typedef u_int64_t u64; union { u_int64_t full; struct { /* Note - little endian */ unsigned int low; unsigned int high; } split; } msr64; #define PROGNAME "parsemce" #define VERSION "0.0.7" static char optstring[] = "f:e:b:s:a:Vi"; static void usage(void) { fprintf (stderr, "usage: %s [options]\n", PROGNAME); fprintf (stderr, " options: -V \n"); fprintf (stderr, " -e \n"); fprintf (stderr, " -b \n"); fprintf (stderr, " -s \n"); fprintf (stderr, " -a \n"); fprintf (stderr, " -f \n"); fprintf (stderr, " -i \n"); exit (1); } static void decode_ll(int code) { printf ("\tMemory/IO : "); switch (code) { case 0: printf ("Memory access"); break; case 1: printf ("Reserved"); break; case 2: printf ("I/O"); break; case 3: printf ("Other"); break; default: printf ("Invalid"); break; } printf ("\n"); } static void decode_transaction(int code) { printf ("\tTransaction type : "); switch (code) { case 0: printf ("Instruction"); break; case 1: printf ("Data"); break; case 2: printf ("Generic"); break; default: printf ("Invalid"); break; } printf ("\n"); } static void decode_request(int code) { printf ("\tRequest: "); switch (code) { case 0: printf ("Generic error"); break; case 1: printf ("Generic read"); break; case 2: printf ("Generic write"); break; case 3: printf ("Data read"); break; case 4: printf ("Data write"); break; case 5: printf ("Instruction fetch"); break; case 6: printf ("Prefetch"); break; case 7: printf ("Eviction"); break; case 8: printf ("Snoop"); break; default: printf ("Invalid"); break; } printf ("\n"); } static void parse_bank(int bank, u64 value, u64 addr) { int low, high; int errorcode; int ms_errorcode; errorcode = value & 0xffff; ms_errorcode = (value>>17) & 0x7fff; /* bits 17:31*/ low = value & 0xffff; high = value >> 32; /* Could be simple MCA code */ switch (errorcode) { case 0: printf ("No error reported\n"); return; case 1: printf ("Not a known MCA error class\n"); return; case 2: printf ("Internal microcode ROM has parity error\n"); return; case 3: printf ("External error caused by another CPU\n"); return; case 4: printf ("Functional redundancy check failed\n"); return; } if ((errorcode & 0xfc00) == 0x0400) { printf ("Internal error is not a known MCA error class\n"); return; } printf ("parsebank(%d): %llx @ %llx\n", bank, value, addr); if ((high & (1<<31)) == 0) { printf ("Bank contents invalid, not parsing\n"); return; } if ((low & (1<<16)) == 1) printf ("\tIn"); else printf ("\tEx"); printf ("ternal tag parity error\n"); if ((high & (1<<13))) printf ("\tUncorrectable ECC error\n"); if ((high & (1<<14))) printf ("\tCorrectable ECC error\n"); if ((high & (1<<25))) printf ("\tCPU state corrupt. Restart not possible\n"); if ((high & (1<<26))) printf ("\tAddress in addr register valid\n"); if ((high & (1<<27))) printf ("\tMISC register information valid\n"); // FIXME: Parse MISC register. if ((high & (1<<28))) printf ("\tError enabled in control register\n"); if ((high & (1<<29))) printf ("\tError not corrected.\n"); if ((high & (1<<30))) printf ("\tError overflow\n"); /* Decode error code. */ if ((errorcode & (0xff00)) == 0) { printf ("\tTLB Error\n"); decode_transaction ((errorcode>>2 & (1<<0 | 1<<1))); decode_ll (errorcode & (1<<0 | 1<<1)); } if ((errorcode & (0xff00)) == 0x0100) { printf ("\tMemory heirarchy error\n"); decode_request ((errorcode & 0xf)>>4); decode_transaction ((errorcode>>2 & (1<<0 | 1<<1))); decode_ll (errorcode & (1<<0 | 1<<1)); return; } if ((errorcode & (0xf800)) == 0x0800) { printf ("\tBus and interconnect error\n"); printf ("\tParticipation: "); switch ((errorcode & 0x0600) >>9) { case 0: printf ("Local processor originated request"); break; case 1: printf ("Local processor responded to request"); break; case 2: printf ("Local processor observed error as third party"); break; case 3: printf ("Generic"); break; default: printf ("Invalid"); break; } printf ("\n"); printf ("\tTimeout: "); switch (errorcode & 0x0100) { case 0: printf ("Request did not timeout"); break; case 1: printf ("Request timed out"); break; } printf ("\n"); decode_request ((errorcode & 0xf)>>4); decode_transaction ((errorcode>>2 & (1<<0 | 1<<1))); decode_ll (errorcode & (1<<0 | 1<<1)); return; } } void parse_status(u64 status) { printf ("Status: (%llx) ", status); if (status & 1<<2) printf ("Machine Check in progress.\n"); if (status & 1<<1) printf ("Error IP valid\n"); if (status & 1<<0) printf ("Restart IP valid.\n"); else printf ("Restart IP invalid.\n"); } /* Input file parser */ struct state_string_struct { unsigned char len; char *string; }; typedef enum { PS_NONE, PS_CPU_STR, PS_CPU_NR, PS_MCE_STR, PS_MCE_VAL, PS_BANK_STR, PS_BANK_NR, PS_BANK_VAL, PS_BANK_AT, PS_BANK_ADDR, PS_STATE_LAST, /* Last entry */ } parse_state_t; const struct state_string_struct states[PS_STATE_LAST] = { [PS_NONE] = { .string = 0, .len = 0 }, [PS_CPU_STR] = { .string = "CPU", .len = 3 }, [PS_CPU_NR] = { .string = 0, .len = 1 }, [PS_MCE_STR] = { .string = "Machine Check Exception",.len = 23 }, [PS_MCE_VAL] = { .string = 0, .len = 16 }, [PS_BANK_STR] = { .string = "Bank", .len = 4 }, [PS_BANK_NR] = { .string = 0, .len = 1 }, [PS_BANK_VAL] = { .string = 0, .len = 16 }, [PS_BANK_AT] = { .string = "at", .len = 2 }, [PS_BANK_ADDR] = { .string = 0, .len = 16 }, }; /* Must be 2 chars larger than the largest ".len" above */ #define STRING_SZ 25 u64 scan_val64(const char *s, const char *what) { u64 val=0; if (sscanf(s,"%Lx",&val) != 1) { fprintf(stderr,"Error converting <%s>. Aborting\n",what); usage(); exit(1); } return val; } void parse_input(FILE* f) { char s[STRING_SZ]; int first_mce=1; int do_recovery=0,state_changed=0; int c; char *e=&s[0]; char *b; unsigned int collected = 0, todo; u64 nr=~0,val=~0,addr; /* keep GCC happy */ parse_state_t expect = PS_NONE + 1, prev = PS_NONE; b=states[expect].string; todo=states[expect].len; s[0]=0; while ((c=fgetc(f)) != EOF) { /* Mask out non-ascii or invalid characters */ c&=0x7f; if (!collected && !(isalnum(c))) continue; /* if we had a CPU already, expect could be bank or CPU */ if (!first_mce && !collected && b) { if (expect == PS_BANK_STR && c == 'C') { expect=PS_CPU_STR; state_changed=do_recovery=1; } else if (expect == PS_CPU_STR && c == 'B') { expect=PS_BANK_STR; state_changed=do_recovery=1; } } /* Collecting, but not all collected yet */ if (collected < todo) { /* Are we collecting the right string? */ if (b && (b[collected] == c)) { collected++; continue; /* Are we collecting number elements? */ } else if ((todo == 16 && isxdigit(c)) || isdigit(c)) { /* collect it */ *e++=c; *e=0; collected++; continue; /* String not equal -> restart scanning */ } else if (b && (expect == PS_BANK_STR || expect == PS_CPU_STR)) { if (collected) do_recovery=1; } else { /* Completely invalid input */ if (prev == PS_NONE) break; /* Recovery from unwanted input */ expect=prev; state_changed=do_recovery=1; } /* We have enough characters collected */ } else if (collected == todo) { /* Found string -> look for value */ if (b) { prev=expect++; /* This is special */ if (prev == PS_BANK_AT) prev=PS_BANK_STR; state_changed=do_recovery=1; /* found value string -> convert to number */ } else { prev=expect-1; switch (expect) { case PS_CPU_NR: nr=s[0]-'0'; break; case PS_MCE_VAL: val=scan_val64(&s[0],"MCE status"); first_mce=0; printf("CPU %Lu\n",nr); parse_status(val); break; case PS_BANK_NR: nr=s[0]-'0'; break; case PS_BANK_VAL: val=scan_val64(&s[0],"Bank status"); prev=PS_BANK_STR; break; case PS_BANK_ADDR: addr=scan_val64(&s[0],"Bank address"); parse_bank(nr,val,addr); prev=PS_BANK_STR; break; default: return; } expect++; state_changed=do_recovery=1; } } else { /* share code */ if (expect == PS_BANK_STR || expect == PS_CPU_STR) { do_recovery=1; } else if (prev != PS_NONE) { /* Recovery */ expect=prev; state_changed=do_recovery=1; fprintf(stderr, "Too much input: expected %u bytes ",todo); if (b) fprintf(stderr,"%s\n",b); else if (todo <= 16) fprintf(stderr,"number\n"); else fprintf(stderr,"64b hex value\n"); } else { /* Cannot recover */ break; } } /* These are here, to share code */ if (do_recovery) { collected=0; e=&s[0]; *e=0; ungetc(c,f); do_recovery=0; } if (state_changed) { if (expect == PS_STATE_LAST) expect=PS_BANK_STR; b=states[expect].string; todo=states[expect].len; state_changed=0; } } /* Nothing printed? Provide the user with a clue */ if (first_mce) fprintf(stderr, "This file contains no MCE dump\n"); } int main(int argc, char *argv[]) { int optch; int multishot = 0; u64 exception = 0, bank_status = 0, bank_address = 0; int bank_number = 0; int have_exception = 0; int have_bank_nr = 0; int have_bank_status = 0; int have_bank_address = 0; if (argc == 1) usage(); while ((optch = getopt (argc, argv, optstring)) != -1) { switch (optch) { case 'a': bank_address=scan_val64(optarg,"Bank address"); have_bank_address = 1; /* print it out, if user supplied right order */ if (have_exception && have_bank_nr && have_bank_status) { if (!multishot) parse_status (exception); parse_bank(bank_number, bank_status, bank_address); have_bank_address = 0; have_bank_nr = 0; have_bank_status = 0; multishot++; } break; case 'e': exception=scan_val64(optarg,"MCE status"); have_exception = 1; break; case 'b': bank_number=scan_val64(optarg,"Bank number"); have_bank_nr = 1; break; case 's': bank_status=scan_val64(optarg,"Bank status"); have_bank_status = 1; break; case 'V': printf ("%s version %s\n", PROGNAME, VERSION); exit (0); case 'i': parse_input(stdin); return 0; break; case 'f': if (optarg[0] == '-' && optarg[1] == 0) { parse_input(stdin); } else { FILE *f=fopen(optarg,"r"); if (!f) { fprintf (stderr, "Error opening data file: %s\n", strerror(errno)); return 1; } parse_input(f); fclose(f); } return 0; break; /* to keep GCC happy */ default: usage(); break; } } if (!have_exception) { fprintf (stderr, "'-e ' is required\n"); usage(); } if (!multishot) { parse_status (exception); if (have_bank_nr && have_bank_status && have_bank_address) parse_bank(bank_number, bank_status, bank_address); } return 0; }