home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.unix.ultrix
- Path: sparky!uunet!zaphod.mps.ohio-state.edu!pacific.mps.ohio-state.edu!linac!convex!seas.smu.edu!doug
- From: doug@seas.smu.edu (Doug Davis)
- Subject: /dev/errlog (aka elcsd errors) into syslog.. (source)
- Message-ID: <1992Sep8.152630.1114@seas.smu.edu>
- Summary: putting elcsd into the normal syslog
- Keywords: elcsd errlog uerf
- Sender: news@seas.smu.edu (USENET News System)
- Nntp-Posting-Host: turbo_f.seas.smu.edu
- Organization: School of Engineering and Applied Science
- Date: Tue, 8 Sep 1992 15:26:30 GMT
- Lines: 342
-
-
-
- /*
- ** This program monitors the /dev/errlog device and sends the information
- ** found there to the syslog system.. This is useful to all of
- ** us that had existing systems where all the machines logged their
- ** syslog stuff to a single machine (or program) which people then
- ** monitored.
- **
- ** NOTE: DEC recommends that you use the UERF/ELI system, which is
- ** probably better than this, or at least more complete.
- **
- ** HOWEVER! There is no way (that I have found) to monitor UERF over
- ** a bunch of machines in real time at once. Yes, a program COULD
- ** be written to sit on all the elbuffer files that could be written
- ** from various machines onto one machine, but I dislike monitoring
- ** non-ASCII files; it's just not the unix(tm) way :-)
- **
- ** Also, to use this system effectively, i.e. with your Suns and
- ** everything else that SUPPORTS syslog, you need the current version
- ** of /etc/syslog, which can be found on the BSD tape. An executable
- ** for DECstations is sitting in seas.smu.edu's ~ftp/pub/DEC directory
- ** along with a sample syslog.conf.
- **
- ** Of course there's always SNMP (ack, pfft!)
- **
- ** BUGS:
- ** This program needs a lot of work to make it truly complete; currently
- ** it recognizes only a tiny fraction of the errors that your ultrix
- ** machine is capable of producing.
- **
- ** THANKS TO: DEC support, even though /dev/errlog isn't DOCUMENTED in
- ** the ultrix man pages they were able to locate the correct ioctls
- ** to interface with the errlog device driver..
- **
- ** AUTHOR: doug davis (doug@seas.smu.edu)
- **
- ** WARRANTY: none whatsoever
- **
- ** TODO:
- ** o) Add more error interpretations
- ** o) Add a "tunable" severity clause for syslog(xxxxx, ..)
- */
-
- #include <sys/types.h>
- #include <sys/param.h>
- #include <sys/time.h>
- #include <sys/socket.h>
- #include <sys/un.h>
- #include <stdio.h>
- #include <sys/errlog.h>
- #include <fcntl.h>
- #include <sys/ioctl.h>
- #include <elcsd.h>
- #include <syslog.h>
-
- #define NAME "/dev/errlog"
- extern char *ctime(), *subclass(), *devices();
-
- main()
- {
- int errlog, length;
- int i;
- struct el_rec el_rec;
- struct elparam elparam;
- fd_set readfds, writefds, exceptfds;
- int nf, cc;
-
- /* deattach from the calling terminal */
- if (fork())
- exit(0);
- for (i = 0; i < 10; i++)
- (void) close(i);
- (void) open("/", 0);
- (void) dup2(0, 1);
- (void) dup2(0, 2);
- if((i = open("/dev/tty", O_RDWR)) >= 0) {
- (void) ioctl(i, (int) TIOCNOTTY, (char *)0);
- (void) close(i);
- }
-
-
- /* open the syslog errlogging system */
- if(openlog("UERF", 0) == -1) {
- perror("openlog");
- exit(1);
- }
- syslog(LOG_INFO, "syslog error monitor restart");
-
- if((errlog = open(NAME, O_RDWR)) < 0) {
- if (errlog < 0) {
- perror("opening errlog device");
- exit(1);
- }
- }
-
- /* make sure they are clean.. */
- FD_ZERO(&readfds);
- FD_ZERO(&writefds);
- FD_ZERO(&exceptfds);
-
- /*
- ** This inits the kernel's DCB with the PID of this
- ** process. One wonders about the need for this since
- ** any system call should come complete with the u and proc pages
- ** from the calling user program..
- */
- elparam.pid = getpid();
- ioctl(errlog, ELSETPID, &elparam);
-
- for(;;) {
- FD_SET (errlog, &readfds);
- /* Something to read? */
- nf = select(20, &readfds, &writefds, &exceptfds, (struct timeval *) NULL);
-
- /* read data from the device */
- if((cc=read(errlog, &el_rec, sizeof(struct el_rec))) < 0) {
- panic("read from errlog");
- }
-
- /* output it.. */
- printerr(el_rec.elsubid.subid_class, &el_rec);
-
- /*
- ** This advances the pointer for the "in core" errlog buffer by (int) *cc bytes.
- */
- ioctl(errlog, ELMOVPTR, (char *) &cc);
- }
- /* NOTREACHED */
- close(errlog);
- }
/*
** Fatal-error exit: record the caller-supplied description in syslog
** at LOG_ALERT priority, then terminate the process with status -1.
** Does not return.
*/
panic(why)
	char *why;
{
	syslog(LOG_ALERT, "errlogger panic: %s\n", why);

	exit(-1);
}
-
- /*
- ** This prints in quasi human readable form ;-) the error message
- ** to the syslog system. NOTE: There is only a TINY handful
- ** of the of errors supported. If you want to add some more
- ** feel free. Look in /usr/include/sys/errlog.h for the
- ** #defines and structures being used below..
- **
- ** And if you do add some more PLEASE send them back so I
- ** can incorporate the changes!!
- */
-
- printerr(n, el_rec)
- u_short n;
- struct el_rec *el_rec;
- {
- int i;
- switch(n) {
- case ELMSGT_TIM: { /* time stamp */
- syslog(LOG_ALERT, "%d TIMS: new time %24.24s %s",
- el_rec->elrhdr.rhdr_seqnum,
- ctime(&el_rec->el_body.eltimchg.timchg_time.tv_sec),
- el_rec->el_body.eltimchg.timchg_version);
- } break;
-
- case ELCT_MEM: { /* mem. crd/rds */
- static char *memtype[] = { "crd", "rds", "cntrl", "wmask" } ;
- syslog(LOG_ALERT, "%d MEMK: cntrl = %d type = %s number = %d",
- el_rec->elrhdr.rhdr_seqnum,
- el_rec->el_body.elmem.elmemerr.cntl,
- memtype[el_rec->el_body.elmem.elmemerr.type],
- el_rec->el_body.elmem.elmemerr.numerr);
- }; break;
-
- case ELCT_DCNTL: { /* device/controler errors */
- syslog(LOG_ALERT, "%d DEVE: %s",
- el_rec->elrhdr.rhdr_seqnum,
- devices(el_rec));
- }
-
- case ELMSGT_SU: {
- syslog(LOG_ALERT, "%d STRT: %s",
- el_rec->elrhdr.rhdr_seqnum,
- el_rec->el_body.elmsg.msg_asc);
- } break;
- case ELMSGT_SD: {
- syslog(LOG_ALERT, "%d STOP: %s",
- el_rec->elrhdr.rhdr_seqnum,
- el_rec->el_body.elmsg.msg_asc);
- } break;
- case ELMSGT_INFO: { /* ascii message */
- syslog(LOG_ALERT, "%d INFO: %s",
- el_rec->elrhdr.rhdr_seqnum,
- el_rec->el_body.elmsg.msg_asc);
- } break;
- default: {
- syslog(LOG_ALERT, "%d UNKN: %s %d/%d/%d/%d/%d",
- el_rec->elrhdr.rhdr_seqnum,
- subclass(el_rec->elsubid.subid_class),
- el_rec->elsubid.subid_type,
- el_rec->elsubid.subid_ctldevtyp,
- el_rec->elsubid.subid_num,
- el_rec->elsubid.subid_unitnum,
- el_rec->elsubid.subid_errcode);
- } break;
-
- }
- }
- char *
- subclass(n)
- u_short n;
- {
-
- static char *p;
- switch(n) {
- case ELCT_MCK: p="machine check"; break;
- case ELCT_MEM: p="mem. crd/rds"; break;
- case ELCT_DISK: p="disk errs"; break;
- case ELCT_TAPE: p="tape errs"; break;
- case ELCT_DCNTL: p="device controller errs"; break;
- case ELCT_ADPTR: p="adapter errs"; break;
- case ELCT_BUS: p="bus errs"; break;
- case ELCT_SINT: p="stray intr."; break;
- case ELCT_AWE: p="async. write err"; break;
- case ELCT_EXPTFLT: p="panic exception/fault"; break;
- case ELCT_NMIEMM: p="8800 emm exception"; break;
- case ELCT_CTE: p="console timeout entry"; break;
- case ELCT_STKDMP: p="stack dump"; break;
- case ELCT_ESR650: p="ka650 error & status regs"; break;
- case ELCT_6200_INT60: p="vector 60 errors"; break;
- case ELCT_6200_INT54: p="vector 54 errors"; break;
- case ELCT_ESR420: p="ka420 error & status regs"; break;
- case ELCT_ESRPMAX: p="PMAX error & status regs"; break;
- case ELCT_6400_INT60: p="ka6400 vector 0x60 error"; break;
- case ELCT_6400_INT54: p="ka6400 vector 0x54 error"; break;
- case ELCT_MBUS: p="Mbus errors"; break;
- case ELCT_ESR60: p="ka60 error & status regs "; break;
- case ELCT_ESR: p="Generic error&status regs"; break;
- case ELCT_INT60: p="Generic vector 0x60 (hard) error"; break;
- case ELCT_INT54: p="Generic vector 0x54 (soft) error"; break;
- case ELCT_9000_SYNDROME: p="9000 syndrome entry"; break;
- case ELCT_9000_KAF: p="9000 keep alive failure from spu"; break;
- case ELCT_9000_CLK: p="9000 clock"; break;
- case ELCT_9000_SCAN: p="9000 scan"; break;
- case ELCT_9000_CONFIG: p="9000 configuration message"; break;
- case ELCT_VECTOR: p="vector"; break;
- case ELSW_PNC : p="panic (bug check)"; break;
- case ELSW_CIPPD: p="ci ppd events"; break;
- case ELSW_SCS: p="scs events"; break;
- case ELMSGT_INFO: p="info. type msg"; break;
- case ELMSGT_SNAP8600: p="8600 snapshot taken"; break;
- case ELMSGT_SU: p="start up msg"; break;
- case ELMSGT_SD: p="shutdown msg"; break;
- case ELMSGT_TIM: p="time stamp"; break;
- case ELMSGT_DIAG: p="diag. info. type msg"; break;
- case ELMSGT_REPAIR: p="repair"; break;
- default: p="unknown"; break;
- }
- return(p);
- }
- char *
- devices(el_rec)
- struct el_rec *el_rec;
- {
- static char buf[1024];
- switch(el_rec->elsubid.subid_type) {
-
- case ELFZA: { /* FZA errors */
- sprintf(buf, "FZA%d, %x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x/%x",
- el_rec->el_body.el_fza.fza_id,
- el_rec->el_body.el_fza.reset_count, /* reset counter */
- el_rec->el_body.el_fza.timestamp_hi, /* time stamp hi */
- el_rec->el_body.el_fza.timestamp_lo, /* time stamp lo */
- el_rec->el_body.el_fza.write_count,
- el_rec->el_body.el_fza.int_reason, /* Internal failure reason */
- el_rec->el_body.el_fza.ext_reason, /* External failure reason */
- el_rec->el_body.el_fza.cmd_next_ptr, /* Next cmd entry to service */
- el_rec->el_body.el_fza.cmd_next_cmd, /* Next cmd descr, 1st entry */
- el_rec->el_body.el_fza.dma_next_rmc_ptr,
- el_rec->el_body.el_fza.dma_next_rmc_descr,
- el_rec->el_body.el_fza.dma_next_rmc_own,
- el_rec->el_body.el_fza.dma_next_host_ptr,
- el_rec->el_body.el_fza.dma_next_host_descr,
- el_rec->el_body.el_fza.lmgr_next_ptr,
- el_rec->el_body.el_fza.lmgr_next_descr,
- el_rec->el_body.el_fza.smt_next_put_ptr,
- el_rec->el_body.el_fza.smt_next_put_descr,
- el_rec->el_body.el_fza.smt_next_take_ptr,
- el_rec->el_body.el_fza.smt_next_take_descr,
- el_rec->el_body.el_fza.pm_csr, /* Packet mem CSR */
- el_rec->el_body.el_fza.int_68k_present, /* 68k interrupt ctrl reg */
- el_rec->el_body.el_fza.int_68k_mask, /* 68k interrupt ctrl mask reg */
- el_rec->el_body.el_fza.pint_event, /* Port interrupt reg */
- el_rec->el_body.el_fza.port_ctrl_a, /* Port control A */
- el_rec->el_body.el_fza.port_ctrl_a_mask, /* Port control A mask */
- el_rec->el_body.el_fza.port_ctrl_b, /* Port control B */
- el_rec->el_body.el_fza.port_status, /* Port status */
- el_rec->el_body.el_fza.ram_rom_map, /* Map register */
- el_rec->el_body.el_fza.phy_csr, /* Phy CSR */
- el_rec->el_body.el_fza.dma_done, /* DMA done */
- el_rec->el_body.el_fza.dma_err, /* DMA error */
- el_rec->el_body.el_fza.dma_start_lo, /* DMA start dma low addr */
- el_rec->el_body.el_fza.dma_start_hi, /* DMA start high addr */
- el_rec->el_body.el_fza.rmc_cmd, /* RMC command */
- el_rec->el_body.el_fza.rmc_mode, /* RMC mode */
- el_rec->el_body.el_fza.rmc_rcv_page, /* RMC rcv page */
- el_rec->el_body.el_fza.rmc_rcv_params, /* RMC rcv parameters */
- el_rec->el_body.el_fza.rmc_xmt_page, /* RMC xmt page */
- el_rec->el_body.el_fza.rmc_xmt_params, /* RMC xmt parameters */
- el_rec->el_body.el_fza.rmc_interrupts, /* RMC interrupts */
- el_rec->el_body.el_fza.rmc_int_mask, /* RMC interrupt mask */
- el_rec->el_body.el_fza.rmc_chan_status, /* RMC channel status */
- el_rec->el_body.el_fza.mac_rcv_cntrl, /* MAC */
- el_rec->el_body.el_fza.mac_xmt_cntrl,
- el_rec->el_body.el_fza.mac_int_mask_a,
- el_rec->el_body.el_fza.mac_int_mask_b,
- el_rec->el_body.el_fza.mac_rcv_status,
- el_rec->el_body.el_fza.mac_xmt_status,
- el_rec->el_body.el_fza.mac_mla_a,
- el_rec->el_body.el_fza.mac_mla_b,
- el_rec->el_body.el_fza.mac_mla_c,
- el_rec->el_body.el_fza.mac_t_req,
- el_rec->el_body.el_fza.mac_tvx_value);
- return(buf);
- } break;
- case ELSCSI_CNTRL:
- case ELCI_RDCNT:
- case ELCI_ATTN:
- case ELCI_LPKT:
- case ELUQ_ATTN:
- case ELBI_BLA:
- case ELBI_BVP:
- case ELMSCP_CNTRL:
- case ELTMSCP_CNTRL:
- case ELMSI_ATTN:
- case ELMSI_LPKT:
- case ELBI_XNA:
- case ELXMI_XNA:
- case ELVME_DEV_CNTL:
- default: {
- strcpy(buf, "undefined device error");
- return(buf);
- }
- }
- }
-