home *** CD-ROM | disk | FTP | other *** search
#!/usr/bin/perl
#
# xref - report on the validity of cross references within an SGIDOC or
# SGIDOCBK SGML instance.
#
# Usage : xref <input-file-name>
#
# The report is printed to the currently selected output handle (normally
# standard output) and has three sections:
#
#   1. link targets (ID attributes) that appear more than once,
#   2. <XREF> pointers (IDREF or LINKEND) that match no link target,
#   3. references to other books (<EXTREF>/<LINK> tags with a BOOK attribute).
#
# The input is scanned one line at a time, so a tag that is split across
# several lines is not recognised.

use strict;
use warnings;

# Run as a program unless the file is being loaded by other code.
exit main(@ARGV) unless caller;

# main($sgml_file)
#
# Entry point: checks the command line, scans the file once and prints the
# three report sections.
#
# Returns the process exit status: 0 after a report has been printed, 1 when
# the file name is missing or the file is unreadable.  Dies if the file
# cannot be opened.
sub main {
    my ($sgml_file) = @_;

    # test command line syntax
    if (!defined $sgml_file || $sgml_file eq "") {
        print "Usage : xref <input-file-name>\n";
        return 1;
    }

    if (! -r $sgml_file) {
        print "xref: ERROR unable to read file '$sgml_file'\n";
        return 1;
    }

    open(my $sgml_fh, '<', $sgml_file)
        or die "xref: Unable to open file '$sgml_file': $!\n";

    my %contexts_of;     # target id  => list of context strings, one per use
    my @repeat_targets;  # target ids seen more than once, in discovery order
    my %xref_count;      # XREF pointer => number of appearances
    my %extref_count;    # "book => id" => number of appearances

    # Each scanner works on its own copy of the line, so one pass suffices.
    while (my $line = <$sgml_fh>) {
        _scan_targets($line, \%contexts_of, \@repeat_targets);
        _scan_xrefs($line, \%xref_count);
        _scan_extrefs($line, \%extref_count);
    }
    close($sgml_fh);

    # A pointer is dangling when no tag anywhere in the file defines its ID;
    # this can only be decided once every line has been scanned.
    my %unresolved;
    foreach my $pointer (keys %xref_count) {
        next if exists $contexts_of{$pointer};
        $unresolved{$pointer} = $xref_count{$pointer};
    }

    _print_repeated_targets(\%contexts_of, \@repeat_targets);
    _print_unresolved(\%unresolved);
    _print_extrefs(\%extref_count);

    return 0;
}

# Record every ID="..." attribute found on $line.
#
# $contexts_of gains one context string per occurrence of a target; a target
# is appended to $repeat_targets the moment its second occurrence is seen.
sub _scan_targets {
    my ($line, $contexts_of, $repeat_targets) = @_;

    # Each pass replaces the left-most tag carrying an ID by the ID itself,
    # which both exposes the ID to the context match below and guarantees
    # that the loop terminates.
    while ($line =~ s/<[^>]+\sID\s*=\s*\"([^\"]+)\"[^>]*>/$1/i) {
        my $target = $1;

        # Context is the text on either side of the ID between the enclosing
        # tags on the same line.  When there are no enclosing tags the ID
        # itself stands in for the missing text.  \Q...\E keeps regex
        # metacharacters inside an ID from being interpreted.
        my $context = "$target ";
        if ($line =~ /<[^>]+>([^>]*)\Q$target\E([^>]*)<[^>]+>/i) {
            $context = "$1 $2";
        }

        push(@{ $contexts_of->{$target} }, $context);
        if (@{ $contexts_of->{$target} } == 2) {
            push(@$repeat_targets, $target);
        }
    }
    return;
}

# Count the pointers of every <XREF> tag on $line.
# SGIDOC <XREF> tags use IDREF attributes;
# SGIDOCBK <XREF> tags use LINKEND attributes.
sub _scan_xrefs {
    my ($line, $count_of) = @_;

    while ($line =~ s/<XREF[^>]+(IDREF|LINKEND)\s*=\s*\"([^\"]+)\"[^>]*>/$1/i) {
        $count_of->{$2}++;
    }
    return;
}

# Count the external book references on $line, keyed by "book => id".
# SGIDOC uses <EXTREF IDREF="" BOOK="">;
# SGIDOCBK uses <LINK EXTREF="" BOOK="">.
# Tags without a BOOK attribute are ignored.
sub _scan_extrefs {
    my ($line, $count_of) = @_;

    while ($line =~ s/<(EXTREF|LINK)([^>]+)>//i) {
        my $link_info = $2;
        next unless $link_info =~ /\sBOOK\s*=\s*"([^\"]+)"/i;
        my $book = $1;

        my $id = "Unknown id destination";
        if ($link_info =~ /\s(IDREF|EXTREF)\s*=\s*"([^\"]+)"/i) {
            $id = $2;
        }
        $count_of->{"$book => $id"}++;
    }
    return;
}

# Print the targets that appear more than once, one line per occurrence.
sub _print_repeated_targets {
    my ($contexts_of, $repeat_targets) = @_;

    if (!@$repeat_targets) {
        print "\n Link targets appearing multiple times in book: none\n";
        return;
    }

    print "\n Link targets appearing multiple times in book:\n";
    print " ----------------------------------------------\n";
    foreach my $target (@$repeat_targets) {
        foreach my $context (@{ $contexts_of->{$target} }) {
            print " '$target' : $context\n";
        }
    }
    return;
}

# Print the dangling <XREF> pointers, sorted so the report is reproducible.
sub _print_unresolved {
    my ($count_of) = @_;

    if (!%$count_of) {
        print "\n Unresolved References present in this book: none\n";
        return;
    }

    print "\n Unresolved References present in this book:\n";
    print " -------------------------------------------\n";
    foreach my $pointer (sort keys %$count_of) {
        print " '$pointer' appears $count_of->{$pointer} time(s)\n";
    }
    return;
}

# Print the external book references, sorted so the report is reproducible.
sub _print_extrefs {
    my ($count_of) = @_;

    if (!%$count_of) {
        print "\n External Book References present in this book: none \n";
        return;
    }

    print "\n External Book References present in this book:\n";
    print " ----------------------------------------------\n";
    foreach my $extref (sort keys %$count_of) {
        print " $extref appears $count_of->{$extref} time(s)\n";
    }
    return;
}