home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD2.mdf
/
doc
/
mir
/
head.c
< prev
next >
Wrap
Text File
|
1992-07-02
|
13KB
|
374 lines
/*
* Usage - head file_name [ line_count ] [/a][/t] > text
*
* HEAD Displays in printable format the first line_count lines
* within a file; the default is 10 lines. This clone of
* the UNIX HEAD and TAIL utilities provides a quick check on
* the likely contents of a file. If the "/a" option is used,
* accented characters are treated as printable text. If
* "/t" is specified, the display is of the TAIL of the
* file, the LAST line_count lines.
*
* input: Normally an ASCII text file.
*
* output: The specified number of lines is either displayed on the
* screen or sent to a file. Each non-printable character is
* replaced by an ^ symbol. If any line length exceeds 120
* characters, a warning is issued. If any line length exceeds
* 1024 or the file includes null bytes, the program advises
* that the target file is not ASCII text.
*
* writeup: MIR TUTORIAL ONE, topic 5
*
* Written: Douglas Lowry Jan 10 92
* Modified: Douglas Lowry May 11 92 Correct re small files
* Copyright (C) 1992 Marpex Inc.
*
* The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
* usage and co-ordination of the MIR family of programs to analyze,
* prepare and index databases (small through gigabyte size), and
* how to build integrated retrieval software around the MIR search
* engine. The fifth of the five MIR tutorial series explains how
* to extend indexing capability into leading edge search-related
* technologies. For more information, GO IBMPRO on CompuServe;
* MIR files are in the DBMS library. The same files are on the
* Canada Remote Systems BBS. A diskette copy of the Introduction
* is available by mail ($10 US... check, Visa or Mastercard);
* diskettes with Introduction, Tutorial ONE software and the
* shareware Tutorial ONE text cost $29. Shareware registration
* for a tutorial is also $29.
*
* E-mail...
* Compuserve 71431,1337
* Internet doug.lowry%canrem.com
* UUCP canrem!doug.lowry
* Others: doug.lowry@canrem.uucp
*
* FAX... 416 963-5677
*
* "Snail mail"... Douglas Lowry, Ph.D.
* Marpex Inc.
* 5334 Yonge Street, #1102
* North York, Ontario
* Canada M2N 6M2
*
* Related database consultation and preparation services are
* available through:
* Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
* North York, Ontario Canada M2J 4Z7
* Tel. 416 492-3838 FAX 416 492-3843
*
* This program is free software; you may redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License (file 05LICENS) along with this program; if not,
* write to the Free Software Foundation, Inc., 675 Mass Ave,
* Cambridge, MA 02139, USA.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <io.h>
/* Size of the line buffer in process() and of each chunk read by locate(). */
#define MAX_BYTES 1024
/* Endless-loop shorthand; no use of it is visible in this file. */
#define repeat for(;;)
/* Boolean type used for the option flags and for locate()'s result. */
typedef enum _bool
{ FALSE = 0, TRUE = 1 } Bool;
/*
* Forward declarations (old style, without parameter lists) for the
* functions that are defined after main().
*/
void Usage_(), process();
Bool locate() ;
/*
 * Cmdname_ - name of this program, as shown in the usage message
 */
char *
Cmdname_()
{
    return "head";
}
/*
 * MAIN - parse the command line, open the file and call process()
 *
 * Usage - head file_name [ line_count ] [/a][/t] > text
 *
 * argv[1] is the file to show.  Each later argument is either a tag
 * ("-" or "/" followed by a letter: A = show accented characters,
 * T = show the tail of the file) or a line count.  Anything that does
 * not parse as a count of at least 1 resets the count to the default
 * of 10.  Bad usage or an unopenable file ends in Usage_(), which
 * exits with status 1.
 */
int
main( argc, argv )
int argc;
char **argv;
{
    char first ;        /* argv[1][0] */
    FILE *fp ;
    Bool accent,        /* user wants accented chars to show */
         tail,          /* show last lines, not first lines */
         tag ;          /* argument is a tag (-.. or /..) */
    int i,
        opt ;           /* upper-cased option letter, 0 if not a tag */
    long int line_ct ;

    /* Check the count first: argv[1] is NULL when no file is named. */
    if( argc < 2 || argc > 5 )
        Usage_();
    first = argv[1][0] ;
    if( first == '-' || first == '/' || first == '?' )
        Usage_();

    if(( fp = fopen( argv[1], "rb" )) == NULL )
    {
        fprintf( stderr, "Can't open file %s\n", argv[1] ) ;
        Usage_() ;
    }

    line_ct = 10 ;
    accent = tail = FALSE ;
    for( i = 2 ; i < argc ; i++ )
    {
        tag = ( argv[i][0] == '-' || argv[i][0] == '/' ) ? TRUE : FALSE ;

        /*
         * Only look at the second character once the first is known
         * to be non-null; an empty argument has no second character.
         * The cast keeps toupper() away from negative char values.
         */
        opt = tag ? toupper( (unsigned char) argv[i][1] ) : 0 ;

        if( opt == 'A' )
            accent = TRUE ;
        else if( opt == 'T' )
            tail = TRUE ;
        else
        {
            /* strtol, unlike atol, is well defined on overflow */
            line_ct = strtol( argv[i], (char **) NULL, 10 );
            if( line_ct < 1 )
                line_ct = 10 ;
        }
    }

    process( fp, line_ct, accent, tail );
    fclose( fp ) ;
    return( 0 );
}
/*
 * Usage - write the help text to stderr, then exit with status 1
 */
void
Usage_()
{
    /* Help text following the banner line; none of it contains a '%'. */
    static char *help[] = {
        "Displays in printable format the first line_count lines\n",
        "within a file; the default is 10 lines. This clone of\n",
        "the UNIX HEAD and TAIL utilities provides a quick check on\n",
        " the likely contents of a file. If the \"/a\" option is used,\n",
        "accented characters are treated as printable text. If\n",
        "\"/t\" is specified, the display is of the TAIL of the\n",
        "file, the LAST line_count lines.\n\n",
        "input: Normally an ASCII text file.\n\n",
        "output: The specified number of lines is either displayed on the\n",
        "screen or sent to a file. Each non-printable character is\n",
        "replaced by an ^ symbol. If any line length exceeds 120\n",
        "characters, a warning is issued. If any line length exceeds\n",
        " 1024 or the file includes null bytes, the program advises\n",
        "that the target file is not ASCII text.\n\n",
        "writeup: MIR TUTORIAL ONE, topic 5\n\n",
        NULL
    };
    char **piece ;

    fprintf( stderr, "usage: %s file_name [ line_count ] [/a][/t] > text\n\n",
        Cmdname_() );
    for( piece = help ; *piece != NULL ; piece++ )
        fputs( *piece, stderr );

    exit( 1 ) ;
}
/*
* Character class codes stored in the 256-entry table in process().
* process() itself only compares table entries against NON_PRINT.
*/
#define NON_PRINT 0
#define WHITE_SPACE 1
#define PUNCTUATION 2
#define DIGIT 3
#define CONSONANT 4
#define VOWEL 5
#define HI_CONSONANT 6
#define HI_VOWEL 7
#define TYPE_CT 8 /* count of above types */
/*
 * PROCESS - copy up to line_ct lines from fp to stdout in printable form
 *
 * fp       input stream, already open
 * line_ct  number of lines to show
 * accent   TRUE: bytes that fail isprint() are shown as they are when
 *          the table below classes them as anything but NON_PRINT;
 *          FALSE: every such byte is shown as '^'
 * tail     TRUE: locate() is called first to position fp; if it
 *          returns FALSE nothing is shown
 *
 * Carriage returns and ^Z bytes are dropped.  A null byte, or a line
 * that fills the buffer, is reported as "not printable" and ends the
 * program through Usage_().  A failed write to stdout exits with
 * status 1.  A count of lines longer than 120 bytes is reported on
 * stderr at the end.
 */
void
process( fp, line_ct, accent, tail )
FILE *fp ;
long int line_ct ;
Bool accent,            /* user wants accented chars to show */
     tail ;             /* show last lines, not first lines */
{
    /*
     * Class of each byte value, coded with NON_PRINT .. HI_VOWEL.
     * NOTE(review): '.' is coded 5 (VOWEL) where its neighbours are 2;
     * harmless here because only NON_PRINT is tested - confirm intent.
     */
    static const unsigned char table[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* ctls */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ctls */
        /* bl ! " # $ % & ' ( ) * + , - . / */
        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
        /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
        /* @ A B C D E F G H I J K L M N O */
        4, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
        /* P Q R S T U V W X Y Z [ \ ] ^ _ */
        4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 2, 2, 2, 2, 2,
        /* ` a b c d e f g h i j k l m n o */
        2, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
        /* p q r s t u v w x y z { | } ~ DEL */
        4, 4, 4, 4, 4, 5, 4, 4, 4, 5, 4, 2, 2, 2, 2, 0,
        /* Ç ü é â ä à å ç ê ë è ï î ì Ä Å */
        6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7,
        /* É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ */
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0,
        /* á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » */
        7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    /* One spare byte so the terminator fits after MAX_BYTES characters. */
    unsigned char line_in[ MAX_BYTES + 1 ] ;
    Bool printable,     /* file is ASCII text */
         at_eof ;       /* at end of file */
    long int over_120,  /* To warn re line length */
             line_no;
    int ch,             /* one character */
        len ;

    over_120 = 0 ;
    printable = TRUE ;
    at_eof = FALSE ;

    if( tail )
    {
        if( !locate( fp, line_ct ))
            return ;
    }

    for( line_no = 0 ; line_no < line_ct ; line_no++ )
    {
        for( len = 0 ; len < MAX_BYTES ; len++ )
        {
            ch = fgetc( fp ) ;
            if( ch == EOF )
            {
                /*
                 * EOF also signals a read error.  Stop in both cases so
                 * that the value -1 is never used as a table index.
                 */
                if( ferror( fp ))
                    fprintf( stderr, "Trouble reading file.\n\n" ) ;
                at_eof = TRUE ;
                break ;
            }
            if( isprint( ch ) || ch == '\t' )
                line_in[ len ] = ch ;
            else if( !ch )
            {
                printable = FALSE ;
                break ;
            }
            else if( ch == '\n' )
                break ;
            else if( ch == '\015' || ch == '\032' )
                len-- ;     /* delete carriage return, EOF */
            else if( !accent )
                line_in[ len ] = '^' ;
            else if( table[ch] == NON_PRINT )
                line_in[ len ] = '^' ;
            else
                line_in[ len ] = ch ;
        }
        line_in[ len ] = '\0' ;

        if( len > MAX_BYTES - 2 || !printable )
        {
            fprintf( stderr,
                "\nNot printable ASCII. Use f_print filter for display.\n" ) ;
            fclose( fp ) ;
            Usage_();
        }

        /* End of file right after a newline: there is no line to show. */
        if( at_eof && len == 0 )
            break ;

        if( len > 120 )
            over_120++ ;

        /* puts() reports failure as EOF; success is any value >= 0. */
        if( puts( (char *) line_in ) == EOF )
        {
            fprintf( stderr, "Unable to write... FATAL.\n\n" ) ;
            exit( 1 ) ;
        }
        if( at_eof )
            break ;
    }

    if( over_120 )
        fprintf( stderr, "\n*** %ld LINES OVER 120 BYTES LONG ***\n\n",
            over_120 ) ;
    return ;
}
/*
 * LOCATE - Find beginning point for the last line_ct lines
 *
 * Reads the file backward in chunks of at most MAX_BYTES, counting
 * newlines, and leaves fp positioned on the first byte of the last
 * line_ct lines.  If the file holds fewer lines than that, fp is
 * positioned at the start of the file.  A final line with no newline
 * counts as a line; ^Z and carriage-return bytes at the very end of
 * the file are skipped before that decision is made.
 *
 * Returns TRUE with fp positioned for reading, FALSE if the file is
 * empty or cannot be positioned or read.  A null byte is reported as
 * "not printable" and ends the program through Usage_().
 */
Bool
locate( fp, line_ct )
FILE *fp ;
long int line_ct ;
{
    unsigned char buf[ MAX_BYTES ] ;
    Bool gotcha,        /* found desired offset */
         seen_end ;     /* past any trailing ^Z / CR bytes */
    long int line_no,   /* line ends counted so far, from the end */
             want,      /* bytes to read for this chunk */
             buf_len,   /* bytes actually read */
             fil_len,
             end_at,    /* offset just past the part not yet scanned */
             bgn_at,    /* offset of buffer start */
             start,     /* offset where the wanted lines begin */
             i ;

    /* File length by seeking to the end; works on any binary stream. */
    if( fseek( fp, 0L, SEEK_END ))
    {
        fprintf( stderr, "Unable to position file. FATAL!\n\n" );
        return( FALSE ) ;
    }
    fil_len = ftell( fp ) ;
    if( fil_len < 1 )
        return( FALSE ) ;

    start = 0 ;         /* fewer lines than asked for: start at beginning */
    end_at = fil_len ;
    line_no = 0 ;
    gotcha = seen_end = FALSE ;

    while( !gotcha && end_at > 0 )
    {
        /*
         * Each chunk ends where the previous one began, so no byte is
         * scanned (and no newline counted) twice.
         */
        bgn_at = end_at - MAX_BYTES ;
        if( bgn_at < 0 )
            bgn_at = 0 ;
        want = end_at - bgn_at ;

        if( fseek( fp, bgn_at, SEEK_SET ))
        {
            fprintf( stderr, "Unable to position file. FATAL!\n\n" );
            return( FALSE ) ;
        }
        buf_len = (long) fread( buf, sizeof( char ), (size_t) want, fp ) ;
        if( buf_len < want )
        {
            fprintf( stderr, "Trouble reading back in file. FATAL!\n\n" );
            return( FALSE ) ;
        }

        for( i = want - 1 ; i >= 0 ; i-- )
        {
            if( !buf[i] )       /* null byte */
            {
                fprintf( stderr,
                    "\nNot printable ASCII. Use f_print filter for display.\n" ) ;
                fclose( fp ) ;
                Usage_();
            }
            if( !seen_end )
            {
                if( buf[i] == '\032' || buf[i] == '\015' )
                    continue ;  /* trailing bytes that hold no text */
                seen_end = TRUE ;
                if( buf[i] != '\n' )
                    line_no = 1 ;   /* unterminated final line */
            }
            if( buf[i] == '\n' && ++line_no > line_ct )
            {
                start = bgn_at + i + 1 ;
                gotcha = TRUE ;
                break ;
            }
        }
        end_at = bgn_at ;
    }

    if( fseek( fp, start, SEEK_SET ))
    {
        fprintf( stderr, "Unable to position file. FATAL!\n\n" );
        return( FALSE ) ;
    }
    return( TRUE ) ;
}