home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD1.mdf
/
assemblr
/
library
/
sampler0
/
tk.asm
< prev
next >
Wrap
Assembly Source File
|
1987-05-05
|
21KB
|
614 lines
page 55,132
title TK - Token Parsing filter
;
; TK --- A Simple Token Parsing Filter for DOS 2.0
;
; (c) Copyright 1984 by Jim Mott
; 3710 Slopeview Drive
; Sunnyvale, CA 95148
; (408) 274-2620
; All rights reserved. Permission granted to use this software for
; personal, noncommercial purposes only.
;
;
; This program is designed to be a filter for DOS 2.0.
; It will tokenize its input and allow subsetting and/or
; single token per line output.
;
; The format of the command is:
;
; TK {/RJx | /LJx} {/0} {{/v} | {/v/v}}
; where /RJx means right justify all tokens to x positions
; /LJx means left justify all tokens to x positions
; In both forms x must be a decimal number in the range 1..15
; /0 means output one token per line
; /v means select token v for output. You may select any
; number, up to 255, of tokens to output. Repeats are
; allowed and you may change the order of the input tokens
; on the output line.
;
; For example, to extract the list of users from a VM directory file and
; write a sorted list of them without passwords to the printer the
; following command line would be used.
;
; FIND "USER " < DIRECT.VM | TK/LJ8/2/4/5/6/7/8/9 | SORT > PRN
;
;
; For example, to find a list of all sub-directories of the current
; directory sorted by sub-directory name we would use the following
; command line:
;
; DIR | FIND "<DIR>" | TK/LJ8/1/3/4 | SORT | MORE
;
;
; For example, to generate a sorted list of all words used in a document
; with one word per line we could use the following command line:
;
; TK/RJ8/0 < FOOBAR.DOC | SORT | MORE
;
;
;
;
;
;
;
;
;
;
;
;
; Program stack segment. The filler is 8 copies of a 23-character
; string, i.e. 184 bytes of stack. Nothing in this file refers to the
; bytes by name; they only reserve space.
; NOTE(review): 184 bytes must cover the deepest call chain plus the
; INT 21h entry frame and any hardware interrupt - confirm this margin
; is adequate for the target systems.
stack segment para stack 'STACK'
db 8 dup('Jim Mott (408) 274-2620')
stack ends
;
;
dsect   segment para 'DATA'
;
; ---- current input line (filled by bufget, scanned by nextok) -------
;
buffer  db      255 dup('?')            ; line text, terminated by a CR
        db      ' '                     ; guard space directly after the
                                        ; buffer so a token scan always
                                        ; finds a separator
;
; ---- raw read buffer used by cget ------------------------------------
;
glen    dw      0                       ; unread bytes left in gbuff
gbptr   dw      gbuff                   ; next unread byte in gbuff
gbuff   db      255 dup('G')            ; block read from standard input
;
; ---- option and state flags -------------------------------------------
; flag1 is only ever OR-ed / AND-ed / tested by the code, never stored
; as a whole, so it must start at zero. It is initialised explicitly
; here instead of relying on the linker zero-filling a "db ?" slot.
;
flag1   db      0                       ; options enabled (bit mask below)
f1rj    equ     01h                     ; right justify tokens
f1lj    equ     02h                     ; left justify tokens
f1one   equ     04h                     ; output one token per line
f1sub   equ     08h                     ; token subsetting requested
f1work  equ     10h                     ; trailing pad spaces still owed
f1oerr  equ     20h                     ; error in options string
f1eof   equ     40h                     ; end of file on standard input
f1qeof  equ     80h                     ; report end of file on next read
;
spaces  db      ?                       ; number of trailing spaces required
;
toksiz  db      ?                       ; token size if (f1rj or f1lj)
;
; ---- token table: one 3-byte entry per token on the current line ----
;      byte 0    = token length
;      bytes 1-2 = offset of the token text inside "tokens"
;
three   db      3                       ; length of each entry (the code
                                        ; in this file uses a literal 3)
tokcnt  db      ?                       ; count of tokens in table
toktbl  db      3*255 dup('0')          ; table of token lengths/pointers
;
; ---- output selection list (/v options) -----------------------------
;
outcnt  db      ?                       ; number of entries in outers
outers  db      255 dup('1')            ; 1-based token numbers to output
;
; ---- token text area -------------------------------------------------
; Sized for the worst case: a 254-character line holds at most 127
; one-character tokens, and each may be padded to the maximum justify
; width of 15. The former 900 bytes could be overrun by such a line,
; corrupting the message strings that follow.
;
tokptr  dw      ?                       ; pointer to free token space
tokens  db      128*15 dup('2')         ; string space of tokens
;
; ---- messages and output constants ----------------------------------
;
msgver  db      'TK: Incorrect DOS version. Must be at least 2.00.'
        db      0dh,0ah,'$'             ; '$' ends the string for INT 21h/09h
optmsg  db      'TK: Incorrect parameters given.'
        db      0dh,0ah
optmgl  equ     $ - optmsg              ; length of optmsg
noroom  db      'TK: No room for user on device.'
        db      0dh,0ah
lnoroom equ     $ - noroom              ; length of noroom
chrspa  db      ' '                     ; a space to output
chrclf  db      0dh,0ah                 ; CR LF sequence
dsect   ends
;
;
csect segment para 'CODE'
assume cs:csect,ds:dsect,ss:stack
;
main    proc    far
;
; Program entry. Relies on DS and ES still addressing the program
; segment prefix on entry: DS:0000 is pushed as the far return address
; used on the "wrong DOS version" exit, and options reads the command
; tail through ES.
;
        push    ds                      ; far return address = entry DS:0000
        xor     ax,ax
        push    ax
        mov     ax,dsect                ; address our own data segment
        mov     ds,ax
        mov     ah,30h                  ; DOS: get version, major in al
        int     21h
        cmp     al,2                    ; need DOS 2.00 or later
        jge     main00
        mov     dx,offset msgver        ; too old: print the '$' string
        mov     ah,9                    ; with a call DOS 1.x also has
        int     21h
        ret                             ; far return through DS:0000
;
main00: call    options                 ; parse command tail via ES, set flag1
        push    ds                      ; from here on ES = DS so the
        pop     es                      ; string moves in nextok work
        test    flag1,f1oerr            ; did the option parse fail?
        jz      main01                  ; no - start filtering
        mov     bx,2                    ; yes - complain on standard error
        mov     cx,optmgl
        mov     dx,offset optmsg
        mov     ah,40h                  ; DOS: write to file or device
        int     21h
        jmp     short main03            ; and leave
;
; One pass per input line: reset the token table, read a line, split it
; into tokens, write them out.
;
main01: mov     tokcnt,0                ; token table is empty
        mov     tokptr,offset tokens    ; token text area is empty
        call    bufget                  ; cx = characters in buffer
        test    flag1,f1eof             ; input exhausted?
        jnz     main03                  ; yes - finish up
        dec     cx                      ; do not scan the trailing CR
        jle     main01                  ; nothing left - fetch another line
        lea     bx,buffer               ; bx walks the line text
;
main02: call    nextok                  ; peel off one token
        test    cx,cx                   ; anything left on this line?
        jnz     main02                  ; yes - keep going
        call    write                   ; emit the collected tokens
        jmp     short main01            ; next line
;
main03: call    crlf                    ; always end output with CR LF
        mov     ax,4c00h                ; DOS: terminate, return code 0
        int     21h
;
main    endp
;
;
; OPTIONS - Parse the command tail and record the requested options.
;
;   In:   ES = segment holding the command tail; text starts at
;         offset 81h and ends with a CR (0dh).
;   Out:  flag1 bits f1lj / f1rj / f1one / f1sub set as requested,
;         f1oerr set if the tail is malformed;
;         toksiz = justify width (1..15) for /LJx or /RJx;
;         outers[0..outcnt-1] = token numbers from /v switches, in the
;         order given.
;   Regs: none preserved (called once, before any real work starts).
;
;   Accepted switches, each introduced by '/', blanks allowed only
;   before a '/':  /LJx  /RJx  /0  /v   (L, R and J in either case).
;
options proc    near
;
        mov     outcnt,0                ; no /v selections seen yet
        mov     si,81h                  ; first character of command tail
;
opt01:  mov     al,byte ptr es:0[si]    ; fetch next character
        inc     si
        cmp     al,0dh                  ; CR ends the command tail
        jne     opt02
        ret                             ; normal exit
;
opt02:  cmp     al,' '                  ; blanks before a slash are skipped
        je      opt01
        cmp     al,'/'                  ; anything else must be a slash
        je      opt04
;
opterr: or      flag1,f1oerr            ; common exit for every parse error
        ret
;
opt04:  mov     al,byte ptr es:0[si]    ; character following the slash
        inc     si
        cmp     al,'a'                  ; fold lower case to upper case
        jl      opt4a
        sub     al,'a'-'A'
;
opt4a:  cmp     al,'L'
        jl      optnum                  ; below 'L': try it as a number
        jne     opt05                   ; above 'L': might be 'R'
        or      flag1,f1lj              ; /L... - left justify
        test    flag1,f1rj              ; LJ and RJ are mutually exclusive
        jnz     opterr
        jmp     short opt06
;
opt05:  cmp     al,'R'
        jne     opterr                  ; neither L nor R nor a digit
        or      flag1,f1rj              ; /R... - right justify
        test    flag1,f1lj              ; LJ and RJ are mutually exclusive
        jnz     opterr
;
opt06:  mov     al,byte ptr es:0[si]    ; second letter must be J or j
        inc     si
        cmp     al,'J'
        je      opt6a
        cmp     al,'j'
        jne     opterr
;
opt6a:  mov     al,byte ptr es:0[si]    ; first digit of the field width
        inc     si
        call    decbin                  ; al = width, carry if not a number
        jc      opterr
        or      al,al                   ; width 0 is not allowed
        je      opterr
        cmp     al,15                   ; width above 15 is not allowed.
        ja      opterr                  ; Unsigned test: decbin returns
                                        ; 0..255 and a signed test would
                                        ; let 128..255 through.
        mov     toksiz,al
        jmp     short opt01
;
optnum: call    decbin                  ; al = number after the slash
        jc      opterr
        or      al,al
        jne     opt08                   ; non-zero: a token selector
        or      flag1,f1one             ; /0 - one token per output line
        jmp     short opt01
;
opt08:  sub     cx,cx                   ; cx = number of selectors so far
        mov     cl,outcnt
        lea     bx,outers               ; bx = start of selector list
        add     bx,cx                   ; bx = next free slot
        mov     byte ptr [bx],al        ; store the 1-based token number
        inc     outcnt
        or      flag1,f1sub             ; output is now driven by outers
        jmp     opt01
;
options endp
;
;
; nextok - Extract the next blank-delimited token from the input line,
;          copy it (justified or truncated to toksiz when /LJ or /RJ is
;          active) to the free end of the token text area and append a
;          3-byte entry for it to toktbl.
;
;   In:   bx = next source character, cx = source characters left (>0),
;         ES = DS (rep movsb stores through ES:di).
;   Out:  cx = source characters still to scan, 0 when the line is used
;              up (no token is added if only blanks remained);
;         bx = first character after the token;
;         tokptr advanced, tokcnt incremented.
;   Regs: ax, dx, si, di destroyed.
;
;   NOTE(review): nothing here checks that the token text area or
;   toktbl has room left; the data declarations must be large enough
;   for the longest line.
;
nextok  proc    near
;
        mov     di,tokptr               ; di = where the token text goes
;
next01: mov     al,byte ptr 0[bx]       ; skip leading blanks
        cmp     al,' '
        jne     next03                  ; found the start of a token
        inc     bx
        loop    next01
        ret                             ; only blanks were left: cx = 0
;
next03: mov     si,bx                   ; si = start of token in the line
        mov     ah,1                    ; ah = token length so far
;
next04: inc     bx                      ; look at the following character
        mov     al,byte ptr [bx]
        cmp     al,' '
        je      next05                  ; a blank ends the token
        inc     ah
        loop    next04
        dec     ah                      ; ran out of counted characters:
                                        ; the last byte looked at lies
                                        ; past the data, so uncount it
next05: push    cx                      ; keep the remaining source count
        test    flag1,f1rj + f1lj       ; fixed-width tokens requested?
        jz      next09                  ; no - copy the token as it is
        cmp     ah,toksiz
        je      next09                  ; exact fit
        jb      next06                  ; shorter - needs padding. Lengths
                                        ; are unsigned (up to 254); a
                                        ; signed test would mis-handle
                                        ; tokens of 128 or more characters
        mov     ah,toksiz               ; longer - truncate to the width
        jmp     short next09
;
next06: mov     al,toksiz               ; al = number of pad spaces
        sub     al,ah
        test    flag1,f1lj
        jz      next07                  ; right justify: pad goes first
        mov     spaces,al               ; left justify: pad after the copy
        or      flag1,f1work
        jmp     short next09
;
next07: mov     cl,al                   ; cx = pad count (ch is zero since
                                        ; the source count is below 256)
;
next08: mov     byte ptr [di],' '       ; leading pad
        inc     di
        loop    next08
;
next09: mov     cl,ah                   ; cx = characters to copy
        cld
        rep     movsb                   ; token text -> token area
        test    flag1,f1work            ; trailing pad owed?
        jz      next11
        mov     cl,spaces
;
next10: mov     byte ptr [di],' '       ; trailing pad
        inc     di
        loop    next10
        and     flag1,255-f1work
;
next11: mov     dx,tokptr               ; dx = start of the stored token
        mov     tokptr,di               ; next token goes after this one
        test    flag1,f1lj + f1rj
        jz      next12
        mov     ah,toksiz               ; fixed-width: stored length = width
;
next12: mov     cl,ah                   ; cl = stored length
        mov     al,3                    ; each toktbl entry is 3 bytes
        mul     tokcnt                  ; ax = offset of the new entry
        lea     si,toktbl
        add     si,ax
        mov     byte ptr [si],cl        ; entry byte 0    = length
        mov     word ptr 1[si],dx       ; entry bytes 1-2 = text offset
        inc     tokcnt
        pop     cx                      ; cx = source count saved above
        or      cx,cx
        jz      next13                  ; line exhausted
        dec     cx                      ; the scan counted off one fewer
                                        ; character than the token holds
;
next13: ret
;
nextok  endp
;
;
; write - Write the tokens collected for the current line to standard
;         output, as directed by flag1:
;           f1sub clear - every token, in input order;
;           f1sub set   - the tokens named in outers, in that order;
;                         numbers beyond the tokens present are skipped.
;         Unless f1one is set (tout then ends every token with CR LF) a
;         closing CR LF is written after the last token.
;         Nothing at all is written when the line held no tokens.
;
;   Regs: ax, bx, cx, dx destroyed.
;
write   proc    near
;
        sub     cx,cx
        mov     cl,tokcnt               ; cx = tokens on this line
        or      cx,cx
        jnz     write1
        ret                             ; empty line - no output
;
write1: test    flag1,f1sub             ; explicit selection list?
        jnz     write3
        sub     dl,dl                   ; no - dl = 0-based token index
;
write2: call    tout                    ; write token dl
        inc     dl
        loop    write2
        jmp     short write6
;
write3: mov     cl,outcnt               ; cx = entries in outers (ch = 0)
        lea     bx,outers
;
write4: mov     dl,byte ptr [bx]        ; dl = 1-based token number
        cmp     dl,tokcnt               ; does that token exist?
        ja      write5                  ; no - skip it. Unsigned test:
                                        ; selectors run up to 255 and a
                                        ; signed test would accept
                                        ; 128..255 as "small"
        dec     dl                      ; tout wants a 0-based index
        call    tout
;
write5: inc     bx
        loop    write4
;
write6: test    flag1,f1one             ; one token per line?
        jnz     write7                  ; yes - line ends already written
        call    crlf                    ; no - end the output line
;
write7: ret
;
write   endp
;
;
; tout - Write one token of the current line to standard output.
;
;   In:   dl = 0-based index of the token in toktbl.
;   Out:  the token text is written, followed by CR LF when f1one is
;         set and by a single space otherwise.
;   Regs: bx, cx, dx preserved; ax destroyed.
;
tout    proc    near
;
        push    bx
        push    cx
        push    dx
        mov     al,dl                   ; ax = index * 3, the offset of
        mov     ah,3                    ; this token's toktbl entry
        mul     ah
        mov     bx,ax
        mov     cl,toktbl[bx]           ; entry byte 0 = length
        xor     ch,ch
        mov     dx,word ptr toktbl[bx+1] ; entry bytes 1-2 = text offset
        call    oswrite                 ; the token itself
        test    flag1,f1one
        jnz     toutnl                  ; one per line: finish with CR LF
        mov     dx,offset chrspa        ; otherwise separate with a space
        mov     cx,1
        call    oswrite
        jmp     short toutex
;
toutnl: call    crlf
;
toutex: pop     dx
        pop     cx
        pop     bx
        ret
;
tout    endp
;
;
; crlf - Write a carriage return / line feed pair to standard output.
;
;   Regs: ax, bx, cx, dx preserved.
;
crlf    proc    near
;
        push    ax
        push    bx
        push    cx
        push    dx
        mov     dx,offset chrclf        ; the two-byte CR LF constant
        mov     cx,2
        call    oswrite
        pop     dx
        pop     cx
        pop     bx
        pop     ax
        ret
;
crlf    endp
;
;
; oswrite - Write cx bytes starting at ds:dx to standard output.
;           If DOS reports an error, or writes a different number of
;           bytes than asked for, the "no room" message goes to
;           standard error and f1eof is set so that the main loop
;           stops at its next end-of-file test.
;
;   In:   ds:dx = data, cx = byte count.
;   Regs: ax, bx destroyed; cx and dx also destroyed on the error path.
;
oswrite proc    near
;
        mov     ah,40h                  ; DOS: write to file or device
        mov     bx,1                    ; handle 1 = standard output
        int     21h
        jc      oswbad                  ; DOS signalled an error
        cmp     ax,cx                   ; everything written?
        jne     oswbad                  ; short write
        ret
;
oswbad: mov     ah,40h                  ; report the failure on handle 2,
        mov     bx,2                    ; standard error
        mov     cx,lnoroom
        mov     dx,offset noroom
        int     21h
        or      flag1,f1eof             ; fake end of input to stop the run
        ret
;
oswrite endp
;
;
; decbin - Convert an unsigned decimal number to binary.
;
;   In:   al = first character of the number,
;         es:si = the characters that follow it.
;   Out:  carry clear: al = value (0..255), si = first character that
;                      is not a digit;
;         carry set:   al was not a digit, or the value exceeds 255.
;   Regs: bx preserved; ah destroyed.
;
decbin  proc    near
;
        push    bx
        call    decdig                  ; first character must be a digit
        jc      decbad
;
decacc: mov     bl,al                   ; bl = value accumulated so far
        mov     al,byte ptr es:0[si]    ; fetch the next character
        inc     si
        call    decdig
        jc      decend                  ; not a digit: number is complete
        mov     bh,al                   ; park the new digit
        mov     al,10
        mul     bl                      ; ax = 10 * value so far
        mov     bl,bh                   ; bx = new digit, zero extended
        xor     bh,bh
        add     ax,bx                   ; ax = new value
        test    ah,ah                   ; still fits in one byte?
        jz      decacc                  ; yes - look for more digits
;
decbad: stc                             ; not a number / overflow
        pop     bx
        ret
;
decend: dec     si                      ; un-read the terminating character
        mov     al,bl                   ; return the accumulated value
        clc
        pop     bx
        ret
;
; decdig - al = al - '0'; carry clear when the result is 0..9,
;          carry set otherwise.
;
decdig: sub     al,'0'
        cmp     al,10                   ; borrow (carry) only when al < 10
        cmc                             ; flip: carry set = not a digit
        ret
;
decbin  endp
;
;
; bufget - Read one line from standard input into buffer.
;
;   Out:  cx = number of bytes placed in buffer, including the CR that
;              terminates the line (at most 255);
;         f1eof set when no more data is available (cx is then 0, or
;              left unchanged when a queued end of file is reported).
;   Regs: ax, bx, dx, di, si preserved.
;
;   Control characters other than CR are stored as spaces. A line longer
;   than the buffer is split: 254 characters plus a CR are returned and
;   the rest is delivered by the following calls. When the input ends
;   without a final CR, the partial line is returned with a CR appended
;   and end of file is queued (f1qeof) for the next call.
;
bufget  proc    near
;
        push    ax
        push    bx
        push    dx
        push    di
        push    si
        test    flag1,f1qeof            ; end of file queued last time?
        jz      bufg00
        or      flag1,f1eof             ; yes - report it now
        and     flag1,255 - f1qeof
        jmp     short bufret
;
bufg00: sub     cx,cx                   ; cx = characters fetched so far
        lea     di,buffer               ; di = next free byte in buffer
;
bufg01: call    cget                    ; ah = next character, cl counted
        test    flag1,f1eof
        jz      bufg02                  ; no end of file - process ah
        or      cl,cl                   ; end of file: anything buffered?
        jz      bufret                  ; no - return cx = 0 with f1eof
        mov     ah,0dh                  ; yes - terminate the partial line
        call    cput
        inc     cx                      ; count the CR just appended, as
                                        ; for every other line; the caller
                                        ; subtracts one for the CR and
                                        ; would otherwise drop the last
                                        ; real character
        and     flag1,255 - f1eof       ; this call returns data ...
        or      flag1,f1qeof            ; ... the next one reports eof
        jmp     short bufret
;
bufg02: cmp     ah,0dh                  ; CR is kept as the terminator
        je      bufg03
        cmp     ah,20h
        jge     bufg03                  ; printable - keep as is
        mov     ah,' '                  ; other control codes become blanks
;
bufg03: call    cput                    ; store the character
        cmp     ah,0dh
        je      bufret                  ; end of line
        cmp     cl,255                  ; buffer full?
        jne     bufg01
        dec     gbptr                   ; yes - the character just stored
        inc     glen                    ; is about to be replaced by the
                                        ; CR, so hand it back to cget for
                                        ; the next line instead of
                                        ; losing it
        mov     byte ptr [di],0dh       ; force a terminator
;
bufret: pop     si
        pop     di
        pop     dx
        pop     bx
        pop     ax
        ret
;
bufget  endp
;
;
;
; cget - Fetch the next input character, refilling gbuff from standard
;        input (handle 0) in blocks of up to 255 bytes when it is empty.
;
;   Out:  data available: ah = character, cl incremented;
;         no more data:   f1eof set, ah not meaningful.
;         A read error reported by DOS is treated as end of file.
;   Regs: cx (apart from the cl count) and di preserved;
;         al, bx, dx destroyed.
;
cget    proc    near
;
        mov     dx,glen                 ; unread data left in gbuff?
        or      dx,dx
        jnz     cget01
        push    cx                      ; no - refill the buffer
        push    di
        mov     ah,3fh                  ; DOS: read from file or device
        mov     bx,0                    ; handle 0 = standard input
        mov     cx,255
        lea     dx,gbuff
        mov     gbptr,dx                ; reading restarts at gbuff
        int     21h                     ; ax = bytes read, carry on error
        jnc     cget00
        sub     ax,ax                   ; on error ax is an error code,
                                        ; not a count: use 0 bytes so the
                                        ; end of file path is taken
cget00: mov     glen,ax
        mov     dx,ax
        pop     di
        pop     cx
        or      dx,dx                   ; anything read?
        jnz     cget01
        or      flag1,f1eof             ; no - end of file
        ret
;
cget01: mov     bx,gbptr                ; take one byte from gbuff
        mov     ah,byte ptr [bx]
        inc     gbptr
        dec     glen
        inc     cl                      ; count it for bufget
        ret
;
cget    endp
;
;
;
;
; cput - Store the character in ah at [di] and step di to the next
;        slot. When the count in cl has reached 255, di is left where
;        it is so that later stores cannot run past the buffer.
;
;   In:   ah = character, di = destination, cl = characters counted.
;   Out:  di advanced by one unless cl = 255.
;
cput    proc    near
;
        mov     byte ptr [di],ah        ; store the character
        cmp     cl,255                  ; at the capacity limit?
        je      cputex                  ; yes - hold di in place
        inc     di                      ; no - move to the next slot
;
cputex: ret
;
cput    endp
;
csect ends
end main