home *** CD-ROM | disk | FTP | other *** search
- TITLE CrLf Unix<>DOS EOL converter
-
- Comment ~
- Usage: CRLF [-r] file1 [>output]
- -r Convert from DOS CR/LF End Of Line (EOL) to Unix LF EOL.
- Default output is STDOUT (e.g., redirectable).
-
- Written to replace the C version by Steve Creps (which was a slug)
- Given to the public domain, 7 Jul 89
-
- David Kirschbaum
- Toad Hall
- kirsch@braggvax.ARPA
-
- v1.1, 3 Aug 89
- - Bug report from Erich Neuwirth <A4422DAB%AWIUNI11.BITNET@CUNYVM.CUNY.EDU>
- Unix -> DOS conversion breaks down at the BUFFSIZE boundary.
- Found bug when writing output buffer .. a constant CR or LF (in AH)
- was being lost by destroying AX during the output write.
- Fixed. Thanks, Erich.
-
- - While I was at it, relocated dynamic file buffers
- to overlay the startup code and messages.
-
- - Changed input and output buffer sizes to eliminate the multiple tests
- of the ES:DI output buffer pointer during conversion.
-
- Now we read an input buffer-full, process it to the output buffer,
- and then write the full output buffer .. not worrying about overrunning
- the output buffer end (and our stack!)
-
- This overrun problem would only arise during Unix --> DOS conversions
- (where output is longer than input).
- There's no telling just HOW much longer the output will be than the
- input .. worst case is input * 2 (a file of all LFs converted to
- a file of all CR/LFs).
-
- Assuming worst case, we're making our output buffer TWICE the size
- of the input buffer (most unlikely .. but still ..).
-
- Surprisingly, we've gained no significant speed! The more frequent
- input file reads offset the increase in processing speed .. sigh ...
-
- Toad Hall
-
- Comment ends ~
-
-
- CR EQU 0DH ;ASCII carriage return
- LF EQU 0AH ;ASCII line feed
- FALSE EQU 0
- TRUE EQU NOT FALSE ;byte-sized flags are initialised with LOW(TRUE)
-
- STDOUT EQU 1 ;DOS Std Out handle (converted text goes here)
- STDERR EQU 2 ;DOS Std Err handle (messages go here)
- ;BUFFSIZE EQU 30000 ;likely buffer size (arbitrary)
- BUFFSIZE EQU 20000 ;input buffer size v1.1
- ;output has space for twice that much
-
- CSEG SEGMENT PARA PUBLIC 'CODE'
- ASSUME CS:CSEG,DS:CSEG,ES:CSEG ;code and data share this one segment
-
- org 80H
- cmdline label byte ;Parse_CmdLine reads the cmdline length byte from here
-
- org 100H ;CrLf, the END entry point, is assembled at 100H
-
- CrLf proc near
- jmp Start ;entry point: hop over the data that follows
-
- handle dw 0 ;input file handle, stored by Start after the open
-
- eofFlag db FALSE ;made non-0 when a read returns fewer bytes than requested
- addcr db LOW(TRUE) ;non-0 = LF to CR/LF (default); complemented by the -r switch
-
- unix$ db ' Unix --> DOS',CR,LF,0 ;shown after the filename, default mode
- dos$ db ' DOS --> Unix',CR,LF,0 ;shown after the filename, -r mode
-
- u2d_warn$ db 'Warning: '
- db 'Unix file has CRs!',CR,LF,0 ;Unix -> DOS, but HAS CRs!
- d2u_warn$ db 'Warning: '
- db 'DOS file has no CRs!',CR,LF,0 ;DOS -> Unix, but no CRs!
-
- openErr$ db 'Can''t open target file',0 ;input file open err msg
- readErr$ db 'Input file read error',0 ;input file read err msg
- outErr$ db 'Output error',0 ;output write error msg
- switchErr$ db 'Unknown switch',0 ;error msg if not -r switch
- prompt$ db 'Continue? [Y/N]: ',0 ;prompt shown after either warning
- abort$ db 'User abort',0 ;shown when the prompt is not answered with Y
-
- CrLf endp
-
-
- Start   proc    near
-
- ;Main line: parse the command line, open the input file, then convert
- ;it one buffer at a time, writing each converted buffer to StdOut.
- ;Every exit goes through Terminate with the errorlevel in AL; the
- ;error paths first print the AsciiZ message at DS:DX via Msg_Exit.
-
-         call    Parse_CmdLine           ;get any switches,
-                                         ;prepare target filename
-         jc      Msg_Exit                ;no action, DX -> error msg
-
-                                         ;else DX -> target filename
-         mov     ax,3D00H                ;open file, read only
-         int     21H
-         mov     dx,offset openErr$      ;(MOV leaves the open's CF alone)
-         jc      Msg_Exit                ;open failed, die
-
-         mov     handle,ax               ;save input file handle
-
-         call    Test_Buffer             ;initial input file read,
-                                         ;test for funny input file EOLs
-         jnc     Read_1                  ;first buffer is ready to convert
-
- ;Test_Buffer came back with CF set.  It does that with AX = 0 when the
- ;first read returned no bytes (an empty file), and with the DOS error
- ;code (never 0) in AX when the read itself failed.  An empty file is
- ;not an error: there is nothing to convert, so finish with errorlevel 0
- ;rather than claiming a read error.
-
-         or      ax,ax                   ;empty file?
-         jnz     Read_Error              ;no, the read really failed
-         jmp     short Terminate         ;yes, AL = 0 = ERRORLEVEL 0
-
- ;We loop here, processing each buffer full, until EOF or file error.
-
- Read_Lup:
-         mov     si,offset INBUFF        ;DS:SI -> input buffer base
-
-         cmp     eofFlag,FALSE           ;hit input EOF yet?
-         jnz     Flush                   ;yep, flush and terminate
-
-         mov     dx,si                   ;read into input buffer
-         mov     cx,BUFFSIZE             ;try for a full buffer's worth
-         mov     bx,handle               ;input file handle
-         mov     ah,3FH                  ;read from file/device
-         int     21H
-         jc      Read_Error              ;read failed
-
-         or      ax,ax                   ;read anything?
-         jz      Flush                   ;nope, flush and terminate
-
-         cmp     ax,cx                   ;CF set if we got less than asked
-         adc     eofFlag,0               ;will make flag non-0 if EOF
-         mov     cx,ax                   ;CX = input buffer count
-
- Read_1:
-         call    Process_Buff            ;convert input buffer EOLs
-         call    Write_Output            ;write output buffer bytes
-         jnb     Read_Lup                ;went ok
-         jmp     short Write_Error       ;CF means output write failed
-
- Flush:
-         call    Write_Output            ;write any output buffer bytes
-         jc      Write_Error             ;failed
-         xor     ax,ax                   ;ERRORLEVEL=0
-         jmp     short Terminate
-
- Read_Error:
-         mov     dx,offset readErr$      ;'Input file read error'
-         jmp     short Msg_Exit          ;terminate
-
- Write_Error:
-         mov     dx,offset outErr$       ;'Output error'
-                                         ;fall thru to...
-
- ;Come here with any messages in DX
- Msg_Exit:
-         push    ax                      ;save any errors in AL
-         call    Write_StdErr            ;output to StdErr (console)
-         pop     ax
-
- Terminate:
-         mov     ah,4CH                  ;terminate (errorlevel in AL)
-         int     21H                     ;we let DOS close the input file.
-
- Start   endp
-
-
- ;-- First read of the input file, with a sanity check on its EOLs.
- ; Reads up to BUFFSIZE bytes from the input handle into INBUFF and
- ; scans what was read for a CR.  Finding a CR while converting
- ; Unix -> DOS, or finding none while converting DOS -> Unix, shows
- ; a warning followed by the 'Continue? [Y/N]: ' prompt.
- ; Returns CF clear with CX = bytes read, SI -> INBUFF, DI -> OUTBUFF.
- ; Returns CF set if the read failed, or if it returned no bytes
- ; (AX = 0 in that case).
- ; If the reply does not uppercase to 'Y' there is no return at all:
- ; the program leaves through Msg_Exit with ERRORLEVEL 1.
-
- Test_Buffer proc near
-
-         mov     bx,handle               ;input file handle
-         mov     cx,BUFFSIZE             ;try for a full buffer's worth
-         mov     dx,offset INBUFF        ;read into input buffer
-         mov     ah,3FH                  ;read from file/device
-         int     21H
-         jc      TB_Done                 ;read failed, hand CF back set
-
-         or      ax,ax                   ;anything there at all?
-         jnz     TB_Scan                 ;yes, go look at it
-         stc                             ;no: report it with CF set
-         ret
-
- TB_Scan:
-         cmp     ax,cx                   ;short read sets CF ..
-         adc     eofFlag,0               ;.. which makes the EOF flag non-0
-         mov     cx,ax                   ;CX = input buffer count
-
-         mov     si,dx                   ;SI -> INBUFF, ready for the converter
-         mov     di,dx                   ;DI -> INBUFF for the scan
-         mov     al,CR                   ;the byte we are hunting
-         push    cx                      ;the scan eats CX
-         repne   scasb                   ;ZF set if a CR was found
-         pop     cx
-         pushf                           ;keep ZF past the mode test below
-
-         mov     dx,offset d2u_warn$     ;'Warning: DOS file has no CRs!'
-         cmp     addCr,FALSE             ;which way are we converting?
-         jz      TB_FromDos              ;DOS -> Unix
-
- ;Unix -> DOS: a CR in the input suggests this is NOT a Unix file.
-
-         mov     dx,offset u2d_warn$     ;'Warning: Unix file has CRs!'
-         popf                            ;scan result back
-         jz      TB_Ask                  ;there WAS a CR, warn the user
-         jmp     short TB_Fine           ;no CRs, as expected
-
- ;DOS -> Unix: no CR in the input suggests this is NOT a DOS file.
-
- TB_FromDos:
-         popf                            ;scan result back
-         jz      TB_Fine                 ;there WAS a CR, as expected
-
- ;Shared by both modes: DX -> the warning to display.
- TB_Ask:
-         call    Write_StdErr            ;display warning msg
-         mov     dx,offset prompt$       ;'Continue? [Y/N]: '
-         call    Write_StdErr            ;display prompt
-         mov     ax,0C08H                ;clear kbd, kbd input w/o echo
-         int     21H
-         and     al,5FH                  ;uppercase response
-         cmp     al,'Y'                  ;Yes, continue?
-         jz      TB_Fine                 ;yep
-         pop     ax                      ;discard our return address
-         mov     al,1                    ;ERRORLEVEL 1
-         mov     dx,offset abort$        ;'User abort'
-         jmp     Msg_Exit                ;display, terminate
-
- TB_Fine:
-         mov     di,offset OUTBUFF       ;ES:DI -> output buffer base
-         clc                             ;success
- TB_Done:
-         ret
-
- Test_Buffer endp
-
-
- ;-- Hands the current input buffer to the converter selected by addcr.
- ; SI, DI and CX are passed through untouched; see Unix_To_Dos and
- ; Dos_To_Unix for what they expect in them.
- ; Destroys BX, plus whatever the chosen converter destroys.
- ; The converter is entered with JMP, not CALL, so its RET returns
- ; straight to our caller.
-
- Process_Buff proc near
-
-         mov     bx,offset Unix_To_Dos   ;assume Unix -> DOS conversion
-         cmp     addcr,LOW(TRUE)         ;byte flag, so compare with the
-                                         ;byte-sized constant, as the flag's
-                                         ;definition and Parse_CmdLine do
-         jz      PB_Jump                 ;yep, adding CRs
-         mov     bx,offset Dos_To_Unix   ;nope, CR/LF to LF conversion
- PB_Jump:
-         jmp     bx                      ;return from whichever procedure
-
- Process_Buff endp
-
-
- ;-- DOS -> Unix: copies the input buffer to the output buffer,
- ; leaving out every CR byte (so CR/LF comes out as LF).
- ; In:  DS:SI -> input buffer start
- ;      ES:DI -> next free output buffer byte
- ;      CX = bytes in the input buffer (must not be 0)
- ; Out: DI -> next free output buffer byte after the copied data
- ;      SI -> just past the input consumed, CX = 0
- ; Destroys AX.
- ; There is no output-overrun test: the output cannot be longer
- ; than the input.
-
- Dos_To_Unix proc near
-
-         mov     ah,CR                   ;keep the CR value in a register
-
- D2U_Next:
-         lodsb                           ;next input byte
-         cmp     al,ah                   ;is it a CR?
-         jne     D2U_Keep                ;no, it goes to the output
-         loop    D2U_Next                ;yes, drop it and carry on
-         ret
-
- D2U_Keep:
-         stosb                           ;copy normal char or LF
-         loop    D2U_Next                ;until the input is used up
-         ret
-
- Dos_To_Unix endp
-
-
- ;-- Unix -> DOS: copies the input buffer to the output buffer,
- ; writing CR,LF in place of every LF byte.
- ; In:  DS:SI -> input buffer start
- ;      ES:DI -> next free output buffer byte
- ;      CX = bytes in the input buffer (must not be 0)
- ; Out: DI -> next free output buffer byte after the converted data
- ;      SI -> just past the input consumed, CX = 0
- ; Destroys AX.
- ; There is no output-overrun test: the output can be at most twice
- ; the length of the input.
- ; The LOOP/RET pair appears twice on purpose, to save a jump per byte.
-
- Unix_To_Dos proc near
-
-         mov     ah,LF                   ;AH = LF for the whole run; it is
-                                         ;also the 2nd byte of the CR/LF word
-
- U2D_Next:
-         lodsb                           ;next input byte
-         cmp     al,ah                   ;Unix EOL?
-         jne     U2D_Plain               ;no, copy it as it is
-
-         mov     al,CR                   ;AX = LF:CR ..
-         stosw                           ;.. stored low byte first: CR, LF
-         loop    U2D_Next
-         ret
-
- U2D_Plain:
-         stosb                           ;copy normal char
-         loop    U2D_Next
-         ret
-
- Unix_To_Dos endp
-
-
- ;-- Write output buffer to StdOut
- ; In:  ES:DI -> one past the last converted byte in OUTBUFF
- ; Output buffer size may be bigger (Unix --> DOS)
- ; or smaller (DOS --> Unix) than input buffer size.
- ; Return CF set if error (with error in AX).
- ; A short write to a disk file (DOS accepts fewer bytes than asked,
- ; e.g. disk full, yet returns CF clear) is reported as an error too,
- ; with AX = 1DH (DOS 'write fault').  A short write to a character
- ; device is not treated as an error, since a device may legitimately
- ; accept fewer bytes than it was offered.
- ; Destroys AX,BX,DX; preserves CX
- ; Returns DI -> output buffer start
-
- Write_Output proc near
-
-         mov     dx,offset OUTBUFF       ;output buffer start
-         mov     ax,di                   ;output buffer's last byte+1
-         sub     ax,dx                   ;last byte (+1) - start=bytes to write
-         ja      Write_Out1              ;ok, we have output to write
-         xor     ax,ax                   ;nothing to write
-         clc                             ;insure CF clear
-         ret
-
- Write_Out1:
-         push    cx                      ;preserve CX
-         mov     cx,ax                   ;bytes to write
-         mov     bx,STDOUT               ;output to StdOut
-         mov     ah,40H                  ;write to file/device
-         int     21H
-         jc      Write_Out2              ;DOS error, code in AX
-
-         cmp     ax,cx                   ;did DOS take every byte?
-         jnb     Write_Out2              ;yes (AX = CX leaves CF clear)
-
- ;Short write.  Find out whether StdOut is a disk file or a device.
-
-         mov     ax,4400H                ;IOCTL: get device info for BX
-         int     21H                     ;DX = device information word
-         jc      Write_Out2              ;IOCTL failed, code in AX
-         test    dl,80H                  ;bit 7 set = character device
-                                         ;(TEST always clears CF)
-         jnz     Write_Out2              ;device: not an error
-         mov     ax,1DH                  ;disk file: call it a write fault
-         stc
-
- Write_Out2:                             ;POP and MOV leave CF as it is
-         pop     cx                      ;restore CX
-         mov     dx,offset OUTBUFF       ;the IOCTL may have replaced DX
-         mov     di,dx                   ;ES:DI -> output buffer start
-         ret                             ;CF set if write error
-
- Write_Output endp
-
-
- ;-- Enter with DS:DX -> AsciiZ message.
- ; Writes msg to StdErr, without its terminating 0.
- ; An empty message writes nothing.
- ; The length scan goes through ES:DI, so ES must equal DS (the file's
- ; ASSUME puts both on CSEG).
- ; Destroys AX,BX
- ; Preserves CX,DX,DI
-
- Write_StdErr proc near
-
-         push    di
-         push    cx
-
-         xor     al,al                   ;scan for AsciiZ 0
-         mov     cx,0FFFFH               ;max scan
-         mov     di,dx                   ;ES:DI -> message's first char
-         repne   scasb                   ;find AsciiZ 0
-         not     cx                      ;CX = bytes scanned, the 0 included
-         dec     cx                      ;CX = msg length, the 0 left out
-         jcxz    WSE_Done                ;empty message, nothing to write
-
-         mov     bx,STDERR               ;write to StdErr
-         mov     ah,40H                  ;write to file/device
-         int     21H
-
- WSE_Done:
-         pop     cx
-         pop     di
-         ret
-
- Write_StdErr endp
-
-
- ;The runtime file buffers start here. They overlay the one-shot
- ;startup code (Parse_CmdLine) and the usage message, both of which
- ;are finished with before the first read into INBUFF.
-
- EVEN ;start the buffers on a word boundary
-
- INBUFF label byte ;input buffer start (reads ask for BUFFSIZE bytes)
- OUTBUFF EQU INBUFF + BUFFSIZE ;output buffer start, right after the input buffer
- ;v1.1 OUTBUFF has about BUFFSIZE*2 bytes to play with.
- ;OUTBUFFEND EQU OUTBUFF+BUFFSIZE ;mark output buffer end
-
- usage$ db 'CRLF v1.1 - Convert Unix LF line endings to DOS CR/LF endings.'
- db CR,LF
- db 'Usage: CRLF [-r] filename.typ [>output]',CR,LF
- db 'Where',CR,LF
- db ' -r reverses the operation (CR/LF to LF)',CR,LF
- db ' filename.typ is the target filename',CR,LF
- db 'Default output is to STDOUT (redirect to any file/device).'
- db CR,LF,0
-
-
- ;-- Parse PSP command line for -r switch and target filename.
- ; On success: CF clear, DX -> target filename, made AsciiZ in place in
- ; the command line; the filename and the conversion direction have
- ; been displayed on StdErr; addCr has been complemented if -r
- ; (or /r) was given.
- ; On failure: CF set, DX -> AsciiZ message (usage or 'Unknown switch').
- ; '?', '-?' and '/?' all ask for the usage message.
- ; The filename ends at the first blank, tab, CR or other control char.
- ; Destroys AX,BX,CX,SI.
-
- Parse_CmdLine proc near
-
-         mov     si,offset cmdline       ;PSP cmdline length byte
-         xor     ah,ah                   ;clear msb
-         lodsb                           ;snarf length byte
-         mov     cx,ax                   ;CX=cmdline length
-         mov     dx,offset usage$        ;assume no cmdline
-         jcxz    PC_Bad                  ;return CF set
-
-         call    Next_Char               ;gobble any spaces, tabs
-         jcxz    PC_Bad                  ;nothing but white space
-
- ;AL = first real cmdline char
- ;SI -> next cmdline char
- ;CX = non-0
-
-         cmp     al,'-'                  ;got a switch?
-         jz      PC_Switch               ;yep
-         cmp     al,'/'                  ;be nice, test for other switch
-         jz      PC_Switch
-         cmp     al,'?'                  ;asking for help?
-         jz      PC_Bad                  ;yep, DX -> usage msg
-         jmp     short PC_FileName       ;should be filename's first char
-
- ;We got a switch
- PC_Switch:
-         call    Next_Char               ;get next char
-         jcxz    PC_Bad                  ;usage, die
-         cmp     al,'?'                  ;'-?' or '/?' asks for help too
-         jz      PC_Bad                  ;DX still -> usage msg
-
-         mov     dx,offset switchErr$    ;'Unknown switch'
-         and     al,5FH                  ;uppercase
-         cmp     al,'R'                  ;we only take 'R' switches for now
-         jnz     PC_Bad                  ;bad
-
-         mov     dx,offset usage$        ;if no filename, usage msg
-         not     addCr                   ;flip flag to CR/LF -> LF conversion
-         call    Next_Char               ;filename should be next
-         jcxz    PC_Bad
-
- PC_FileName:
-         dec     si                      ;back up to filename's first char
-         mov     dx,si                   ;remember in DX
-         mov     cx,80H                  ;never scan further than this
- PC_FNLup:
-         lodsb                           ;snarf next char
-         cmp     al,' '                  ;blank, tab, CR, any control char?
-         jbe     PC_FNEnd                ;yep, the filename ends here
-         loop    PC_FNLup
-                                         ;no terminator found in 80H bytes:
-                                         ;cut the name off at the last one
- PC_FNEnd:
-         dec     si                      ;back up to the terminator
-         mov     byte ptr [si],0         ;AsciiZe it
-
-         call    Write_StdErr            ;display filename
-         push    dx                      ;save filename ptr
-         mov     dx,offset unix$         ;assume 'Unix -> DOS' conversion
-         cmp     addCr,LOW(TRUE)         ;true?
-         jz      PC_1                    ;yep
-         mov     dx,offset dos$          ;'DOS -> Unix'
- PC_1:
-         call    Write_StdErr            ;display msg
-         pop     dx                      ;restore filename ptr
-         clc                             ;return CF clear
-         ret
-
- PC_Bad:
-         stc                             ;return CF set for failure
-         ret                             ;DX -> error msg
-
-
- ;Parse_CmdLine subroutine
- ; Skips blanks and tabs starting at DS:SI.
- ; Returns AL = first other char, SI -> the char after it, CX non-0;
- ; or CX = 0 if the cmdline ran out first.  The CR that ends the
- ; command line always counts as running out, whatever CX says, so the
- ; scan can never wander past the end of the command line.
- ; CX is only counted down for the blanks and tabs skipped, so it is
- ; an upper bound on what is left, not an exact count.
-
- Next_Char:
-         jcxz    NC_Ret                  ;cmdline zeroed out, return
-
- NC_Lup:
-         lodsb                           ;snarf cmdline char
-         cmp     al,CR                   ;end of the command line?
-         jz      NC_End                  ;yep, nothing more to find
-         cmp     al,' '                  ;space?
-         jz      NC_ReLup                ;yep, gobble
-         cmp     al,9                    ;tab?
-         jnz     NC_Ret                  ;normal char, return
-
- NC_ReLup:
-         loop    NC_Lup
-         ret                             ;ran out, CX = 0
-
- NC_End:
-         xor     cx,cx                   ;tell the caller we ran out
- NC_Ret:
-         ret
-
-
- Parse_CmdLine endp
-
- CSEG ENDS ;end of the program's only segment
- END CrLf ;execution starts at CrLf, the jmp Start at 100H