Reverse Code Engineering RCE CD +sandman 2000

home *** CD-ROM | disk | FTP | other *** search

/ Reverse Code Engineering RCE CD +sandman 2000 / ReverseCodeEngineeringRceCdsandman2000.iso / RCE / Library / Manuels & Misc / Assembly / AOA.ZIP / CH25 / IMGPRCS5.ASM < prev next >

Wrap

Assembly Source File | 1994-08-05 | 9.6 KB | 440 lines

; IMGPRCS.ASM
;
; An image processing program (Version #5, the fourth optimization pass).
;
; This program blurs an eight-bit grayscale image by averaging a pixel
; in the image with the eight pixels around it.  The average is computed
; by (CurCell*8 + other 8 cells)/16, weighting the current cell by 50%.
;
; Because of the size of the image (almost 64K), the input and output
; matrices are in different segments.
;
; Version #1: Straight-forward translation from Pascal to Assembly.
;
; Version #2: Three major optimizations. (1) used movsd instruction rather
;             than a loop to copy data from DataOut back to DataIn.
;             (2) Used repeat..until forms for all loops.  (3) unrolled
;             the innermost two loops (which is responsible for most of
;             the performance improvement).
;
; Version #3: Used registers for all variables.  Set up segment registers
;             once and for all through the execution of the main loop so
;             the code didn't have to reload ds each time through.  Computed
;             index into each row only once (outside the j loop).
;
; Version #4: Eliminated copying data from DataOut to DataIn on each pass.
;             Removed hazards.  Maintained common subexpressions.  Did some
;             more loop unrolling.
;
; Version #5: Converted data arrays to words rather than bytes and operated
;             on 16-bit values.  Yielded minimal speedup.
;
; Performance comparisons (66 MHz 80486 DX/2 system).
;
; This code-                    2.4 seconds.
; 3rd optimization pass-        2.5 seconds.
; 2nd optimization pass-        4 seconds.
; 1st optimization pass-        6 seconds.
; Original ASM code-            36 seconds.
; Borland Pascal v7.0-          45 seconds.
; Borland C++ v4.02-            29 seconds.
; Microsoft C++ v8.00-          21 seconds.
;
; Target: 16-bit real-mode DOS, MASM 6.x syntax, UCR Standard Library
; (stdlib.a / stdlib.lib).  The string instructions below rely on the
; direction flag being clear, as the original code did.

                .xlist
                include         stdlib.a
                includelib      stdlib.lib
                .list
                .386
                option          segment:use16

; Image geometry.  The file on disk holds ImgRows*ImgCols eight-bit pixels.
; In memory each pixel is widened to a word, so one row occupies 200h bytes
; and the image is split across two 64K segments per plane.

ImgRows         =               251
ImgCols         =               256
ImgSize         =               ImgRows*ImgCols ;Bytes in the image file.
FirstHalf       =               32768           ;Pixels held by the first 64K segment.


; BlurRows-
;
; Blurs CL consecutive rows, columns 1..254, reading words from the source
; plane through DS and writing the averaged words to the same offsets
; through ES.  Two horizontally adjacent pixels are computed per trip
; through the inner loop; the instruction order is the hand-scheduled
; order of the original code and must not be rearranged casually.
;
; In:       ds:si -> element [i,1] of the first row to process
;           es     = segment of the destination plane (same offsets as ds)
;           cl     = number of rows to process (non-zero)
; Out:      si     = offset of element [i,1] of the row after the last one
; Clobbers: ax, bx, cx, dx, bp, di, si, flags
;
; Offsets: 2 bytes per pixel, 200h bytes per row, so [si-200h] is the
; pixel directly above and [si+200h] the pixel directly below.

BlurRows        macro
                local   NextRow, NextPair
NextRow:        mov     ch, 254/2               ;# of times through loop.
NextPair:       mov     dx, [si]                ;[i,j]
                mov     bx, [si-200h]           ;[i-1,j]
                mov     ax, dx
                shl     dx, 3                   ;[i,j] * 8
                add     bx, [si-1feh]           ;[i-1,j+1]
                mov     bp, [si+2]              ;[i,j+1]
                add     bx, [si+200h]           ;[i+1,j]
                add     dx, bp
                add     bx, [si+202h]           ;[i+1,j+1]
                add     dx, [si-202h]           ;[i-1,j-1]
                mov     di, [si-1fch]           ;[i-1,j+2]
                add     dx, [si-2]              ;[i,j-1]
                add     di, [si+4]              ;[i,j+2]
                add     dx, [si+1feh]           ;[i+1,j-1]
                add     di, [si+204h]           ;[i+1,j+2]
                shl     bp, 3                   ;[i,j+1] * 8
                add     dx, bx
                add     bp, ax
                shr     dx, 4                   ;Divide by 16.
                add     bp, bx
                mov     es:[si], dx             ;Store [i,j] entry.
                add     bp, di
                add     si, 4                   ;Affects next store operation!
                shr     bp, 4                   ;Divide by 16.
                dec     ch
                mov     es:[si-2], bp           ;Store [i,j+1] entry (mov keeps flags).
                jne     NextPair

                add     si, 4                   ;Skip to start of next row.
                dec     cl
                jne     NextRow
                endm


dseg            segment para public 'data'

ImgData         byte    ImgRows dup (ImgCols dup (?))

InName          byte    "roller1.raw",0
OutName         byte    "roller2.raw",0
Iterations      word    0

dseg            ends


; This code makes the naughty assumption that the following
; segments are loaded contiguously in memory!  Also, because these
; segments are paragraph aligned, this code assumes that these segments
; will contain a full 65,536 bytes.  You cannot declare a segment with
; exactly 65,536 bytes in MASM.  However, the paragraph alignment option
; ensures that the extra byte of padding is added to the end of each
; segment.
;
; Data1a/Data1b together hold one word-per-pixel copy of the image,
; Data2a/Data2b the other; the blur ping-pongs between the two copies.

DataSeg1        segment para public 'ds1'
Data1a          byte    65535 dup (?)
DataSeg1        ends

DataSeg2        segment para public 'ds2'
Data1b          byte    65535 dup (?)
DataSeg2        ends

DataSeg3        segment para public 'ds3'
Data2a          byte    65535 dup (?)
DataSeg3        ends

DataSeg4        segment para public 'ds4'
Data2b          byte    65535 dup (?)
DataSeg4        ends


cseg            segment para public 'code'
                assume  cs:cseg, ds:dseg

; Main-
;
; Reads roller1.raw, asks for an iteration count, blurs the image that
; many times and writes the result to roller2.raw.  Any failure prints a
; message and exits; file handles are closed on every path that opened them.

Main            proc
                mov     ax, dseg
                mov     ds, ax
                meminit

                mov     ax, 3d00h               ;Open input file for reading.
                lea     dx, InName
                int     21h
                jnc     GoodOpen
                print
                byte    "Could not open input file.",cr,lf,0
                jmp     Quit

; Read the entire eight-bit image into ImgData.  It is widened to words
; and copied into both working planes further down.

GoodOpen:       mov     bx, ax                  ;File handle.
                lea     dx, ImgData
                mov     cx, ImgSize             ;Size of data file to read.
                mov     ah, 3Fh
                int     21h
                jc      BadRead                 ;Carry set: ax is an error code, not a count.
                cmp     ax, ImgSize             ;See if we read the data.
                je      GoodRead

BadRead:        mov     ah, 3eh                 ;Close the input file (handle still in bx).
                int     21h
                print
                byte    "Did not read the file properly",cr,lf,0
                jmp     Quit

GoodRead:       mov     ah, 3eh                 ;Done with the input file; close it.
                int     21h

                print
                byte    "Enter number of iterations: ",0
                getsm
                atoi
                free
                mov     Iterations, ax
                cmp     ax, 0
                jle     Quit

                printf
                byte    "Computing Result for %d iterations",cr,lf,0
                dword   Iterations

; Copy the data and expand it from eight bits to sixteen bits.
; The first loop handles the first 32,768 bytes, the second loop
; handles the remaining bytes.  Each pixel is stored into both planes
; (es = Data1x, fs = Data2x) so the border pixels, which the blur never
; writes, are present in whichever plane ends up holding the result.

                mov     ax, DataSeg1
                mov     es, ax
                mov     ax, DataSeg3
                mov     fs, ax

                mov     ah, 0                   ;High byte of every widened pixel.
                mov     cx, FirstHalf
                lea     si, ImgData
                xor     di, di                  ;Output data is at ofs zero.
CopyLoop:       lodsb                           ;al = next pixel, ax = pixel as a word.
                mov     fs:[di], ax             ;Store before stosw advances di.
                stosw
                dec     cx
                jne     CopyLoop

                mov     di, DataSeg2
                mov     es, di
                mov     di, DataSeg4
                mov     fs, di
                mov     cx, ImgSize - FirstHalf
                xor     di, di
CopyLoop1:      lodsb
                mov     fs:[di], ax
                stosw
                dec     cx
                jne     CopyLoop1

; hloop completes one iteration on the data moving it from Data1a/Data1b
; to Data2a/Data2b.

hloop:          mov     ax, DataSeg1
                mov     ds, ax
                mov     ax, DataSeg3
                mov     es, ax

; Process rows 1..127 (127 rows, 65,024 bytes) of the array.  Row 127 is
; the last row in this segment, so its [i+1,*] references wrap around the
; 64K offset space; that row is computed again, with the correct
; neighbours, by the second call below.

                mov     cl, 127
                lea     si, Data1a+202h         ;Start at [1,1]
                BlurRows

; Process rows 127..249 (123 rows).  This requires that we switch from
; one segment to the next.  Backing the segment values up by 40h
; paragraphs (1,024 bytes = two rows) makes offset 0 address row 126,
; so the window overlaps the tail of the previous segment.

                mov     ax, DataSeg2
                sub     ax, 40h                 ;Back up over last 2 rows of DataSeg1.
                mov     ds, ax
                mov     ax, DataSeg4
                sub     ax, 40h                 ;Back up over last 2 rows of DataSeg3.
                mov     es, ax

                mov     cl, ImgRows-127-1       ;Remaining rows to process.
                mov     si, 202h                ;Row 127, column 1 in this window.
                BlurRows

                mov     ax, dseg
                mov     ds, ax
                assume  ds:dseg

                dec     Iterations
                je      Done0                   ;Result is in Data2a/Data2b.

; Unroll the iterations loop so the next pass can blur from Data2a/Data2b
; (DataSeg3/4) back into Data1a/Data1b (DataSeg1/2) without copying.
; Other than the direction of the data movement, this code is identical
; to the above.

                mov     ax, DataSeg3
                mov     ds, ax
                mov     ax, DataSeg1
                mov     es, ax

                mov     cl, 127
                lea     si, Data1a+202h         ;Same offset in every plane: [1,1]
                BlurRows

                mov     ax, DataSeg4
                sub     ax, 40h                 ;Back up over last 2 rows of DataSeg3.
                mov     ds, ax
                mov     ax, DataSeg2
                sub     ax, 40h                 ;Back up over last 2 rows of DataSeg1.
                mov     es, ax

                mov     cl, ImgRows-127-1
                mov     si, 202h
                BlurRows

                mov     ax, dseg
                mov     ds, ax
                assume  ds:dseg

                dec     Iterations
                je      Done2                   ;Result is in Data1a/Data1b.
                jmp     hloop

; ax:bx = segments holding the first and second half of the final image.

Done2:          mov     ax, DataSeg1
                mov     bx, DataSeg2
                jmp     Finish

Done0:          mov     ax, DataSeg3
                mov     bx, DataSeg4

Finish:         mov     ds, ax
                print
                byte    "Writing result",cr,lf,0

; Convert data back to byte form and write to the output file:

                mov     ax, dseg
                mov     es, ax

                mov     cx, FirstHalf
                lea     di, ImgData
                xor     si, si                  ;Input data is at ofs zero.
CopyLoop3:      lodsw                           ;Fetch word pixel,
                stosb                           ; keep only its low byte.
                dec     cx
                jne     CopyLoop3

                mov     ds, bx                  ;Second half of the result plane.
                mov     cx, ImgSize - FirstHalf
                xor     si, si
CopyLoop4:      lodsw
                stosb
                dec     cx
                jne     CopyLoop4

; Okay, write the data to the output file:

                mov     ah, 3ch                 ;Create output file.
                mov     cx, 0                   ;Normal file attributes.
                mov     dx, dseg
                mov     ds, dx
                lea     dx, OutName
                int     21h
                jnc     GoodCreate
                print
                byte    "Could not create output file.",cr,lf,0
                jmp     Quit

GoodCreate:     mov     bx, ax                  ;File handle (kept in bx through the close).
                lea     dx, ImgData             ;ds is still dseg from the create call.
                mov     cx, ImgSize             ;Size of data file to write.
                mov     ah, 40h                 ;Write operation.
                int     21h
                jc      BadWrite                ;Carry set: ax is an error code, not a count.
                cmp     ax, ImgSize             ;See if we wrote the data.
                je      GoodWrite

BadWrite:       mov     ah, 3eh                 ;Close the output file before bailing out.
                int     21h
                print
                byte    "Did not write the file properly",cr,lf,0
                jmp     Quit

GoodWrite:      mov     ah, 3eh                 ;Close operation.
                int     21h

Quit:           ExitPgm                         ;DOS macro to quit program.
Main            endp

cseg            ends

sseg            segment para stack 'stack'
stk             byte    1024 dup ("stack ")
sseg            ends

zzzzzzseg       segment para public 'zzzzzz'
LastBytes       byte    16 dup (?)
zzzzzzseg       ends
                end     Main