home *** CD-ROM | disk | FTP | other *** search
#
# AWK script to create a header file from an assembly language
# source (Intel x86), listing labels and variables by segment.
# Morten Elling, April 1995 + May 1997.
#
# Usage
#   awk -f ash.awk [NEARS=1] [TYPES=0] [IDEAL=1] file.asm [>output.fil]
#     NEARS=1 : INclude non-local near code labels (default = 0)
#     TYPES=0 : EXclude struc, record, enum types  (default = 1)
#     IDEAL=1 : Assume Ideal syntax (auto-detect = default = 0)
#
# Notes
#   Despite several limitations, this script can be a time saver
#   by providing a list of labels, procedures, and variables per
#   segment in the form of global (externdef) declarations.
#
#   Doesn't understand conditional blocks, includes, or line
#   continuation. Ignores comments, macros, and repeat blocks.
#   Understands generic or simplified segmentation, or mixed,
#   but does not properly handle:
#     - [generic] nested segments
#     - [simplified] named far segments (e.g. .code fharcode)
#     - segment ordering (due to AWK's associative arrays)
#   Understands MASM and Ideal mode syntax, strucs and records.
#   Treats 'sym: db 1' as a byte label if NEARS = 0.
-
-
- ##### Make set array from string
function zsplit(str, arr,    words, n) {    # words, n = locals
    # Turn the whitespace-separated words of STR into set members of ARR:
    # after the call, (w in arr) is true for every word w of STR.
    # Existing members of ARR are kept, so repeated calls accumulate.
    n = split(str, words);
    while (n > 0)
        arr[words[n--]] = 1;
}
-
-
BEGIN {     # Initialize; AWK zero-inits all variables not set here.

    # Default TYPES to 1 only when it has not been set already, so that
    # "-v TYPES=0" is honoured.  A "TYPES=0" operand on the command line
    # is processed by AWK after BEGIN and overrides this default anyway.
    if (TYPES == "")
        TYPES = 1;
    NOSEG = "unknown";              # Name of the pseudo-segment used before
                                    # any segment directive has been seen

    # Progress message goes to the console, not to the (possibly
    # redirected) standard output.  "CON" is the console device only on
    # DOS/Windows; anywhere else it would create a stray file named CON,
    # so fall back to /dev/stderr when no COMSPEC variable is present.
    console = (("COMSPEC" in ENVIRON) || ("ComSpec" in ENVIRON)) \
              ? "CON" : "/dev/stderr";
    print "Just a minute, Admiral" > console;

    # Header comment naming the source file; relies on the file being
    # the last command-line operand, as in the documented usage.
    printf("\n; %s\n", tolower(ARGV[ARGC-1]));


    # Directive vocabularies (all lowercase; input tokens are lowercased
    # before lookup).
    segdir   = ".code .data .const .data? .stack .fardata .fardata?" \
               " codeseg dataseg const udataseg stack fardata ufardata";
    labeldir = "proc label ";
    typdir   = "struc record union enum typedef ";
    data2dir = "db dw dd df dp dq dt byte word dword" \
               " sbyte sword sdword real4 real8 real10";
    idealdir = "proc label ideal ";         # 1st token on a line
    distspec = "near far near16 near32 far16 far32";

    # Make sets of directive names
    zsplit(segdir,   cannedsegset);
    zsplit(labeldir, labelset);
    zsplit(typdir,   typeset);
    zsplit(data2dir, data2set);
    zsplit(idealdir, idealset);
    zsplit(distspec, distset);


    # Start in the "unknown" segment.  symcount[seg] == 1 means the
    # segment exists but holds no symbols; symbols occupy slots 2..n.
    curseg = NOSEG;
    symcount[curseg] = 1;
}
-
##### Helpers for the main rule ################

# Record symbol NAME with type/distance TYPE in segment SEG.
# Symbols occupy slots 2..symcount[seg]; slot 1 only marks the segment.
function ash_addsym(seg, name, type,    n) {
    n = ++symcount[seg];            # Step symbol counter
    syms[seg, n] = name;            # Symbol name
    syms[seg, n, typ] = type;       # 'typ' is never assigned anywhere: it
                                    # is an empty-string tag that END uses
                                    # too, so it must stay as written
}

# Map a lowercase data directive (a member of data2set) to the type name
# printed in the declaration: dX directives become lowercase size names,
# every other directive is its own type name in uppercase.
function ash_datatype(dir) {
    if (dir == "db") return "byte";
    if (dir == "dw") return "word";
    if (dir == "dd") return "dword";
    if (dir == "df") return "fword";
    if (dir == "dp") return "pword";
    if (dir == "dq") return "qword";
    if (dir == "dt") return "tbyte";
    return toupper(dir);
}

# Consume input up to and including the first line whose 1st field (or,
# when ALSO2ND is true, 2nd field) equals WORD, compared case-insensitively.
# Stops quietly at end of input.
function ash_skip_to(word, also2nd) {
    while ((getline) > 0)
        if (tolower($1) == word || (also2nd && tolower($2) == word))
            break;
}


{   ##### Main loop begins #######################
    # Classifies one source line and records any symbol it declares in
    # syms[]/symcount[] under the current segment (curseg).

    if ($1 ~ /^;/) next;                    # Skip comment lines

    # Strip immediate-macro prefix.  Editing $0 with sub() makes AWK
    # re-split the fields, so the token after "%" becomes $1; merely
    # assigning $1 = "" would leave every field in its old position.
    if ($1 ~ /^%$/)
        sub(/^[ \t]*%[ \t]*/, "");

    tok1 = tolower($1);                     # Lowercase field no. 1
    tok2 = tolower($2);                     # Lowercase field no. 2


    ##### Ideal/MASM mode change
    if (!IDEAL)
        IDEAL = (tok1 in idealset);         # Auto-detect
    if (IDEAL) {
        tokk = tok1; toki = $2;             # Keyword before identifier
    } else {
        tokk = tok2; toki = $1;             # Identifier before keyword
    }


    ##### Ignore comment block:  COMMENT <delim> text ... <delim>
    if (!IDEAL && tok1 == "comment") {
        delim = substr($2, 1, 1);
        rest = $0;
        sub(/^[ \t]*[^ \t]+[ \t]+/, "", rest);  # Drop the keyword; rest now
                                                # starts with the delimiter
        # Read on only when the block is not closed on this very line;
        # otherwise the lines after a one-line comment would be lost.
        # A bare COMMENT without delimiter is treated as a single line
        # instead of scanning for an empty delimiter to end of file.
        if (delim != "" && index(substr(rest, 2), delim) == 0)
            while ((getline) > 0)               # Read until
                if (index($0, delim)) break;    # end of comment
        next;                               # Skip to next line
    }

    ##### Ignore macro/rept/irp bodies (nesting is not tracked)
    if (tokk == "macro" || tok1 == "rept" || tok1 == "irp") {
        ash_skip_to("endm", 0);
        next;                               # Skip to next line
    }


    ##### Segment directive
    if (tokk == "segment" || (tok1 in cannedsegset)) {
        if (tokk == "segment")
            curseg = toki;                  # Named segment
        else
            curseg = toupper(tok1);         # Simplified directive
        if (symcount[curseg] == 0)
            symcount[curseg]++;             # Need this for void segs
        next;                               # Skip to next line
    }


    ##### Proc/label declaration
    if (tokk in labelset) {
        gtyp = (($3 != "") ? $3 : "unknown");   # Distance/type, if any
        if (tokk == "proc" && !(tolower(gtyp) in distset))
            gtyp = "proc";                  # Model-dependent size
        ash_addsym(curseg, toki, gtyp);
        next;                               # Skip to next line
    }


    ##### Non-local near code label (only when NEARS is set)
    if (NEARS && $1 ~ /^[A-Za-z_$?][A-Za-z_$?0-9@]*:$/) {
        # The pattern guarantees exactly one colon, at the end.
        ash_addsym(curseg, substr($1, 1, length($1) - 1), "Near");
        next;                               # Skip to next line
    }


    ##### Data allocation (Ideal and MASM syntax identical)
    # (Avoid false match on "mov dword ptr memvar, eax")
    if ((tok2 in data2set) && tolower($3) != "ptr") {
        name = $1;
        sub(/:$/, "", name);                # 'sym: db 1' declares 'sym'
        ash_addsym(curseg, name, ash_datatype(tok2));
        next;                               # Skip to next line
    }


    if (TYPES) {
        ##### Struc/record/enum definition
        if (tokk in typeset) {
            typs[++typecount] = tolower(toki);  # Remember type name
            if (tokk == "struc" || tokk == "union")
                ash_skip_to("ends", 1);     # Skip member declarations
            next;                           # Skip to next line
        }

        ##### Struc/record/enum allocation
        # Stop at the first matching type so a type name defined twice
        # does not record the variable twice; never match on an empty
        # 2nd field.
        if (tok2 != "")
            for (j = 1; j <= typecount; j++)
                if (tok2 == typs[j]) {
                    ash_addsym(curseg, $1, $2);
                    break;
                }
    }   # endif (TYPES)

}   ##### Main loop ends #########################
##### Print the declarations collected for one segment, in the form:
#       segname [SEGMENT]
#       global  sym_name        :sym_type
#       [segname ENDS]
# Simplified ("canned") segment directives are printed bare, without the
# SEGMENT/ENDS bracket; Ideal mode puts the keyword before the name.
function ash_emit_segment(seg,    canned, gap, n) {
    canned = (tolower(seg) in cannedsegset);
    gap = (length(seg) >= 8) ? " " : "\t";  # Keep short names tab-aligned

    if (canned)
        printf("\n\t%s\n", seg);
    else if (IDEAL)
        printf("\n\tSEGMENT\t%s\n", seg);
    else
        printf("\n\t%s%sSEGMENT\n", seg, gap);

    # Slot 1 is the void "segment exists" marker; symbols start at 2.
    for (n = 2; n <= symcount[seg]; n++)
        printf("\tglobal\t%-24s%s\n", syms[seg, n], ":" syms[seg, n, typ]);

    if (canned)
        return;
    if (IDEAL)
        printf("\tENDS\t%s\n", seg);
    else
        printf("\t%s%sENDS\n", seg, gap);
}

END {
    ##### Report every segment seen.  Segment order follows AWK's
    #     array traversal order, not the order in the source file.

    if (IDEAL)
        printf("; ideal\n");
    else
        printf("; global equ externdef\n");

    for (seg in symcount) {
        # The "unknown" pseudo-segment is only shown if it got symbols.
        if (seg == NOSEG && symcount[seg] == 1)
            continue;
        ash_emit_segment(seg);
    }
}   # eof
-