home *** CD-ROM | disk | FTP | other *** search
/ Computer Shopper 275 / DPCS0111DVD.ISO / Toolkit / Audio-Visual / VirtualDub / Source / VirtualDub-1.9.10-src.7z / src / disasm / source / parser.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  2009-09-14  |  9.6 KB  |  348 lines

  1. #include <stdio.h>
  2.  
  3. #include "ruleset.h"
  4. #include "utils.h"
  5.  
  6. void parse_ia(tRuleSystem& rsys, FILE *f) {
  7.     char linebuf[4096];
  8.     ruleset        *pRuleset = NULL;
  9.  
  10.     while(fgets(linebuf, sizeof linebuf, f)) {
  11.         strtrim(linebuf);
  12.  
  13.         if (!linebuf[0] || linebuf[0] == '#')
  14.             continue;
  15.  
  16.         puts(linebuf);
  17.  
  18.         if (linebuf[0] == '%') {            // ruleset definition
  19.             strtrim(linebuf+1);
  20.  
  21.             ruleset r;
  22.             r.name = linebuf+1;
  23.             rsys.push_back(r);
  24.             pRuleset = &rsys.back();
  25.         } else {                            // rule definition
  26.  
  27.             if (!pRuleset)
  28.                 oops("Not in ruleset:\n>%s\n", linebuf);
  29.  
  30.             rule r;
  31.  
  32.             r.rule_line = linebuf;
  33.             r.argcount = 0;
  34.  
  35.             // Find colon
  36.  
  37.             char *colon = linebuf;
  38.  
  39.             while(*colon != ':') {
  40.                 if (!*colon)
  41.                     oops("Colon missing in rule:\n>%s\n", linebuf);
  42.  
  43.                 ++colon;
  44.             }
  45.  
  46.             // Nuke colon
  47.  
  48.             *colon++ = 0;
  49.  
  50.             // Parse tokens until colon is found
  51.  
  52.             static const char whitespace[]=" \t\n\v";
  53.             const char *token = strtok(linebuf, whitespace);
  54.  
  55.             std::vector<bool> argumentTypeStack;        // true if arg is a string
  56.  
  57.             if (token) do {
  58.                 if (*token == '*') {                        // any character
  59.                     if (!r.match_stream.empty() && !r.match_stream.rbegin()->second && r.match_stream.rbegin()->first < 15)
  60.                         ++r.match_stream.rbegin()->first;
  61.                     else {
  62.                         r.match_stream.push_back(std::pair<uint8, uint8>(1,0));
  63.                     }
  64.  
  65.                     argumentTypeStack.push_back(false);
  66.                     ++r.argcount;
  67.                 } else if (*token == '[') {
  68.                     if (!strcmp(token+1, "66]"))
  69.                         r.match_stream.push_back(std::pair<uint8, uint8>(16,0));
  70.                     else if (!strcmp(token+1, "67]"))
  71.                         r.match_stream.push_back(std::pair<uint8, uint8>(17,0));
  72.                     else if (!strcmp(token+1, "F2]"))
  73.                         r.match_stream.push_back(std::pair<uint8, uint8>(18,0));
  74.                     else if (!strcmp(token+1, "F3]"))
  75.                         r.match_stream.push_back(std::pair<uint8, uint8>(19,0));
  76.                     else if (!strcmp(token+1, "!s]"))
  77.                         r.match_stream.push_back(std::pair<uint8, uint8>(20,0));
  78.                     else if (!strcmp(token+1, "q]"))
  79.                         r.match_stream.push_back(std::pair<uint8, uint8>(21,0));
  80.                     else
  81.                         oops("unknown prefix match token '%s'\n", token);
  82.                 } else if (isxdigit((unsigned char)token[0]) && isxdigit((unsigned char)token[1])
  83.                         && (token[2] == '-' || !token[2])) {        // match character
  84.                     int byteval, byteend;
  85.                     int c;
  86.  
  87.                     c = sscanf(token, "%x-%x", &byteval, &byteend);
  88.  
  89.                     if (byteval < 0 || byteval >= 256)
  90.                         oops("uint8 start value out of range\n");
  91.  
  92.                     if (c<2) {
  93.                         byteend = byteval;
  94.                     } else if (byteend != byteval) {
  95.                         if (byteend < 0 || byteend >= 256)
  96.                             oops("uint8 end value out of range\n");
  97.                     }
  98.  
  99.                     r.match_stream.push_back(std::pair<uint8, uint8>(byteval, ~(byteval ^ byteend)));
  100.                     argumentTypeStack.push_back(false);
  101.                     ++r.argcount;
  102.  
  103.                 } else {                                    // macro invocation
  104.                     tRuleSystem::iterator it = rsys.begin();
  105.                     tRuleSystem::iterator itEnd = rsys.end();
  106.                     int index = 128;
  107.  
  108.                     if (*token == '!') {    // reuse last uint8 char
  109.                         index = 192;
  110.                         ++token;
  111.                     }
  112.  
  113.                     for(; it!=itEnd; ++it, ++index) {
  114.                         if (!_stricmp((*it).name.c_str(), token))
  115.                             break;
  116.                     }
  117.  
  118.                     if (it == itEnd)
  119.                         oops("unknown ruleset '%s'\n", token);
  120.  
  121.                     r.match_stream.push_back(std::pair<uint8, uint8>(index, 0));
  122.                     r.argcount += 2;
  123.                     argumentTypeStack.push_back(false);
  124.                     argumentTypeStack.push_back(true);
  125.                 }
  126.             } while(token = strtok(NULL, whitespace));
  127.  
  128.             // match sequence parsed -- parse the result string.
  129.  
  130.             char *s = colon;
  131.  
  132.             for(;;) {
  133.                 while(*s && strchr(whitespace, *s))
  134.                     ++s;
  135.  
  136.                 if (!*s || *s == '#')
  137.                     break;
  138.  
  139.                 if (*s == '"') {    // string literal
  140.                     const char *start = ++s;
  141.  
  142.                     while(*s != '"') {
  143.                         if (!*s)
  144.                             oops("unterminated string constant\n");
  145.  
  146.                         ++s;
  147.                     }
  148.                     
  149.                     r.result.append(start, s-start);
  150.                     ++s;
  151.                 } else if (*s == '$') {    // macro expansion
  152.                     ++s;
  153.  
  154.                     if (!_strnicmp(s, "p_cs", 4)) {
  155.                         r.result += kTarget_p_cs;
  156.                         s += 4;
  157.                     } else if (!_strnicmp(s, "p_ss", 4)) {
  158.                         r.result += kTarget_p_ss;
  159.                         s += 4;
  160.                     } else if (!_strnicmp(s, "p_ds", 4)) {
  161.                         r.result += kTarget_p_ds;
  162.                         s += 4;
  163.                     } else if (!_strnicmp(s, "p_es", 4)) {
  164.                         r.result += kTarget_p_es;
  165.                         s += 4;
  166.                     } else if (!_strnicmp(s, "p_fs", 4)) {
  167.                         r.result += kTarget_p_fs;
  168.                         s += 4;
  169.                     } else if (!_strnicmp(s, "p_gs", 4)) {
  170.                         r.result += kTarget_p_gs;
  171.                         s += 4;
  172.                     } else if (!_strnicmp(s, "p_66", 4)) {
  173.                         r.result += kTarget_p_66;
  174.                         r.is_66 = true;
  175.                         s += 4;
  176.                     } else if (!_strnicmp(s, "p_67", 4)) {
  177.                         r.result += kTarget_p_67;
  178.                         r.is_67 = true;
  179.                         s += 4;
  180.                     } else if (!_strnicmp(s, "p_F2", 4)) {
  181.                         r.result += kTarget_p_F2;
  182.                         r.is_f2 = true;
  183.                         s += 4;
  184.                     } else if (!_strnicmp(s, "p_F3", 4)) {
  185.                         r.result += kTarget_p_F3;
  186.                         r.is_f3 = true;
  187.                         s += 4;
  188.                     } else if (!_strnicmp(s, "ap", 2)) {
  189.                         r.result += kTarget_ap;
  190.                         s += 2;
  191.                     } else if (!_strnicmp(s, "p_rex", 5)) {
  192.                         r.result += kTarget_p_rex;
  193.                         s += 5;
  194.                     } else if (!_strnicmp(s, "return", 6)) {
  195.                         s += 6;
  196.                         r.is_return = true;
  197.                     } else if (!_strnicmp(s, "call", 4)) {
  198.                         s += 4;
  199.                         r.is_call = true;
  200.                     } else if (!_strnicmp(s, "jmp", 3)) {
  201.                         s += 3;
  202.                         r.is_jump = true;
  203.                     } else if (!_strnicmp(s, "jcc", 3)) {
  204.                         s += 3;
  205.                         r.is_jcc = true;
  206.                     } else if (!_strnicmp(s, "imm8", 4)) {
  207.                         s += 4;
  208.                         r.is_imm8 = true;
  209.                     } else if (!_strnicmp(s, "imm16", 5)) {
  210.                         s += 5;
  211.                         r.is_imm16 = true;
  212.                     } else if (!_strnicmp(s, "imm32", 5)) {
  213.                         s += 5;
  214.                         r.is_imm32 = true;
  215.                     } else if (!_strnicmp(s, "invalid", 7)) {
  216.                         s += 7;
  217.                         r.is_invalid = true;
  218.                     } else {
  219.                         unsigned long id = strtoul(s, &s, 10);
  220.  
  221.                         if (!id || (int)id > r.argcount)
  222.                             oops("macro argument $%lu out of range\n", id);
  223.  
  224.                         if (!r.result.empty() && *r.result.rbegin() == ' ')
  225.                             *r.result.rbegin() = (char)(id + 0x80);
  226.                         else
  227.                             r.result += (char)id;
  228.  
  229.                         int firstbit = 0;
  230.                         int lastbit = 7;
  231.  
  232.                         if (*s == '[') {
  233.                             ++s;
  234.  
  235.                             firstbit = strtol(s, &s, 10);
  236.  
  237.                             if (*s++ != '-')
  238.                                 oops("macro argument bitfield range missing '-'\n");
  239.  
  240.                             lastbit = strtol(s, &s, 10);
  241.  
  242.                             if (firstbit < 0 || lastbit > 7 || firstbit > lastbit)
  243.                                 oops("invalid bitfield %d-%d\n", firstbit, lastbit);
  244.  
  245.                             if (*s++ != ']')
  246.                                 oops("invalid bitfield\n");
  247.                         }
  248.  
  249.                         if (!*s)
  250.                             oops("macro expansion missing format\n");
  251.  
  252.                         char *t = s;
  253.  
  254.                         while(*t && !isspace((unsigned char)*t))
  255.                             ++t;
  256.  
  257.                         *t = 0;
  258.  
  259.                         char control_byte;
  260.                         char ext_byte = 0;
  261.  
  262.                         if (!_stricmp(s, "r32")) {
  263.                             control_byte = kTarget_r32;
  264.                         } else if (!_stricmp(s, "r16")) {
  265.                             control_byte = kTarget_r16;
  266.                         } else if (!_stricmp(s, "r1632")) {
  267.                             control_byte = kTarget_r1632;
  268.                         } else if (!_stricmp(s, "r8")) {
  269.                             control_byte = kTarget_r8;
  270.                         } else if (!_stricmp(s, "rm")) {
  271.                             control_byte = kTarget_rm;
  272.                         } else if (!_stricmp(s, "rx")) {
  273.                             control_byte = kTarget_rx;
  274.                         } else if (!_stricmp(s, "rmx")) {
  275.                             control_byte = kTarget_rmx;
  276.                         } else if (!_stricmp(s, "rc")) {
  277.                             control_byte = kTarget_rc;
  278.                         } else if (!_stricmp(s, "rd")) {
  279.                             control_byte = kTarget_rd;
  280.                         } else if (!_stricmp(s, "rs")) {
  281.                             control_byte = kTarget_rs;
  282.                         } else if (!_stricmp(s, "rf")) {
  283.                             control_byte = kTarget_rf;
  284.                         } else if (!_stricmp(s, "x")) {
  285.                             control_byte = kTarget_x;
  286.                         } else if (!_stricmp(s, "hx")) {
  287.                             control_byte = kTarget_hx;
  288.                         } else if (!_stricmp(s, "lx")) {
  289.                             control_byte = kTarget_lx;
  290.                         } else if (!_stricmp(s, "o")) {
  291.                             control_byte = kTarget_o;
  292.                         } else if (!_stricmp(s, "ho")) {
  293.                             control_byte = kTarget_ho;
  294.                         } else if (!_stricmp(s, "lo")) {
  295.                             control_byte = kTarget_lo;
  296.                         } else if (!_stricmp(s, "a")) {
  297.                             control_byte = kTarget_a;
  298.                         } else if (!_stricmp(s, "ha")) {
  299.                             control_byte = kTarget_ha;
  300.                         } else if (!_stricmp(s, "la")) {
  301.                             control_byte = kTarget_la;
  302.                         } else if (!_stricmp(s, "s")) {
  303.                             control_byte = kTarget_s;
  304.                         } else if (!_stricmp(s, "r3264")) {
  305.                             control_byte = kTarget_r3264;
  306.                         } else if (!_stricmp(s, "r163264")) {
  307.                             control_byte = kTarget_r163264;
  308.                         } else if (!_stricmp(s, "r3264rexX")) {
  309.                             control_byte = kTarget_ext;
  310.                             ext_byte = kTarget_ext_r3264rexX;
  311.                         } else if (!_stricmp(s, "r3264rexB")) {
  312.                             control_byte = kTarget_ext;
  313.                             ext_byte = kTarget_ext_r3264rexB;
  314.                         } else if (!_stricmp(s, "r163264rexB")) {
  315.                             control_byte = kTarget_ext;
  316.                             ext_byte = kTarget_ext_r163264rexB;
  317.                         } else {
  318.                             oops("unknown macro expansion mode: '%s'\n", s);
  319.                         }
  320.  
  321.                         if (argumentTypeStack[id-1] != (control_byte == kTarget_s))
  322.                             oops("bad argument type: $%d (not a %s)\n", id, argumentTypeStack[id-1] ? "uint8" : "string");
  323.  
  324.                         if (firstbit == 0 && lastbit == 2) {
  325.                             r.result += (char)(control_byte + 0x20);
  326.                         } else if (firstbit == 3 && lastbit == 5) {
  327.                             r.result += (char)(control_byte + 0x40);
  328.                         } else if (firstbit != 0 || lastbit != 7) {
  329.                             r.result += (char)(control_byte + 0xe0);
  330.                             r.result += (char)((lastbit+1-firstbit)*16 + firstbit);
  331.                         } else {
  332.                             r.result += (char)control_byte;
  333.                         }
  334.  
  335.                         if (ext_byte)
  336.                             r.result += (char)ext_byte;
  337.  
  338.                         s = t+1;
  339.                     }
  340.                 } else
  341.                     oops("indecipherable result string\n");
  342.             }
  343.  
  344.             pRuleset->rules.push_back(r);
  345.         }
  346.     }
  347. }
  348.