home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 2 / AACD 2.iso / AACD / Programming / jikes-1.02 / src / gencode.java < prev    next >
Encoding:
Java Source  |  1999-08-26  |  21.9 KB  |  484 lines

  1. import java.lang.Integer;
  2. import java.lang.Character;
  3. import java.io.PrintStream;
  4. import java.io.FileOutputStream;
  5.  
  6. class gencode
  7. {
  8.     static final int NEWLINE_CODE      = 1; // \n, \r
  9.     static final int SPACE_CODE        = 2; // \t, \v, \f, ' '
  10.     static final int BAD_CODE          = 3; // everything not covered by other codes ...
  11.     static final int DIGIT_CODE        = 4; // '0'..'9'                    
  12.     static final int OTHER_DIGIT_CODE  = 5; // all unicode digits
  13.     static final int LOWER_CODE        = 6; // 'a'..'z'
  14.     static final int UPPER_CODE        = 7; // 'A'..'Z'
  15.     static final int OTHER_LETTER_CODE = 8; // '$', '_', all other unicode letters
  16.  
  17.     static final int LOG_BASE_SIZE       = 9; // must be a value between 0..16
  18.     static final int LOG_COMPLEMENT_SIZE = (16 - LOG_BASE_SIZE);
  19.     static final int BASE_SIZE           = (1 << LOG_BASE_SIZE);
  20.     static final int SLOT_SIZE           = (1 << LOG_COMPLEMENT_SIZE);
  21.     static final int SLOT_MASK           = (SLOT_SIZE - 1);
  22.  
  23.     static final int BaseIndex(int i) { return i >> LOG_COMPLEMENT_SIZE; }
  24.     static final int DataIndex(int i) { return i & SLOT_MASK; }
  25.  
  26.     static public void main(String args[]) throws java.io.FileNotFoundException, java.io.IOException
  27.     {
  28.         int num_elements = 65536,
  29.             num_slots = BASE_SIZE;
  30.  
  31.         byte base[][] = new byte[BASE_SIZE + 1][SLOT_SIZE];
  32.         if (LOG_BASE_SIZE > 0 && LOG_BASE_SIZE < 16)
  33.         {
  34.             for (int i = 0; i < SLOT_SIZE; i++)
  35.                 base[BASE_SIZE][i] = BAD_CODE;
  36.             num_slots++;
  37.         }
  38.  
  39.         for (int i = 0; i < 65536; i++)
  40.         {
  41.             char a = (char) i;
  42.  
  43.             if (a == '\n' || a == '\r')
  44.                  base[BaseIndex(i)][DataIndex(i)] = NEWLINE_CODE;
  45.             else if (Character.isWhitespace(a))
  46.                  base[BaseIndex(i)][DataIndex(i)] = SPACE_CODE;
  47.             else if (a < 128 && Character.isLowerCase(a)) // Ascii lower case
  48.                  base[BaseIndex(i)][DataIndex(i)] = LOWER_CODE;
  49.             else if (a < 128 && Character.isUpperCase(a)) // Ascii upper case
  50.                  base[BaseIndex(i)][DataIndex(i)] = UPPER_CODE;
  51.             else if (a < 128 && Character.isDigit(a)) // Ascii digit
  52.                  base[BaseIndex(i)][DataIndex(i)] = DIGIT_CODE;
  53.             else if (Character.isJavaIdentifierStart(a))
  54.                  base[BaseIndex(i)][DataIndex(i)] = OTHER_LETTER_CODE;
  55.             else if (Character.isJavaIdentifierPart(a))
  56.                  base[BaseIndex(i)][DataIndex(i)] = OTHER_DIGIT_CODE;
  57.             else
  58.             {
  59.                  base[BaseIndex(i)][DataIndex(i)] = BAD_CODE;
  60.                  num_elements--;
  61.             }
  62.         }
  63.  
  64.         for (int i = 0; i < BASE_SIZE; i++)
  65.         {
  66.             int k;
  67.             for (k = 0; k < SLOT_SIZE; k++)
  68.             {
  69.                 if (base[i][k] != BAD_CODE)
  70.                     break;
  71.             }
  72.  
  73.             if (k == SLOT_SIZE)
  74.             {
  75.                 base[i] = base[BASE_SIZE];
  76.                 num_slots--;
  77.             }
  78.         }
  79.  
  80.         //
  81.         // Process the code.h file
  82.         //
  83.         PrintStream hfile = new PrintStream(new FileOutputStream("code.h"));
  84.  
  85.         //
  86.         // Process the code.cpp file
  87.         //
  88.         PrintStream cfile = new PrintStream(new FileOutputStream("code.cpp"));
  89.  
  90.         if (LOG_BASE_SIZE > 0 && LOG_BASE_SIZE < 16)
  91.         {
  92.             hfile.println("#ifndef code_INCLUDED");
  93.             hfile.println("#define code_INCLUDED");
  94.             hfile.println();
  95.             hfile.println("#include \"config.h\"");
  96.             hfile.println("#include <ctype.h>");
  97.             hfile.println("#include <assert.h>");
  98.             hfile.println();
  99.             hfile.println("class Code");
  100.             hfile.println("{");
  101.             hfile.println("    //");
  102.             hfile.println("    // To facilitate the scanning, the character set is partitioned into");
  103.             hfile.println("    // 8 classes using the array CODE. The classes are described below");
  104.             hfile.println("    // together with some self-explanatory functions defined on CODE.");
  105.             hfile.println("    //");
  106.             hfile.println("    enum {");
  107.  
  108.             hfile.println("             LOG_BASE_SIZE       = " + LOG_BASE_SIZE + ',');
  109.             hfile.println("             LOG_COMPLEMENT_SIZE = " + LOG_COMPLEMENT_SIZE + ',');
  110.             hfile.println("             BASE_SIZE           = " + BASE_SIZE + ',');
  111.             hfile.println("             SLOT_SIZE           = " + SLOT_SIZE + ',');
  112.             hfile.println("             SLOT_MASK           = " + SLOT_MASK + ',');
  113.             hfile.println();
  114.             hfile.println("             NEWLINE_CODE        = " + NEWLINE_CODE + ',');
  115.             hfile.println("             SPACE_CODE          = " + SPACE_CODE + ',');
  116.             hfile.println("             BAD_CODE            = " + BAD_CODE + ',');
  117.             hfile.println("             DIGIT_CODE          = " + DIGIT_CODE + ',');
  118.             hfile.println("             OTHER_DIGIT_CODE    = " + OTHER_DIGIT_CODE + ',');
  119.             hfile.println("             LOWER_CODE          = " + LOWER_CODE + ',');
  120.             hfile.println("             UPPER_CODE          = " + UPPER_CODE + ',');
  121.             hfile.println("             OTHER_LETTER_CODE   = " + OTHER_LETTER_CODE);
  122.             hfile.println("         };");
  123.             hfile.println();
  124.             hfile.println("    static char code[" + num_slots * SLOT_SIZE + "];");
  125.             hfile.println("    static char *base[" +  BASE_SIZE + "];");
  126.             hfile.println();
  127.             hfile.println("#ifdef EBCDIC");
  128.             hfile.println("    static char to_ascii[256];");
  129.             hfile.println("    static char to_ebcdic[256];");
  130.             hfile.println("#endif");
  131.             hfile.println();
  132.             hfile.println("public:");
  133.             hfile.println();
  134.             hfile.println("    static inline void SetBadCode(wchar_t c)");
  135.             hfile.println("    {");
  136.             hfile.println("        base[c >> LOG_COMPLEMENT_SIZE][c] = BAD_CODE;");
  137.             hfile.println("    }");
  138.             hfile.println();
  139.             hfile.println("    static inline void CodeCheck(wchar_t c)");
  140.             hfile.println("    {");
  141.             hfile.println("         assert(c >> LOG_COMPLEMENT_SIZE < BASE_SIZE);");
  142.             hfile.println("         assert(base[c >> LOG_COMPLEMENT_SIZE] + c >= (&code[0]));");
  143.             hfile.println("         assert(base[c >> LOG_COMPLEMENT_SIZE] + c < (&code[" + num_slots * SLOT_SIZE + "]));");
  144.             hfile.println("    }");
  145.             hfile.println();
  146.             hfile.println("    static inline bool IsNewline(wchar_t c) // \\r characters are replaced by \\x0a in read_input.");
  147.             hfile.println("    {");
  148.             hfile.println("        return c == '\\x0a';");
  149.             hfile.println("    }");
  150.             hfile.println();
  151.             hfile.println("    static inline bool IsSpaceButNotNewline(wchar_t c)");
  152.             hfile.println("    {");
  153.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == SPACE_CODE;");
  154.             hfile.println("    }");
  155.             hfile.println();
  156.             hfile.println("    static inline bool IsSpace(wchar_t c)");
  157.             hfile.println("    {");
  158.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] <= SPACE_CODE;");
  159.             hfile.println("    }");
  160.             hfile.println();
  161.             hfile.println("    static inline bool IsDigit(wchar_t c)");
  162.             hfile.println("    {");
  163.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == DIGIT_CODE;");
  164.             hfile.println("    }");
  165.             hfile.println();
  166.             hfile.println("    static inline bool IsUpper(wchar_t c)");
  167.             hfile.println("    {");
  168.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == UPPER_CODE;");
  169.             hfile.println("    }");
  170.             hfile.println();
  171.             hfile.println("    static inline bool IsLower(wchar_t c)");
  172.             hfile.println("    {");
  173.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] == LOWER_CODE;");
  174.             hfile.println("    }");
  175.             hfile.println();
  176.             hfile.println("    static inline bool IsAlpha(wchar_t c)");
  177.             hfile.println("    {");
  178.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] >= LOWER_CODE;");
  179.             hfile.println("    }");
  180.             hfile.println();
  181.             hfile.println("    static inline bool IsAlnum(wchar_t c)");
  182.             hfile.println("    {");
  183.             hfile.println("        return base[c >> LOG_COMPLEMENT_SIZE][c] >= DIGIT_CODE;");
  184.             hfile.println("    }");
  185.             hfile.println();
  186.             hfile.println("#ifdef EBCDIC");
  187.             hfile.println("    static inline char ToASCII(char c)         { return to_ascii[c]; }");
  188.             hfile.println("    static inline char ToEBCDIC(char c)        { return to_ebcdic[c]; }");
  189.             hfile.println("#endif");
  190.             hfile.println();
  191.             hfile.println("};");
  192.             hfile.println();
  193.             hfile.println("#endif");
  194.  
  195.             cfile.println("#include \"code.h\"");
  196.             cfile.println();
  197.             cfile.println("char Code::code[" +  num_slots * SLOT_SIZE + "] =");
  198.             cfile.println("{");
  199.  
  200.             int base_index[] = new int[BASE_SIZE + 1],
  201.                 num = 0;
  202.  
  203.             for (int j = 0; j <= BASE_SIZE; j++)
  204.             {
  205.                 cfile.println("    //");
  206.                 cfile.println("    // Slot " + j  + ":");
  207.                 cfile.println("    //");
  208.  
  209.                 byte slot[] = base[j];
  210.                 if (j != BASE_SIZE && slot == base[BASE_SIZE])
  211.                 {
  212.                     base_index[j] = -1;
  213.                 }
  214.                 else
  215.                 {
  216.                     base_index[j] = num;
  217.                     num += SLOT_SIZE;
  218.                     for (int k = 0; k < SLOT_SIZE; k += 4)
  219.                     {
  220.                         for (int l = 0; l < 4; l++)
  221.                         {
  222.                             cfile.print(l == 0 ? "    " : " ");
  223.                             switch(slot[k + l])
  224.                             {
  225.                                 case NEWLINE_CODE:
  226.                                      cfile.print("NEWLINE_CODE,");
  227.                                      break;
  228.                                 case SPACE_CODE:
  229.                                      cfile.print("SPACE_CODE,");
  230.                                      break;
  231.                                 case BAD_CODE:
  232.                                      cfile.print("BAD_CODE,");
  233.                                      break;
  234.                                 case DIGIT_CODE:
  235.                                      cfile.print("DIGIT_CODE,");
  236.                                      break;
  237.                                 case OTHER_DIGIT_CODE:
  238.                                      cfile.print("OTHER_DIGIT_CODE,");
  239.                                      break;
  240.                                 case LOWER_CODE:
  241.                                      cfile.print("LOWER_CODE,");
  242.                                      break;
  243.                                 case UPPER_CODE:
  244.                                      cfile.print("UPPER_CODE,");
  245.                                      break;
  246.                                 default:
  247.                                      cfile.print("OTHER_LETTER_CODE,");
  248.                                      break;
  249.                             }
  250.                         }
  251.                         cfile.println();
  252.                     }
  253.                 }
  254.  
  255.                 cfile.println();
  256.             }
  257.  
  258.             cfile.println("};");
  259.  
  260.             cfile.println();
  261.             cfile.println();
  262.             cfile.println("//");
  263.             cfile.println("// The Base vector:");
  264.             cfile.println("//");
  265.             cfile.println("char *Code::base[" + BASE_SIZE + "] =");
  266.             cfile.println("{");
  267.             for (int k = 0; k < BASE_SIZE; k += 4)
  268.             {
  269.                 for (int i = 0; i < 4; i++)
  270.                 {
  271.                     int j = k + i;
  272.                     cfile.print(i == 0 ? "   " : " ");
  273.                     cfile.print(" &code[" + 
  274.                                 (base_index[j] >= 0 ? base_index[j] : base_index[BASE_SIZE]) +
  275.                                 "] - " +
  276.                                 (j * SLOT_SIZE) +
  277.                                   ",");
  278.                 }
  279.                 cfile.println();
  280.             }
  281.             cfile.println("};");
  282.  
  283.             //
  284.             // Print Statistics
  285.             //
  286.             System.out.println(" The number of slots used is " + num_slots);
  287.             System.out.println(" Total static storage utilization is " +
  288.                                num_slots * SLOT_SIZE + " bytes for encoding plus " +
  289.                                BASE_SIZE * 4 + " bytes for the base");
  290.             System.out.println(" The number of unicode characters is " + num_elements);
  291.             System.out.println(" Total static storage utilization is 65536");
  292.         }
  293.         else
  294.         {
  295.             hfile.println("#ifndef code_INCLUDED");
  296.             hfile.println("#define code_INCLUDED");
  297.             hfile.println();
  298.             hfile.println("#include \"config.h\"");
  299.             hfile.println("#include <ctype.h>");
  300.             hfile.println("#include \"bool.h\"");
  301.             hfile.println();
  302.             hfile.println("class Code");
  303.             hfile.println("{");
  304.             hfile.println("    //");
  305.             hfile.println("    // To facilitate the scanning, the character set is partitioned into");
  306.             hfile.println("    // 8 classes using the array CODE. The classes are described below");
  307.             hfile.println("    // together with some self-explanatory functions defined on CODE.");
  308.             hfile.println("    //");
  309.             hfile.println("    enum {");
  310.             hfile.println("             NEWLINE_CODE      = " + NEWLINE_CODE + ",");
  311.             hfile.println("             SPACE_CODE        = " + SPACE_CODE + ",");
  312.             hfile.println("             BAD_CODE          = " + BAD_CODE + ",");
  313.             hfile.println("             DIGIT_CODE        = " + DIGIT_CODE + ",");
  314.             hfile.println("             OTHER_DIGIT_CODE  = " + OTHER_DIGIT_CODE + ",");
  315.             hfile.println("             LOWER_CODE        = " + LOWER_CODE + ",");
  316.             hfile.println("             UPPER_CODE        = " + UPPER_CODE + ",");
  317.             hfile.println("             OTHER_LETTER_CODE = " + OTHER_LETTER_CODE);
  318.             hfile.println("         };");
  319.             hfile.println();
  320.             hfile.println("    static char code[65536];");
  321.             hfile.println();
  322.             hfile.println("#ifdef EBCDIC");
  323.             hfile.println("    static char to_ascii[256];");
  324.             hfile.println("    static char to_ebcdic[256];");
  325.             hfile.println("#endif");
  326.             hfile.println();
  327.             hfile.println("public:");
  328.             hfile.println();
  329.             hfile.println("    //");
  330.             hfile.println("    // \\r characters are replaced by \\x0a in read_input.");
  331.             hfile.println("    //");
  332.             hfile.println("    static inline bool IsNewline(wchar_t c)            { return c == '\\x0a'; }");
  333.             hfile.println("    static inline bool IsSpaceButNotNewline(wchar_t c) { return code[c] == SPACE_CODE; }");
  334.             hfile.println("    static inline bool IsSpace(wchar_t c)              { return code[c] <= SPACE_CODE; }");
  335.             hfile.println("    static inline bool IsDigit(wchar_t c)              { return code[c] == DIGIT_CODE; }");
  336.             hfile.println("    static inline bool IsUpper(wchar_t c)              { return code[c] == UPPER_CODE; }");
  337.             hfile.println("    static inline bool IsLower(wchar_t c)              { return code[c] == LOWER_CODE; }");
  338.             hfile.println("    static inline bool IsAlpha(wchar_t c)              { return code[c] >= LOWER_CODE; }");
  339.             hfile.println("    static inline bool IsAlnum(wchar_t c)              { return code[c] >= DIGIT_CODE; }");
  340.             hfile.println();
  341.             hfile.println("#ifdef EBCDIC");
  342.             hfile.println("    static inline char ToASCII(char c)         { return to_ascii[c]; }");
  343.             hfile.println("    static inline char ToEBCDIC(char c)        { return to_ebcdic[c]; }");
  344.             hfile.println("#endif");
  345.             hfile.println("};");
  346.             hfile.println();
  347.             hfile.println("#endif");
  348.  
  349.             cfile.println("#include \"code.h\"");
  350.             cfile.println();
  351.             cfile.println("char Code::code[65536] =");
  352.             cfile.println("{");
  353.  
  354.             int k = 0;
  355.             for (int i = 0; i < 65536; i += 256)
  356.             {
  357.                 cfile.println("    //");
  358.                 cfile.println("    // Slot " + i + ":");
  359.                 cfile.println("    //");
  360.  
  361.                 for (int j = 0; j < 256; j += 4)
  362.                 {
  363.                     for (int l = 0; l < 4; l++)
  364.                     {
  365.                         byte b = base[BaseIndex(k)][DataIndex(k)];
  366.                         k++;
  367.                         cfile.print(l == 0 ? "    " : " ");
  368.                         switch(b)
  369.                         {
  370.                             case NEWLINE_CODE:
  371.                                  cfile.print("NEWLINE_CODE,");
  372.                                  break;
  373.                             case SPACE_CODE:
  374.                                  cfile.print("SPACE_CODE,");
  375.                                  break;
  376.                             case BAD_CODE:
  377.                                  cfile.print("BAD_CODE,");
  378.                                  break;
  379.                             case DIGIT_CODE:
  380.                                  cfile.print("DIGIT_CODE,");
  381.                                  break;
  382.                             case OTHER_DIGIT_CODE:
  383.                                  cfile.print("OTHER_DIGIT_CODE,");
  384.                                  break;
  385.                             case LOWER_CODE:
  386.                                  cfile.print("LOWER_CODE,");
  387.                                  break;
  388.                             case UPPER_CODE:
  389.                                  cfile.print("UPPER_CODE,");
  390.                                  break;
  391.                             default:
  392.                                  cfile.print("OTHER_LETTER_CODE,");
  393.                                  break;
  394.                         }
  395.                     }
  396.                     cfile.println();
  397.                 }
  398.  
  399.                 cfile.println();
  400.             }
  401.  
  402.             cfile.println("};");
  403.  
  404.             //
  405.             // Print Statistics
  406.             //
  407.             System.out.println(" The number of unicode letters is " + num_elements);
  408.             System.out.println(" Total static storage utilization is 65536");
  409.         }
  410.  
  411.         cfile.println("#ifdef EBCDIC");
  412.         cfile.println("char Code::to_ascii[256] = {");
  413.         cfile.println("      0");
  414.         cfile.println("     //Marius, insert ebcdic to ascii translation table here");
  415.         cfile.println("};");
  416.         cfile.println();
  417.         cfile.println("char Code::to_ebcdic[256] = {");
  418.         cfile.println("      0");
  419.         cfile.println("     //Marius, insert ascii to ebcdic translation table here");
  420.         cfile.println("};");
  421.         cfile.println("// variants of system functions requiring EBCDIC translation");
  422.         cfile.println();
  423.         cfile.println("#include <stdio.h>");
  424.         cfile.println("#include <sys/stat.h>");
  425.         cfile.println();
  426.         cfile.println("int system_stat(const char * name, struct stat * stat_struct)");
  427.         cfile.println("{");
  428.         cfile.println("    int n = strlen(name) + 1;");
  429.         cfile.println("    int rc;");
  430.         cfile.println("    char *ebcdic_name = new char[n];");
  431.         cfile.println();
  432.         cfile.println("    for (int i = 0; i <= n; i++)");
  433.         cfile.println("        ebcdic_name[i] = Code::ToEBCDIC(name[i]);");
  434.         cfile.println("    rc = stat(ebcdic_name, stat_struct);");
  435.         cfile.println("    delete[] ebcdic_name;");
  436.         cfile.println();
  437.         cfile.println("    return rc;");
  438.         cfile.println("}");
  439.         cfile.println();
  440.         cfile.println("FILE * system_fopen(char *name, char * mode)");
  441.         cfile.println("{");
  442.         cfile.println("    int n = strlen(name) + 1;");
  443.         cfile.println("    FILE * fp;");
  444.         cfile.println("    char *ebcdic_name = new char[n];");
  445.         cfile.println();
  446.         cfile.println("    for (int i = 0; i <= n; i++)");
  447.         cfile.println("        ebcdic_name[i] = Code::ToEBCDIC(name[i]);");
  448.         cfile.println("    fp = fopen(ebcdic_name, mode);");
  449.         cfile.println("    delete[] ebcdic_name;");
  450.         cfile.println();
  451.         cfile.println("    return fp;");
  452.         cfile.println("}");
  453.         cfile.println();
  454.         cfile.println("size_t system_fread(char *ptr, size_t element_size, size_t count, FILE *stream)");
  455.         cfile.println("{");
  456.         cfile.println("    size_t rc;");
  457.         cfile.println("    rc = fread(ptr, element_size, count, stream);");
  458.         cfile.println("    for (int i = 0; i <= count; i++)");
  459.         cfile.println("        ptr[i] = Code::ToEBCDIC(ptr[i]);");
  460.         cfile.println();
  461.         cfile.println("    return rc;");
  462.         cfile.println("}");
  463.         cfile.println();
  464.         cfile.println("int system_is_directory(char * name)");
  465.         cfile.println("{");
  466.         cfile.println("    char *ebcdic_name = new char[n];");
  467.         cfile.println("    int rc;");
  468.         cfile.println("    struct stat status;");
  469.         cfile.println("    for (int i = 0; i <= n; i++)");
  470.         cfile.println("        ebcdic_name[i] = Code::ToEBCDIC(name[i]);");
  471.         cfile.println("        Marius, insert proper OPEN_EDITION name for STAT_S_IFDIR below");
  472.         cfile.println("        rc =  ((system_stat(name, &status) == 0) && (status.st_mode & STAT_S_IFDIR)) ? 1: 0;");
  473.         cfile.println("    delete[] ebcdic_name;");
  474.         cfile.println();
  475.         cfile.println("    return rc;");
  476.         cfile.println("}");
  477.         cfile.println();
  478.         cfile.println("#endif");
  479.  
  480.         hfile.close();
  481.         cfile.close();
  482.     }
  483. }
  484.