home *** CD-ROM | disk | FTP | other *** search
/ Computer Shopper 275 / DPCS0111DVD.ISO / Toolkit / Audio-Visual / VirtualDub / Source / VirtualDub-1.9.10-src.7z / src / Asuka / source / glc_nvrc.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  2010-03-04  |  24.7 KB  |  903 lines

  1. #include "stdafx.h"
  2. #include "glc.h"
  3. #include <vd2/system/vdalloc.h>
  4. #include <vd2/Riza/opengl.h>
  5.  
  6. using namespace GLCIL;
  7.  
  8. namespace {
  9.     struct RegisterCombinerSrc {
  10.         uint16    mReg;
  11.         uint16    mMapping;
  12.         uint16    mPortion;
  13.  
  14.         void SetZero() { 
  15.             mReg = GL_ZERO;
  16.             mMapping = GL_SIGNED_IDENTITY_NV;
  17.             mPortion = GL_RGBA;
  18.         }
  19.  
  20.         void SetOne() { 
  21.             mReg = GL_ZERO;
  22.             mMapping = GL_UNSIGNED_INVERT_NV;
  23.             mPortion = GL_RGBA;
  24.         }
  25.  
  26.         void SetMinusOne() {
  27.             mReg = GL_ZERO;
  28.             mMapping = GL_EXPAND_NORMAL_NV;
  29.             mPortion = GL_RGBA;
  30.         }
  31.  
  32.         void Print(uint16 defaultPortion) const {
  33.             const char *s = "zero";
  34.  
  35.             switch(mReg) {
  36.                 case GL_SPARE0_NV:                s = "r0"; break;
  37.                 case GL_SPARE1_NV:                s = "r1"; break;
  38.                 case GL_PRIMARY_COLOR_NV:        s = "v0"; break;
  39.                 case GL_SECONDARY_COLOR_NV:        s = "v1"; break;
  40.                 case GL_TEXTURE0_ARB:            s = "t0"; break;
  41.                 case GL_TEXTURE1_ARB:            s = "t1"; break;
  42.                 case GL_TEXTURE2_ARB:            s = "t2"; break;
  43.                 case GL_TEXTURE3_ARB:            s = "t3"; break;
  44.                 case GL_CONSTANT_COLOR0_NV:        s = "c0"; break;
  45.                 case GL_CONSTANT_COLOR1_NV:        s = "c1"; break;
  46.             }
  47.  
  48.             switch(mMapping) {
  49.                 case GL_UNSIGNED_IDENTITY_NV:    printf("%s_sat", s); break;
  50.                 case GL_UNSIGNED_INVERT_NV:        printf("1-%s", s); break;
  51.                 case GL_SIGNED_IDENTITY_NV:        printf("%s", s); break;
  52.                 case GL_SIGNED_NEGATE_NV:        printf("-%s", s); break;
  53.                 case GL_EXPAND_NORMAL_NV:        printf("%s_bx2", s); break;
  54.                 case GL_EXPAND_NEGATE_NV:        printf("-%s_bx2", s); break;
  55.                 case GL_HALF_BIAS_NORMAL_NV:    printf("%s_bias", s); break;
  56.                 case GL_HALF_BIAS_NEGATE_NV:    printf("-%s_bias", s); break;
  57.             }
  58.  
  59.             if (mPortion != defaultPortion) {
  60.                 switch(mPortion) {
  61.                     case GL_RGB:
  62.                         printf(".rgb");
  63.                         break;
  64.                     case GL_ALPHA:
  65.                         printf(".a");
  66.                         break;
  67.                     case GL_BLUE:
  68.                         printf(".b");
  69.                         break;
  70.                 }
  71.             }
  72.         }
  73.  
  74.         static uint8 GetRegisterCode(uint16 reg) {
  75.             uint8 code;
  76.  
  77.             switch(reg) {
  78.                 case GL_ZERO:                    code = 0x00; break;
  79.                 case GL_DISCARD_NV:                code = 0x01; break;
  80.                 case GL_SPARE0_NV:                code = 0x02; break;
  81.                 case GL_SPARE1_NV:                code = 0x03; break;
  82.                 case GL_PRIMARY_COLOR_NV:        code = 0x04; break;
  83.                 case GL_SECONDARY_COLOR_NV:        code = 0x05; break;
  84.                 case GL_CONSTANT_COLOR0_NV:        code = 0x06; break;
  85.                 case GL_CONSTANT_COLOR1_NV:        code = 0x07; break;
  86.                 case GL_SPARE0_PLUS_SECONDARY_COLOR_NV:        code = 0x08; break;
  87.                 case GL_E_TIMES_F_NV:            code = 0x09; break;
  88.                 case GL_TEXTURE0_ARB:            code = 0x0C; break;
  89.                 case GL_TEXTURE1_ARB:            code = 0x0D; break;
  90.                 case GL_TEXTURE2_ARB:            code = 0x0E; break;
  91.                 case GL_TEXTURE3_ARB:            code = 0x0F; break;
  92.             }
  93.  
  94.             return code;
  95.         }
  96.  
  97.         void Write(FILE *f) const {
  98.             uint8 code = GetRegisterCode(mReg);
  99.  
  100.             switch(mMapping) {
  101.                 case GL_UNSIGNED_IDENTITY_NV:    break;
  102.                 case GL_UNSIGNED_INVERT_NV:        code |= 0x10; break;
  103.                 case GL_SIGNED_IDENTITY_NV:        code |= 0x20; break;
  104.                 case GL_SIGNED_NEGATE_NV:        code |= 0x30; break;
  105.                 case GL_EXPAND_NORMAL_NV:        code |= 0x40; break;
  106.                 case GL_EXPAND_NEGATE_NV:        code |= 0x50; break;
  107.                 case GL_HALF_BIAS_NORMAL_NV:    code |= 0x60; break;
  108.                 case GL_HALF_BIAS_NEGATE_NV:    code |= 0x70; break;
  109.             }
  110.  
  111.             if (mPortion == GL_ALPHA)
  112.                 code |= 0x80;
  113.  
  114.             fprintf(f, "0x%02x", code);
  115.         }
  116.     };
  117.  
  118.     struct RegisterCombinerHalf {
  119.         uint16    mDst[3];
  120.         uint16    mScale;
  121.         uint16    mBias;
  122.         RegisterCombinerSrc    mSrc[4];
  123.         bool    mbDotAB;
  124.         bool    mbDotCD;
  125.         bool    mbMux;
  126.  
  127.         RegisterCombinerHalf() {
  128.             mDst[0] = mDst[1] = mDst[2] = GL_DISCARD_NV;
  129.             mScale = GL_NONE;
  130.             mBias = GL_NONE;
  131.             mSrc[0].SetZero();
  132.             mSrc[1].SetZero();
  133.             mSrc[2].SetZero();
  134.             mSrc[3].SetZero();
  135.             mbDotAB = false;
  136.             mbDotCD = false;
  137.             mbMux = false;
  138.         }
  139.  
  140.         void Write(FILE *f) {
  141.             uint8 scaleBiasCode = 0;
  142.  
  143.             if (mBias == GL_BIAS_BY_NEGATIVE_ONE_HALF_NV) {
  144.                 if (mScale == GL_SCALE_BY_TWO_NV)
  145.                     scaleBiasCode = 5;
  146.                 else
  147.                     scaleBiasCode = 4;
  148.             } else {
  149.                 switch(mScale) {
  150.                     case GL_SCALE_BY_TWO_NV:
  151.                         scaleBiasCode = 1;
  152.                         break;
  153.                     case GL_SCALE_BY_FOUR_NV:
  154.                         scaleBiasCode = 2;
  155.                         break;
  156.                     case GL_SCALE_BY_ONE_HALF_NV:
  157.                         scaleBiasCode = 3;
  158.                         break;
  159.                 }
  160.             }
  161.  
  162.             uint8 dst0Code = RegisterCombinerSrc::GetRegisterCode(mDst[0]);
  163.             uint8 dst1Code = RegisterCombinerSrc::GetRegisterCode(mDst[1]);
  164.             uint8 dst2Code = RegisterCombinerSrc::GetRegisterCode(mDst[2]);
  165.  
  166.             fprintf(f, "0x%02x,0x%02x,", scaleBiasCode + (dst0Code << 4), dst1Code + (dst2Code << 4));
  167.             fprintf(f, "0x%02x,", (mbDotAB ? 1 : 0) + (mbDotCD ? 2 : 0) + (mbMux ? 4 : 0));
  168.  
  169.             for(int i=0; i<4; ++i) {
  170.                 mSrc[i].Write(f);
  171.                 putc(',', f);
  172.             }
  173.         }
  174.     };
  175.  
  176.     struct RegisterCombiner {
  177.         RegisterCombinerHalf mColor;
  178.         RegisterCombinerHalf mAlpha;
  179.         int mConstantMapping[2];
  180.  
  181.         RegisterCombiner() {
  182.             mConstantMapping[0] = -1;
  183.             mConstantMapping[1] = -1;
  184.         }
  185.  
  186.         void Write(FILE *f, bool rc2) {
  187.             if (rc2)
  188.                 fprintf(f, "\t0x%02x,0x%02x,", (uint8)mConstantMapping[0], (uint8)mConstantMapping[1]);
  189.             mColor.Write(f);
  190.             mAlpha.Write(f);
  191.             putc('\n', f);
  192.         }
  193.     };
  194.  
  195.     struct RegisterCombinerFinal {
  196.         RegisterCombinerSrc mSrc[7];
  197.         int mConstantMapping[2];
  198.  
  199.         RegisterCombinerFinal() {
  200.             mConstantMapping[0] = -1;
  201.             mConstantMapping[1] = -1;
  202.             mSrc[0].SetZero();
  203.             mSrc[0].mMapping = GL_UNSIGNED_IDENTITY_NV;
  204.             mSrc[1].mReg = GL_SPARE0_NV;
  205.             mSrc[1].mMapping = GL_UNSIGNED_IDENTITY_NV;
  206.             mSrc[1].mPortion = GL_RGB;
  207.             mSrc[2] = mSrc[1];
  208.             mSrc[3].SetZero();
  209.             mSrc[3].mMapping = GL_UNSIGNED_IDENTITY_NV;
  210.             mSrc[4].SetZero();
  211.             mSrc[4].mMapping = GL_UNSIGNED_IDENTITY_NV;
  212.             mSrc[5].SetZero();
  213.             mSrc[5].mMapping = GL_UNSIGNED_IDENTITY_NV;
  214.             mSrc[6].mReg = GL_SPARE0_NV;
  215.             mSrc[6].mMapping = GL_UNSIGNED_IDENTITY_NV;
  216.             mSrc[6].mPortion = GL_ALPHA;
  217.         }
  218.  
  219.         void Write(FILE *f, bool rc2) {
  220.             fputc('\t', f);
  221.             if (rc2)
  222.                 fprintf(f, "0x%02x,0x%02x,", (uint8)mConstantMapping[0], (uint8)mConstantMapping[1]);
  223.  
  224.             for(int i=0; i<7; ++i) {
  225.                 mSrc[i].Write(f);
  226.                 fputc(',', f);
  227.             }
  228.             fputc('\n', f);
  229.         }
  230.     };
  231.  
  232.     class RegisterCombinerConfig : public vdrefcounted<IGLCFragmentShader> {
  233.     public:
  234.         RegisterCombinerConfig()
  235.             : mGeneralCombinerCount(0)
  236.             , mConstantsUsed(0)
  237.         {
  238.             memset(mConstants, 0, sizeof mConstants);
  239.         }
  240.  
  241.         const char *GetTypeString() {
  242.             return mConstantsUsed <= 2 && mGeneralCombinerCount <= 2 ? "kVDOpenGLFragmentShaderModeNVRC" : "kVDOpenGLFragmentShaderModeNVRC2";
  243.         }
  244.  
  245.         void Write(FILE *f, const char *sym) {
  246.             if (mConstantsUsed > 0) {
  247.                 fprintf(f, "static const float %s_constants[][4]={\n", sym);
  248.                 for(int i=0; i<mConstantsUsed; ++i) {
  249.                     fprintf(f, "\t{");
  250.  
  251.                     for(int j=0; j<4; ++j) {
  252.                         char buf[512];
  253.                         sprintf(buf, "%g", mConstants[i][j]);
  254.                         if (strchr(buf, '.'))
  255.                             fprintf(f, " %sf", buf);
  256.                         else
  257.                             fprintf(f, " %s.f", buf);
  258.  
  259.                         if (j != 3)
  260.                             putc(',', f);
  261.                     }
  262.  
  263.                     fprintf(f, " },\n");
  264.                 };
  265.                 fprintf(f, "};\n");
  266.             }
  267.  
  268.             bool rc2 = mConstantsUsed > 2 || mGeneralCombinerCount > 2;
  269.             fprintf(f, "static const uint8 %s_bytecode[]={\n", sym);
  270.             for(int i=0; i<mGeneralCombinerCount; ++i)
  271.                 mGeneralCombiners[i].Write(f, rc2);
  272.             mFinalCombiner.Write(f, rc2);
  273.             fprintf(f, "};\n");
  274.  
  275.             fprintf(f, "static const struct VDOpenGLNVRegisterCombinerConfig %s={\n", sym);
  276.             fprintf(f, "\t%d, ", mConstantsUsed);
  277.             fprintf(f, "%d, ", mGeneralCombinerCount);
  278.             if (mConstantsUsed > 0)
  279.                 fprintf(f, "%s_constants, ", sym);
  280.             else
  281.                 fprintf(f, "NULL, ");
  282.             fprintf(f, "%s_bytecode\n", sym);
  283.             fprintf(f, "};\n");
  284.         }
  285.  
  286.     public:
  287.         RegisterCombiner        mGeneralCombiners[8];
  288.         RegisterCombinerFinal    mFinalCombiner;
  289.         float    mConstants[16][4];
  290.         int mConstantsUsed;
  291.         int mGeneralCombinerCount;
  292.     };
  293. }
  294.  
  295. IGLCFragmentShader *CompileFragmentShaderNVRegisterCombiners(GLCErrorSink& errout, const GLCFragmentShader& shader, bool NV_register_combiners_2) {
  296.     int combinerLimit = NV_register_combiners_2 ? 8 : 2;
  297.  
  298.     if (!NV_register_combiners_2 && shader.mUsedConstants > 3)
  299.         errout.ThrowError(shader.mLocation, "NV_register_combiners only allows two constant registers");
  300.  
  301.     GLCFragmentShader::FragmentOps::const_iterator it(shader.mOps.begin()), itEnd(shader.mOps.end());
  302.     bool seenFinalCombiner = false;
  303.     int constantStageMask = 0;
  304.     int constantStageCount = 0;
  305.     int combinerOutputMask = 0;
  306.     bool combinerAlphaOp = false;
  307.     bool combinerColorOp = false;
  308.  
  309.     vdautoptr<RegisterCombinerConfig> config(new RegisterCombinerConfig);
  310.     RegisterCombiner *pCombiner = config->mGeneralCombiners;
  311.  
  312.     int constantMapping[16]={-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
  313.  
  314.     while(it!=itEnd) {
  315.         const GLCFragmentOp& op = *it;
  316.         bool isFinalCombiner = op.mInsn == kFSOpFinal;
  317.  
  318.         if (isFinalCombiner) {
  319.             if (seenFinalCombiner)
  320.                 errout.ThrowError(op.mLocation, "Final combiner already configured");
  321.  
  322.             seenFinalCombiner = true;
  323.         } else {
  324.             if (config->mGeneralCombinerCount >= combinerLimit)
  325.                 errout.ThrowError(shader.mLocation, "Exceeded instruction count limit for profile (%d max)\n", combinerLimit);
  326.         }
  327.  
  328.         // convert sources
  329.         RegisterCombinerSrc rsrcs[7];
  330.         bool allowAlpha = true;
  331.         bool allowColor = true;
  332.  
  333.         for(int i=0; i<7; ++i) {
  334.             const GLCSourceArg& opsrc = op.mSrcArgs[i];
  335.             RegisterCombinerSrc& rsrc = rsrcs[i];
  336.             int reg = opsrc.mReg;
  337.  
  338.             // map register
  339.             switch(opsrc.mReg & kRegTypeMask) {
  340.                 case 0:
  341.                     rsrc.mReg = 0;
  342.                     break;
  343.                 case kRegC0:
  344.                     {
  345.                         int index = reg - kRegC0;
  346.  
  347.                         if (constantMapping[index] == -1) {
  348.                             constantMapping[index] = config->mConstantsUsed;
  349.                             memcpy(config->mConstants[config->mConstantsUsed], shader.mConstants[index], sizeof(float)*4);
  350.                             index = config->mConstantsUsed++;
  351.                         }
  352.  
  353.                         if (NV_register_combiners_2) {
  354.                             int *mappings = isFinalCombiner ? config->mFinalCombiner.mConstantMapping : pCombiner->mConstantMapping;
  355.  
  356.                             if (index == mappings[0])
  357.                                 rsrc.mReg = GL_CONSTANT_COLOR0_NV;
  358.                             else if (index == mappings[1])
  359.                                 rsrc.mReg = GL_CONSTANT_COLOR1_NV;
  360.                             else if (mappings[0] == -1) {
  361.                                 mappings[0] = index;
  362.                                 rsrc.mReg = GL_CONSTANT_COLOR0_NV;
  363.                             } else if (mappings[1] == -1) {
  364.                                 mappings[1] = index;
  365.                                 rsrc.mReg = GL_CONSTANT_COLOR1_NV;
  366.                             } else
  367.                                 errout.ThrowError(op.mLocation, "Too many constants used in combiner stage (2 max)");
  368.                         } else {
  369.                             rsrc.mReg = index ? GL_CONSTANT_COLOR1_NV : GL_CONSTANT_COLOR0_NV;
  370.                         }
  371.                     }
  372.                     break;
  373.                 case kRegV0:
  374.                     if (opsrc.mReg == kRegV0)
  375.                         rsrc.mReg = GL_PRIMARY_COLOR_NV;
  376.                     else
  377.                         rsrc.mReg = GL_SECONDARY_COLOR_NV;
  378.                     break;
  379.                 case kRegR0:
  380.                     if (opsrc.mReg - kRegR0 >= 2)
  381.                         errout.ThrowError(op.mLocation, "Invalid spare register (max 2)");
  382.  
  383.                     if (opsrc.mReg == kRegR0)
  384.                         rsrc.mReg = GL_SPARE0_NV;
  385.                     else
  386.                         rsrc.mReg = GL_SPARE1_NV;
  387.                     break;
  388.  
  389.                 case kRegT0:
  390.                     if (opsrc.mReg - kRegT0 >= 4)
  391.                         errout.ThrowError(op.mLocation, "Invalid spare register (max 2)");
  392.  
  393.                     rsrc.mReg = GL_TEXTURE0_ARB + (opsrc.mReg - kRegT0);
  394.                     break;
  395.  
  396.                 default:
  397.                     errout.ThrowError(op.mLocation, "Internal error");
  398.             }
  399.  
  400.             // map swizzle
  401.             if (opsrc.mSwizzle == kSwizzleNone || opsrc.mReg == 0)
  402.                 rsrc.mPortion = GL_RGBA;
  403.             else if (opsrc.mSwizzle == kSwizzleRGB && opsrc.mSize == 3) {
  404.                 rsrc.mPortion = GL_RGB;
  405.                 allowAlpha = false;
  406.             } else if (opsrc.mSwizzle == kSwizzleAlpha)
  407.                 rsrc.mPortion = GL_ALPHA;
  408.             else if (opsrc.mSwizzle == kSwizzleBlue) {
  409.                 rsrc.mPortion = GL_BLUE;
  410.                 allowColor = false;
  411.             } else
  412.                 errout.ThrowError(op.mLocation, "Swizzle not allowed in this profile: must be .a, .b, .rgb, or .rgba (none)");
  413.  
  414.             // map modifiers
  415.             switch(opsrc.mMods) {
  416.             case 0:
  417.                 rsrc.mMapping = GL_SIGNED_IDENTITY_NV;
  418.                 break;
  419.             case kRegModNegate:
  420.                 rsrc.mMapping = GL_SIGNED_NEGATE_NV;
  421.                 break;
  422.             case kRegModSaturate:
  423.                 rsrc.mMapping = GL_UNSIGNED_IDENTITY_NV;
  424.                 break;
  425.             case kRegModComplement:
  426.             case kRegModSaturate | kRegModComplement:
  427.                 rsrc.mMapping = GL_UNSIGNED_INVERT_NV;
  428.                 break;
  429.             case kRegModBias | kRegModX2:
  430.                 rsrc.mMapping = GL_EXPAND_NORMAL_NV;
  431.                 break;
  432.             case kRegModBias | kRegModX2 | kRegModNegate:
  433.                 rsrc.mMapping = GL_EXPAND_NEGATE_NV;
  434.                 break;
  435.             case kRegModBias:
  436.                 rsrc.mMapping = GL_HALF_BIAS_NORMAL_NV;
  437.                 break;
  438.             case kRegModBias | kRegModNegate:
  439.                 rsrc.mMapping = GL_HALF_BIAS_NEGATE_NV;
  440.                 break;
  441.             default:
  442.                 errout.ThrowError(op.mLocation, "Unsupported source modifier");
  443.                 break;
  444.             }
  445.         }
  446.  
  447.         if (!isFinalCombiner) {
  448.             // sanity check and convert destinations
  449.             int dstMask = 0;
  450.             uint16 rdsts[3];
  451.  
  452.             for(int dst=0; dst<3; ++dst) {
  453.                 int reg = op.mDstArgs[dst].mReg;
  454.  
  455.                 rdsts[dst] = GL_DISCARD_NV;
  456.  
  457.                 if (reg && reg != kRegDiscard) {
  458.                     int regMask = op.mDstArgs[dst].mWriteMask;
  459.                     if (!dstMask)
  460.                         dstMask = regMask;
  461.                     else if (dstMask != regMask)
  462.                         errout.ThrowError(op.mLocation, "Inconsistent destination write masks");
  463.  
  464.                     switch(reg & kRegTypeMask) {
  465.                         case kRegC0:
  466.                             errout.ThrowError(op.mLocation, "Constant register cannot be used as destination");
  467.                             break;
  468.                         case kRegV0:
  469.                             errout.ThrowError(op.mLocation, "Interpolator register cannot be used as destination");
  470.                             break;
  471.                         case kRegT0:
  472.                             rdsts[dst] = GL_TEXTURE0_ARB + (reg - kRegT0);
  473.                             break;
  474.                         case kRegR0:
  475.                             if (reg - kRegR0 >= 2)
  476.                                 errout.ThrowError(op.mLocation, "Invalid spare register (max 2)");
  477.  
  478.                             rdsts[dst] = GL_SPARE0_NV + (reg - kRegR0);
  479.                             break;
  480.                         default:
  481.                             errout.ThrowError(op.mLocation, "Invalid destination register");
  482.                     }
  483.                 }
  484.             }
  485.  
  486.             // assign to color and alpha combiner halves
  487.             bool colorOp = false;
  488.             bool alphaOp = false;
  489.  
  490.             switch(dstMask) {
  491.                 case 7:
  492.                     colorOp = true;
  493.                     break;
  494.                 case 8:
  495.                     alphaOp = true;
  496.                     break;
  497.                 case 15:
  498.                     alphaOp = true;
  499.                     colorOp = true;
  500.                     break;
  501.                 default:
  502.                     errout.ThrowError(op.mLocation, "Invalid destination write mask. Must be one of: .rgb, .a, none (.rgba)");
  503.             }
  504.  
  505.             // convert instruction modifiers
  506.             RegisterCombinerHalf chalf;
  507.  
  508.             switch(op.mModifiers) {
  509.                 case 0:
  510.                     chalf.mScale = GL_NONE;
  511.                     chalf.mBias = GL_NONE;
  512.                     break;
  513.                 case kInsnModD2:
  514.                     chalf.mScale = GL_SCALE_BY_ONE_HALF_NV;
  515.                     chalf.mBias = GL_NONE;
  516.                     break;
  517.                 case kInsnModX2:
  518.                     chalf.mScale = GL_SCALE_BY_TWO_NV;
  519.                     chalf.mBias = GL_NONE;
  520.                     break;
  521.                 case kInsnModX4:
  522.                     chalf.mScale = GL_SCALE_BY_FOUR_NV;
  523.                     chalf.mBias = GL_NONE;
  524.                     break;
  525.                 case kInsnModBias:
  526.                     chalf.mScale = GL_NONE;
  527.                     chalf.mBias = GL_BIAS_BY_NEGATIVE_ONE_HALF_NV;
  528.                     break;
  529.                 case kInsnModBX2:
  530.                     chalf.mScale = GL_SCALE_BY_TWO_NV;
  531.                     chalf.mBias = GL_BIAS_BY_NEGATIVE_ONE_HALF_NV;
  532.                     break;
  533.                 default:
  534.                     errout.ThrowError(op.mLocation, "Unsupported instruction modifier");
  535.             }
  536.  
  537.             // create combiner configuration
  538.             switch(op.mInsn) {
  539.                 case kFSOpMov:
  540.                     // A*1 + B*0
  541.                     chalf.mDst[0] = rdsts[0];
  542.                     chalf.mDst[1] = GL_DISCARD_NV;
  543.                     chalf.mDst[2] = GL_DISCARD_NV;
  544.                     chalf.mSrc[0] = rsrcs[0];
  545.                     chalf.mSrc[1].SetOne();
  546.                     chalf.mSrc[2].SetZero();
  547.                     chalf.mSrc[3].SetZero();
  548.                     chalf.mbDotAB = false;
  549.                     chalf.mbDotCD = false;
  550.                     chalf.mbMux = false;
  551.                     break;
  552.                 case kFSOpAdd:
  553.                     // A*1 + B*1
  554.                     chalf.mDst[0] = GL_DISCARD_NV;
  555.                     chalf.mDst[1] = GL_DISCARD_NV;
  556.                     chalf.mDst[2] = rdsts[0];
  557.                     chalf.mSrc[0] = rsrcs[0];
  558.                     chalf.mSrc[1].SetOne();
  559.                     chalf.mSrc[2] = rsrcs[1];
  560.                     chalf.mSrc[3].SetOne();
  561.                     chalf.mbDotAB = false;
  562.                     chalf.mbDotCD = false;
  563.                     chalf.mbMux = false;
  564.                     break;
  565.                 case kFSOpSub:
  566.                     // A*1 + B*-1
  567.                     chalf.mDst[0] = GL_DISCARD_NV;
  568.                     chalf.mDst[1] = GL_DISCARD_NV;
  569.                     chalf.mDst[2] = rdsts[0];
  570.                     chalf.mSrc[0] = rsrcs[0];
  571.                     chalf.mSrc[1].SetOne();
  572.                     chalf.mSrc[2] = rsrcs[1];
  573.                     chalf.mSrc[3].SetMinusOne();
  574.                     chalf.mbDotAB = false;
  575.                     chalf.mbDotCD = false;
  576.                     chalf.mbMux = false;
  577.                     break;
  578.                 case kFSOpMul:
  579.                     // A*B + 0*0
  580.                     chalf.mDst[0] = rdsts[0];
  581.                     chalf.mDst[1] = GL_DISCARD_NV;
  582.                     chalf.mDst[2] = GL_DISCARD_NV;
  583.                     chalf.mSrc[0] = rsrcs[0];
  584.                     chalf.mSrc[1] = rsrcs[1];
  585.                     chalf.mSrc[2].SetZero();
  586.                     chalf.mSrc[3].SetZero();
  587.                     chalf.mbDotAB = false;
  588.                     chalf.mbDotCD = false;
  589.                     chalf.mbMux = false;
  590.                     break;
  591.                 case kFSOpMad:
  592.                     // A*B + C*0
  593.                     chalf.mDst[0] = GL_DISCARD_NV;
  594.                     chalf.mDst[1] = GL_DISCARD_NV;
  595.                     chalf.mDst[2] = rdsts[0];
  596.                     chalf.mSrc[0] = rsrcs[0];
  597.                     chalf.mSrc[1] = rsrcs[1];
  598.                     chalf.mSrc[2] = rsrcs[2];
  599.                     chalf.mSrc[3].SetOne();
  600.                     chalf.mbDotAB = false;
  601.                     chalf.mbDotCD = false;
  602.                     chalf.mbMux = false;
  603.                     break;
  604.                 case kFSOpLrp:
  605.                     // A*B + (1-A)*C
  606.                     chalf.mDst[0] = GL_DISCARD_NV;
  607.                     chalf.mDst[1] = GL_DISCARD_NV;
  608.                     chalf.mDst[2] = rdsts[0];
  609.                     chalf.mSrc[0] = rsrcs[0];
  610.                     chalf.mSrc[1] = rsrcs[1];
  611.                     chalf.mSrc[2] = rsrcs[0];
  612.                     chalf.mSrc[3] = rsrcs[2];
  613.  
  614.                     switch(chalf.mSrc[0].mMapping) {
  615.                     case GL_UNSIGNED_IDENTITY_NV:        // _sat
  616.                     case GL_SIGNED_IDENTITY_NV:            // (none)
  617.                         chalf.mSrc[0].mMapping = GL_UNSIGNED_IDENTITY_NV;
  618.                         chalf.mSrc[2].mMapping = GL_UNSIGNED_INVERT_NV;
  619.                         break;
  620.  
  621.                     case GL_UNSIGNED_INVERT_NV:            // 1-reg
  622.                         chalf.mSrc[0].mMapping = GL_UNSIGNED_INVERT_NV;
  623.                         chalf.mSrc[2].mMapping = GL_UNSIGNED_IDENTITY_NV;
  624.                         break;
  625.  
  626.                     default:
  627.                         errout.ThrowError(op.mLocation, "The first argument to 'lrp' can only use _sat and 1-reg modifiers");
  628.                     }
  629.                     chalf.mbDotAB = false;
  630.                     chalf.mbDotCD = false;
  631.                     chalf.mbMux = false;
  632.                     break;
  633.                 case kFSOpDp3:
  634.                     // dot(A.rgb, B.rgb)
  635.                     if (alphaOp)
  636.                         errout.ThrowError(op.mLocation, "'dp3' cannot be issued as an alpha instruction");
  637.                     chalf.mDst[0] = rdsts[0];
  638.                     chalf.mDst[1] = GL_DISCARD_NV;
  639.                     chalf.mDst[2] = GL_DISCARD_NV;
  640.                     chalf.mSrc[0] = rsrcs[0];
  641.                     chalf.mSrc[1] = rsrcs[1];
  642.                     chalf.mSrc[2].SetZero();
  643.                     chalf.mSrc[3].SetZero();
  644.                     chalf.mbDotAB = true;
  645.                     chalf.mbDotCD = false;
  646.                     chalf.mbMux = false;
  647.                     break;
  648.                 case kFSOpMma:
  649.                     chalf.mDst[0] = rdsts[0];
  650.                     chalf.mDst[1] = rdsts[1];
  651.                     chalf.mDst[2] = rdsts[2];
  652.                     chalf.mSrc[0] = rsrcs[0];
  653.                     chalf.mSrc[1] = rsrcs[1];
  654.                     chalf.mSrc[2] = rsrcs[2];
  655.                     chalf.mSrc[3] = rsrcs[3];
  656.                     chalf.mbDotAB = false;
  657.                     chalf.mbDotCD = false;
  658.                     chalf.mbMux = false;
  659.                     break;
  660.                 case kFSOpMms:
  661.                     chalf.mDst[0] = rdsts[0];
  662.                     chalf.mDst[1] = rdsts[1];
  663.                     chalf.mDst[2] = rdsts[2];
  664.                     chalf.mSrc[0] = rsrcs[0];
  665.                     chalf.mSrc[1] = rsrcs[1];
  666.                     chalf.mSrc[2] = rsrcs[2];
  667.                     chalf.mSrc[3] = rsrcs[3];
  668.                     chalf.mbDotAB = false;
  669.                     chalf.mbDotCD = false;
  670.                     chalf.mbMux = true;
  671.                     break;
  672.                 case kFSOpDm:
  673.                     if (alphaOp)
  674.                         errout.ThrowError("'dm' cannot be issued as an alpha instruction");
  675.                     chalf.mDst[0] = rdsts[0];
  676.                     chalf.mDst[1] = rdsts[1];
  677.                     chalf.mDst[2] = GL_DISCARD_NV;
  678.                     chalf.mSrc[0] = rsrcs[0];
  679.                     chalf.mSrc[1] = rsrcs[1];
  680.                     chalf.mSrc[2] = rsrcs[2];
  681.                     chalf.mSrc[3] = rsrcs[3];
  682.                     chalf.mbDotAB = true;
  683.                     chalf.mbDotCD = false;
  684.                     chalf.mbMux = false;
  685.                     break;
  686.                 case kFSOpDd:
  687.                     if (alphaOp)
  688.                         errout.ThrowError("'dd' cannot be issued as an alpha instruction");
  689.                     chalf.mDst[0] = rdsts[0];
  690.                     chalf.mDst[1] = rdsts[1];
  691.                     chalf.mDst[2] = GL_DISCARD_NV;
  692.                     chalf.mSrc[0] = rsrcs[0];
  693.                     chalf.mSrc[1] = rsrcs[1];
  694.                     chalf.mSrc[2] = rsrcs[2];
  695.                     chalf.mSrc[3] = rsrcs[3];
  696.                     chalf.mbDotAB = true;
  697.                     chalf.mbDotCD = true;
  698.                     chalf.mbMux = false;
  699.                     break;
  700.                 case kFSOpDda:
  701.                     if (alphaOp)
  702.                         errout.ThrowError("'dda' cannot be issued as an alpha instruction");
  703.                     chalf.mDst[0] = rdsts[0];
  704.                     chalf.mDst[1] = rdsts[1];
  705.                     chalf.mDst[2] = rdsts[2];
  706.                     chalf.mSrc[0] = rsrcs[0];
  707.                     chalf.mSrc[1] = rsrcs[1];
  708.                     chalf.mSrc[2] = rsrcs[2];
  709.                     chalf.mSrc[3] = rsrcs[3];
  710.                     chalf.mbDotAB = true;
  711.                     chalf.mbDotCD = true;
  712.                     chalf.mbMux = false;
  713.                     break;
  714.                 default:
  715.                     errout.ThrowError("Instruction not supported in this profile");
  716.             }
  717.  
  718.             if (alphaOp) {
  719.                 if (!allowAlpha)
  720.                     errout.ThrowError(op.mLocation, "Cannot use RGB argument in alpha operation");
  721.  
  722.                 if (combinerAlphaOp)
  723.                     errout.ThrowError(op.mLocation, "Cannot co-issue two alpha operations");
  724.  
  725.                 combinerAlphaOp = true;
  726.  
  727.                 RegisterCombinerHalf& calpha = pCombiner->mAlpha;
  728.                 calpha = chalf;
  729.  
  730.                 for(int i=0; i<4; ++i) {
  731.                     if (calpha.mSrc[i].mPortion == GL_RGBA)
  732.                         calpha.mSrc[i].mPortion = GL_ALPHA;
  733.                 }
  734.             }
  735.  
  736.             if (colorOp) {
  737.                 if (!allowColor)
  738.                     errout.ThrowError(op.mLocation, "Cannot use .b swizzle on color operation");
  739.  
  740.                 if (combinerColorOp)
  741.                     errout.ThrowError(op.mLocation, "Cannot co-issue two color operations");
  742.  
  743.                 combinerColorOp = true;
  744.  
  745.                 RegisterCombinerHalf& ccolor = pCombiner->mColor;
  746.                 ccolor = chalf;
  747.  
  748.                 for(int i=0; i<4; ++i) {
  749.                     if (ccolor.mSrc[i].mPortion == GL_RGBA)
  750.                         ccolor.mSrc[i].mPortion = GL_RGB;
  751.                 }
  752.             }
  753.         } else {
  754.             for(int i=0; i<7; ++i) {
  755.                 RegisterCombinerSrc& rsrc = config->mFinalCombiner.mSrc[i];
  756.                 
  757.                 rsrc = rsrcs[i];
  758.  
  759.                 switch(rsrc.mMapping) {
  760.                     case GL_SIGNED_IDENTITY_NV:        // we implicitly saturate, so we allow this
  761.                         rsrc.mMapping = GL_UNSIGNED_IDENTITY_NV;
  762.                         break;
  763.                     case GL_UNSIGNED_IDENTITY_NV:
  764.                     case GL_UNSIGNED_INVERT_NV:
  765.                         break;
  766.                     default:
  767.                         errout.ThrowError(op.mLocation, "Inputs to the final combiner must use unsigned saturation or complement");
  768.                         break;
  769.                 }
  770.  
  771.                 if (i < 6) {
  772.                     if (rsrc.mPortion == GL_RGBA)
  773.                         rsrc.mPortion = GL_RGB;
  774.  
  775.                     if (rsrc.mPortion == GL_BLUE)
  776.                         errout.ThrowError(op.mLocation, "Final combiner inputs A-F must use .a, .rgb, or .rgba (none) swizzle");
  777.                 } else {
  778.                     if (rsrc.mPortion == GL_RGB)
  779.                         errout.ThrowError(op.mLocation, "Final combiner input G must use .a, .b, or .rgba (none) swizzle");
  780.  
  781.                     if (rsrc.mPortion == GL_RGBA)
  782.                         rsrc.mPortion = GL_ALPHA;
  783.                 }
  784.             }
  785.         }
  786.  
  787.         ++it;
  788.         if (isFinalCombiner || it == itEnd || !it->mbCoIssue) {
  789.             // flush combiner
  790.             if (!isFinalCombiner) {
  791.                 ++config->mGeneralCombinerCount;
  792.                 ++pCombiner;
  793.             }
  794.  
  795.             constantStageMask = 0;
  796.             constantStageCount = 0;
  797.             combinerAlphaOp = false;
  798.             combinerColorOp = false;
  799.         }
  800.     }
  801.  
  802. #if 0
  803.     // dump combiners
  804.     for(int i=0; i<config->mConstantsUsed; ++i) {
  805.         printf("def c%d, %g, %g, %g, %g\n", i, config->mConstants[i][0], config->mConstants[i][1], config->mConstants[i][2], config->mConstants[i][3]);
  806.     }
  807.  
  808.     for(int i=0; i<config->mGeneralCombinerCount; ++i) {
  809.         const RegisterCombiner& comb = config->mGeneralCombiners[i];
  810.         int maxdst = 2;
  811.         bool coissue = false;
  812.  
  813.         for(int j=0; j<2; ++j) {
  814.             const RegisterCombinerHalf& chalf = j ? comb.mAlpha : comb.mColor;
  815.  
  816.             if (chalf.mDst[0] == GL_DISCARD_NV && chalf.mDst[1] == GL_DISCARD_NV && chalf.mDst[2] == GL_DISCARD_NV)
  817.                 continue;
  818.  
  819.             if (coissue)
  820.                 printf("+ ");
  821.             coissue = true;
  822.  
  823.             if (chalf.mbDotAB) {
  824.                 if (chalf.mbDotCD)
  825.                     printf("dd");
  826.                 else
  827.                     printf("dm");
  828.             } else {
  829.                 if (chalf.mbDotCD)
  830.                     printf("md");
  831.                 else {
  832.                     maxdst = 3;
  833.                     if (chalf.mbMux)
  834.                         printf("mms");
  835.                     else
  836.                         printf("mma");
  837.                 }
  838.             }
  839.  
  840.             if (chalf.mBias == GL_BIAS_BY_NEGATIVE_ONE_HALF_NV) {
  841.                 if (chalf.mScale == GL_SCALE_BY_TWO_NV)
  842.                     printf("_bx2");
  843.                 else
  844.                     printf("_bias");
  845.             } else {
  846.                 switch(chalf.mScale) {
  847.                     case GL_SCALE_BY_ONE_HALF_NV:
  848.                         printf("_d2");
  849.                         break;
  850.                     case GL_SCALE_BY_TWO_NV:
  851.                         printf("_x2");
  852.                         break;
  853.                     case GL_SCALE_BY_FOUR_NV:
  854.                         printf("_x4");
  855.                         break;
  856.                 }
  857.             }
  858.  
  859.             for(int k=0; k<maxdst; ++k) {
  860.                 if (k)
  861.                     putchar(',');
  862.  
  863.                 putchar(' ');
  864.  
  865.                 switch(chalf.mDst[k]) {
  866.                     case GL_DISCARD_NV:        printf("discard"); break;
  867.                     case GL_SPARE0_NV:        printf("r0"); break;
  868.                     case GL_SPARE1_NV:        printf("r1"); break;
  869.                     case GL_TEXTURE0_ARB:    printf("t0"); break;
  870.                     case GL_TEXTURE1_ARB:    printf("t1"); break;
  871.                     case GL_TEXTURE2_ARB:    printf("t2"); break;
  872.                     case GL_TEXTURE3_ARB:    printf("t3"); break;
  873.                 }
  874.  
  875.                 if (chalf.mDst[k] != GL_DISCARD_NV)
  876.                     printf(j ? ".a" : ".rgb");
  877.             }
  878.  
  879.             for(int k=0; k<4; ++k) {
  880.                 const RegisterCombinerSrc& rsrc = chalf.mSrc[k];
  881.  
  882.                 printf(", ");
  883.  
  884.                 rsrc.Print(j ? GL_ALPHA : GL_RGB);
  885.             }
  886.  
  887.             putchar('\n');
  888.         }
  889.     }
  890.  
  891.     printf("final");
  892.     for(int i=0; i<7; ++i) {
  893.         if (i)
  894.             putchar(',');
  895.         putchar(' ');
  896.         config->mFinalCombiner.mSrc[i].Print(i == 6 ? GL_ALPHA : GL_RGB);
  897.     }
  898.     putchar('\n');
  899. #endif
  900.  
  901.     return config.release();
  902. }
  903.