home *** CD-ROM | disk | FTP | other *** search
/ Computer Shopper 275 / DPCS0111DVD.ISO / Toolkit / Audio-Visual / VirtualDub / Source / VirtualDub-1.9.10-src.7z / src / Kasumi / source / uberblit.cpp < prev   
Encoding:
C/C++ Source or Header  |  2009-10-28  |  28.8 KB  |  904 lines

  1. #include <vd2/Kasumi/pixmap.h>
  2. #include <vd2/Kasumi/pixmaputils.h>
  3. #include "uberblit.h"
  4. #include "uberblit_gen.h"
  5.  
  6. uint32 VDPixmapGetFormatTokenFromFormat(int format) {
  7.     using namespace nsVDPixmap;
  8.     switch(format) {
  9.     case kPixFormat_Pal1:            return kVDPixType_1 | kVDPixSamp_444 | kVDPixSpace_Pal;
  10.     case kPixFormat_Pal2:            return kVDPixType_2 | kVDPixSamp_444 | kVDPixSpace_Pal;
  11.     case kPixFormat_Pal4:            return kVDPixType_4 | kVDPixSamp_444 | kVDPixSpace_Pal;
  12.     case kPixFormat_Pal8:            return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Pal;
  13.     case kPixFormat_XRGB1555:        return kVDPixType_1555_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
  14.     case kPixFormat_RGB565:            return kVDPixType_565_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
  15.     case kPixFormat_RGB888:            return kVDPixType_888 | kVDPixSamp_444 | kVDPixSpace_BGR;
  16.     case kPixFormat_XRGB8888:        return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_BGR;
  17.     case kPixFormat_Y8:                return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Y_601;
  18.     case kPixFormat_YUV422_UYVY:    return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
  19.     case kPixFormat_YUV422_YUYV:    return kVDPixType_G8B8_G8R8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
  20.     case kPixFormat_YUV444_XVYU:    return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
  21.     case kPixFormat_YUV444_Planar:    return kVDPixType_8_8_8 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
  22.     case kPixFormat_YUV422_Planar:    return kVDPixType_8_8_8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
  23.     case kPixFormat_YUV422_Planar_16F:    return kVDPixType_16F_16F_16F_LE | kVDPixSamp_422 | kVDPixSpace_YCC_601;
  24.     case kPixFormat_YUV420_Planar:    return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
  25.     case kPixFormat_YUV411_Planar:    return kVDPixType_8_8_8 | kVDPixSamp_411 | kVDPixSpace_YCC_601;
  26.     case kPixFormat_YUV410_Planar:    return kVDPixType_8_8_8 | kVDPixSamp_410 | kVDPixSpace_YCC_601;
  27.     case kPixFormat_YUV422_Planar_Centered:    return kVDPixType_8_8_8 | kVDPixSamp_422_JPEG | kVDPixSpace_YCC_601;
  28.     case kPixFormat_YUV420_Planar_Centered:    return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG1 | kVDPixSpace_YCC_601;
  29.     case kPixFormat_YUV422_V210:    return kVDPixType_V210 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
  30.     case kPixFormat_YUV422_UYVY_709:    return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_709;
  31.     case kPixFormat_YUV420_NV12:    return kVDPixType_8_B8R8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
  32.     default:
  33.         VDASSERT(false);
  34.         return 0;
  35.     }
  36. }
  37.  
  38. const VDPixmapSamplingInfo& VDPixmapGetSamplingInfo(uint32 samplingToken) {
  39.     static const VDPixmapSamplingInfo kPixmapSamplingInfo[]={
  40.         /* Null            */ {  0,  0,  0,  0,  0 },
  41.         /* 444            */ {  0,  0,  0,  0,  0 },
  42.         /* 422            */ { -4,  0,  0,  1,  0 },
  43.         /* 422_JPEG        */ {  0,  0,  0,  1,  0 },
  44.         /* 420_MPEG2    */ { -4,  0,  0,  1,  1 },
  45.         /* 420_MPEG2INT    */ { -4,  0,  0,  1,  1 },
  46.         /* 420_MPEG1    */ {  0,  0,  0,  1,  1 },
  47.         /* 420_DVPAL    */ { -4,  0,  0,  1,  1 },
  48.         /* 411            */ { -6,  0,  0,  2,  0 },
  49.         /* 410            */ { -6,  0,  0,  2,  2 }
  50.     };
  51.  
  52.     uint32 index = (samplingToken & kVDPixSamp_Mask) >> kVDPixSamp_Bits;
  53.  
  54.     return index >= sizeof(kPixmapSamplingInfo)/sizeof(kPixmapSamplingInfo[0]) ? kPixmapSamplingInfo[0] : kPixmapSamplingInfo[index];
  55. }
  56.  
  57. namespace {
  58.     uint32 BlitterConvertSampling(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstSamplingToken, sint32 w, sint32 h) {
  59.         // if the source type is 16F, we have to convert to 32F
  60.         if ((srcToken & kVDPixType_Mask) == kVDPixType_16F_16F_16F_LE) {
  61.             // 0 1 2
  62.             gen.conv_16F_to_32F();
  63.             gen.swap(1);
  64.             // 1 0 2
  65.             gen.conv_16F_to_32F();
  66.             gen.swap(2);
  67.             // 2 0 1
  68.             gen.conv_16F_to_32F();
  69.             gen.swap(2);
  70.             gen.swap(1);
  71.             srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
  72.         }
  73.  
  74.         // look up sampling info
  75.         const VDPixmapSamplingInfo& srcInfo = VDPixmapGetSamplingInfo(srcToken);
  76.         const VDPixmapSamplingInfo& dstInfo = VDPixmapGetSamplingInfo(dstSamplingToken);
  77.  
  78.         // convert destination chroma origin to luma space
  79.         int c_x = ((8 + dstInfo.mCXOffset16) << dstInfo.mCXBits) - 8;
  80.         int cr_y = ((8 + dstInfo.mCrYOffset16) << dstInfo.mCYBits) - 8;
  81.         int cb_y = ((8 + dstInfo.mCbYOffset16) << dstInfo.mCYBits) - 8;
  82.  
  83.         // convert luma chroma location to source chroma space
  84.         c_x = ((8 + c_x) >> srcInfo.mCXBits) - 8 - srcInfo.mCXOffset16;
  85.         cr_y = ((8 + cr_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCrYOffset16;
  86.         cb_y = ((8 + cb_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCbYOffset16;
  87.  
  88.         float cxo = c_x / 16.0f + 0.5f;
  89.         float cxf = ((16 << dstInfo.mCXBits) >> srcInfo.mCXBits) / 16.0f;
  90.         float cyf = ((16 << dstInfo.mCYBits) >> srcInfo.mCYBits) / 16.0f;
  91.         sint32 cw = -(-w >> dstInfo.mCXBits);
  92.         sint32 ch = -(-h >> dstInfo.mCYBits);
  93.  
  94.         gen.swap(2);
  95.         gen.linear(cxo, cxf, cw, cb_y / 16.0f + 0.5f, cyf, ch);
  96.         gen.swap(2);
  97.         gen.linear(cxo, cxf, cw, cr_y / 16.0f + 0.5f, cyf, ch);
  98.  
  99.         return (srcToken & ~kVDPixSamp_Mask) | (dstSamplingToken & kVDPixSamp_Mask);
  100.     }
  101.  
    // Appends generator operations that convert the pixel type of the data
    // described by srcToken into the pixel type requested by dstToken, and
    // returns the updated source token.
    //
    //   gen       - generator receiving the conversion operations
    //   srcToken  - format token of the data currently on the generator stack
    //   dstToken  - token whose type field (kVDPixType_Mask) is the goal; its
    //               sampling field is also consulted when interleaving to the
    //               B8G8_R8G8 / G8B8_G8R8 types
    //   w, h      - luma dimensions, used to size extract/resample operations
    //
    // The outer loop runs until the type fields of srcToken and dstToken agree.
    // Each pass performs a single conversion step. When no direct conversion
    // from the current source type to the wanted type is coded, targetType is
    // replaced by an intermediate type and the switch is re-entered through
    // the type_reconvert label; the next loop pass then resumes toward dstType.
    //
    // NOTE(review): the inner/outer 'default' paths that only VDASSERT(false)
    // leave srcToken unchanged, so the loop condition stays true on the next
    // pass. Unless VDASSERT halts execution, an unsupported type pair would
    // spin here -- confirm how VDASSERT behaves in non-debug builds.
    uint32 BlitterConvertType(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstToken, sint32 w, sint32 h) {
        uint32 dstType = dstToken & kVDPixType_Mask;

        while((srcToken ^ dstToken) & kVDPixType_Mask) {
            uint32 srcType = srcToken & kVDPixType_Mask;
            uint32 targetType = dstType;

    type_reconvert:
            switch(targetType) {
                // ---- target: 16-bit 1555 ----
                case kVDPixType_1555_LE:
                    switch(srcType) {
                        case kVDPixType_565_LE:
                            gen.conv_565_to_555();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
                            break;

                        case kVDPixType_8888:
                            gen.conv_8888_to_555();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
                            break;
                        case kVDPixType_B8G8_R8G8:
                        case kVDPixType_G8B8_G8R8:
                            // interleaved sources are split into three planes first
                            targetType = kVDPixType_8_8_8;
                            goto type_reconvert;
                        default:
                            // everything else goes through 8888 as an intermediate
                            targetType = kVDPixType_8888;
                            goto type_reconvert;
                    }
                    break;

                // ---- target: 16-bit 565 ----
                case kVDPixType_565_LE:
                    switch(srcType) {
                        case kVDPixType_1555_LE:
                            gen.conv_555_to_565();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
                            break;
                        case kVDPixType_8888:
                            gen.conv_8888_to_565();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
                            break;
                        case kVDPixType_B8G8_R8G8:
                        case kVDPixType_G8B8_G8R8:
                            // interleaved sources are split into three planes first
                            targetType = kVDPixType_8_8_8;
                            goto type_reconvert;
                        default:
                            // everything else goes through 8888 as an intermediate
                            targetType = kVDPixType_8888;
                            goto type_reconvert;
                    }
                    break;

                // ---- target: 24-bit 888 ----
                case kVDPixType_888:
                    switch(srcType) {
                        case kVDPixType_8888:
                            gen.conv_8888_to_888();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_888;
                            break;
                        default:
                            // only reachable from 8888; convert to that first
                            targetType = kVDPixType_8888;
                            goto type_reconvert;
                    }
                    break;

                // ---- target: 32-bit 8888 ----
                case kVDPixType_8888:
                    switch(srcType) {
                        case kVDPixType_1555_LE:
                            gen.conv_555_to_8888();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
                            break;
                        case kVDPixType_565_LE:
                            gen.conv_565_to_8888();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
                            break;
                        case kVDPixType_888:
                            gen.conv_888_to_8888();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
                            break;
                        case kVDPixType_32Fx4_LE:
                            gen.conv_X32F_to_8888();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
                            break;
                        case kVDPixType_8_8_8:
                            // planes must all be full resolution before they are interleaved
                            if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_444)
                                srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
                            gen.interleave_X8R8G8B8();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
                            break;
                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                            break;
                    }
                    break;

                // ---- target: single 8-bit plane ----
                case kVDPixType_8:
                    switch(srcType) {
                        case kVDPixType_8_8_8:
                            // discard two of the three stacked planes, keeping one
                            gen.pop();
                            gen.swap(1);
                            gen.pop();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
                            break;

                        case kVDPixType_16F_LE:
                            // 16F reaches 8-bit via 32F
                            targetType = kVDPixType_32F_LE;
                            goto type_reconvert;

                        case kVDPixType_32F_LE:
                            gen.conv_32F_to_8();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
                            break;

                        default:
                            // anything else is first brought to three 8-bit planes
                            targetType = kVDPixType_8_8_8;
                            goto type_reconvert;
                    }
                    break;

                // ---- target: three 8-bit planes ----
                case kVDPixType_8_8_8:
                    switch(srcType) {
                        case kVDPixType_B8G8_R8G8:
                            // triplicate the interleaved plane, then pull one channel out of each copy:
                            // two half-width extracts from 32-bit groups and one full-width extract
                            // from 16-bit groups
                            gen.dup();
                            gen.dup();
                            gen.extract_8in32(2, (w + 1) >> 1, h);
                            gen.swap(2);
                            gen.extract_8in16(1, w, h);
                            gen.swap(1);
                            gen.extract_8in32(0, (w + 1) >> 1, h);
                            // the resulting planes are tagged as 4:2:2
                            srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
                            break;
                        case kVDPixType_G8B8_G8R8:
                            // same scheme as above with the byte offsets for this channel order
                            gen.dup();
                            gen.dup();
                            gen.extract_8in32(3, (w + 1) >> 1, h);
                            gen.swap(2);
                            gen.extract_8in16(0, w, h);
                            gen.swap(1);
                            gen.extract_8in32(1, (w + 1) >> 1, h);
                            // the resulting planes are tagged as 4:2:2
                            srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
                            break;
                        case kVDPixType_16F_16F_16F_LE:
                        case kVDPixType_V210:
                            // these reach 8-bit planes via three 32F planes
                            targetType = kVDPixType_32F_32F_32F_LE;
                            goto type_reconvert;
                        case kVDPixType_32F_32F_32F_LE:
                            // convert each plane in turn; the swaps rotate the next plane to the
                            // top and finally restore the original order
                            // 0 1 2
                            gen.conv_32F_to_8();
                            gen.swap(1);
                            // 1 0 2
                            gen.conv_32F_to_8();
                            gen.swap(2);
                            // 2 0 1
                            gen.conv_32F_to_8();
                            gen.swap(2);
                            gen.swap(1);
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
                            break;
                        case kVDPixType_8_B8R8:
                            {
                                // chroma plane dimensions, rounded up, from the source sampling
                                const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
                                int cw = -(-w >> sampInfo.mCXBits);
                                int ch = -(-h >> sampInfo.mCYBits);

                                // duplicate the interleaved chroma plane and extract one byte of
                                // each 16-bit pair from each copy
                                gen.dup();
                                gen.extract_8in16(1, cw, ch);
                                gen.swap(2);
                                gen.swap(1);
                                gen.extract_8in16(0, cw, ch);
                                srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
                            }
                            break;
                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                            break;
                    }
                    break;

                // ---- target: interleaved B8G8_R8G8 ----
                case kVDPixType_B8G8_R8G8:
                    switch(srcType) {
                    case kVDPixType_8_8_8:
                        // match the destination's chroma sampling before interleaving
                        if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
                            srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);

                        gen.interleave_B8G8_R8G8();
                        // NOTE(review): the sampling bits are cleared here without a new
                        // sampling value being OR'd in (the deinterleave paths set
                        // kVDPixSamp_422 explicitly) -- confirm this is intended.
                        srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
                        break;
                    case kVDPixType_G8B8_G8R8:
                        // the two interleaved layouts differ by a byte swap within each 16-bit pair
                        gen.swap_8in16(w, h, w*2);
                        srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
                        break;
                    default:
                        targetType = kVDPixType_8_8_8;
                        goto type_reconvert;
                    }
                    break;

                // ---- target: interleaved G8B8_G8R8 ----
                case kVDPixType_G8B8_G8R8:
                    switch(srcType) {
                    case kVDPixType_8_8_8:
                        // match the destination's chroma sampling before interleaving
                        if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
                            srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);

                        gen.interleave_G8B8_G8R8();
                        // NOTE(review): sampling bits cleared without being re-set, as in the
                        // B8G8_R8G8 case above -- confirm this is intended.
                        srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
                        break;
                    case kVDPixType_B8G8_R8G8:
                        // the two interleaved layouts differ by a byte swap within each 16-bit pair
                        gen.swap_8in16(w, h, w*2);
                        srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
                        break;
                    default:
                        targetType = kVDPixType_8_8_8;
                        goto type_reconvert;
                    }
                    break;

                // ---- target: three 16F planes ----
                case kVDPixType_16F_16F_16F_LE:
                    switch(srcType) {
                        case kVDPixType_32F_32F_32F_LE:
                            // narrow each plane in turn, restoring stack order afterwards
                            // 0 1 2
                            gen.conv_32F_to_16F();
                            gen.swap(1);
                            // 1 0 2
                            gen.conv_32F_to_16F();
                            gen.swap(2);
                            // 2 0 1
                            gen.conv_32F_to_16F();
                            gen.swap(2);
                            gen.swap(1);
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_16F_16F_LE;
                            break;

                        default:
                            // only reachable from three 32F planes; convert to that first
                            targetType = kVDPixType_32F_32F_32F_LE;
                            goto type_reconvert;
                    }
                    break;

                // ---- target: three 32F planes ----
                case kVDPixType_32F_32F_32F_LE:
                    switch(srcType) {
                        case kVDPixType_8_8_8:
                            // widen each plane in turn, restoring stack order afterwards
                            // 0 1 2
                            gen.conv_8_to_32F();
                            gen.swap(1);
                            // 1 0 2
                            gen.conv_8_to_32F();
                            gen.swap(2);
                            // 2 0 1
                            gen.conv_8_to_32F();
                            gen.swap(2);
                            gen.swap(1);
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
                            break;

                        case kVDPixType_16F_16F_16F_LE:
                            // widen each plane in turn, restoring stack order afterwards
                            // 0 1 2
                            gen.conv_16F_to_32F();
                            gen.swap(1);
                            // 1 0 2
                            gen.conv_16F_to_32F();
                            gen.swap(2);
                            // 2 0 1
                            gen.conv_16F_to_32F();
                            gen.swap(2);
                            gen.swap(1);
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
                            break;

                        case kVDPixType_B8G8_R8G8:
                        case kVDPixType_G8B8_G8R8:
                        case kVDPixType_8_B8R8:
                            // interleaved 8-bit sources are split into three 8-bit planes first
                            targetType = kVDPixType_8_8_8;
                            goto type_reconvert;

                        case kVDPixType_V210:
                            gen.conv_V210_to_32F();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
                            break;

                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                    }
                    break;

                // ---- target: packed V210 ----
                case kVDPixType_V210:
                    switch(srcType) {
                        case kVDPixType_32F_32F_32F_LE:
                            // V210 packing is fed with 4:2:2 data
                            if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
                                srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);

                            gen.conv_32F_to_V210();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_V210;
                            break;

                        case kVDPixType_16F_16F_16F_LE:
                            targetType = kVDPixType_32F_32F_32F_LE;
                            goto type_reconvert;

                        case kVDPixType_8_8_8:
                            // resample to 4:2:2 while still 8-bit, then widen to 32F
                            if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
                                srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);

                            targetType = kVDPixType_32F_32F_32F_LE;
                            goto type_reconvert;

                        case kVDPixType_B8G8_R8G8:
                        case kVDPixType_G8B8_G8R8:
                        case kVDPixType_8_B8R8:
                            // interleaved 8-bit sources are split into three 8-bit planes first
                            targetType = kVDPixType_8_8_8;
                            goto type_reconvert;

                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                    }
                    break;

                // ---- target: single 32F plane ----
                case kVDPixType_32F_LE:
                    switch(srcType) {
                        case kVDPixType_8:
                            gen.conv_8_to_32F();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
                            break;
                        case kVDPixType_16F_LE:
                            gen.conv_16F_to_32F();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
                            break;
                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                    }
                    break;

                // ---- target: 8-bit plane plus interleaved B8R8 plane ----
                case kVDPixType_8_B8R8:
                    switch(srcType) {
                        case kVDPixType_8_8_8:
                            // reorder the stacked planes as interleave_B8R8 expects, then merge
                            gen.swap(1);
                            gen.swap(2);
                            gen.interleave_B8R8();
                            srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_B8R8;
                            break;
                        default:
                            // no conversion coded for this source type; srcToken is not modified
                            VDASSERT(false);
                            break;
                    }
                    break;

                default:
                    // target type with no conversion table at all; srcToken is not modified
                    VDASSERT(false);
                    break;
            }
        }

        return srcToken;
    }
  452. }
  453.  
  454. IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmap& dst, const VDPixmap& src) {
  455.     const VDPixmapLayout& dstlayout = VDPixmapToLayoutFromBase(dst, dst.data);
  456.     const VDPixmapLayout& srclayout = VDPixmapToLayoutFromBase(src, src.data);
  457.  
  458.     return VDPixmapCreateBlitter(dstlayout, srclayout);
  459. }
  460.  
  461. IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmapLayout& dst, const VDPixmapLayout& src) {
  462.     if (src.format == dst.format) {
  463.         return VDCreatePixmapUberBlitterDirectCopy(dst, src);
  464.     }
  465.  
  466.     uint32 srcToken = VDPixmapGetFormatTokenFromFormat(src.format);
  467.     uint32 dstToken = VDPixmapGetFormatTokenFromFormat(dst.format);
  468.  
  469.     VDPixmapUberBlitterGenerator gen;
  470.  
  471.     // load source channels
  472.     int w = src.w;
  473.     int h = src.h;
  474.  
  475.     switch(srcToken & kVDPixType_Mask) {
  476.     case kVDPixType_1:
  477.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 7) >> 3);
  478.         break;
  479.  
  480.     case kVDPixType_2:
  481.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 3) >> 2);
  482.         break;
  483.  
  484.     case kVDPixType_4:
  485.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 1) >> 1);
  486.         break;
  487.  
  488.     case kVDPixType_8:
  489.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
  490.         break;
  491.  
  492.     case kVDPixType_555_LE:
  493.     case kVDPixType_565_LE:
  494.     case kVDPixType_1555_LE:
  495.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
  496.         break;
  497.  
  498.     case kVDPixType_888:
  499.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*3);
  500.         break;
  501.  
  502.     case kVDPixType_8888:
  503.     case kVDPixType_32F_LE:
  504.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
  505.         break;
  506.  
  507.     case kVDPixType_32Fx4_LE:
  508.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*16);
  509.         break;
  510.  
  511.     case kVDPixType_B8G8_R8G8:
  512.     case kVDPixType_G8B8_G8R8:
  513.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 1) & ~1)*2);
  514.         break;
  515.  
  516.     case kVDPixType_8_8_8:
  517.         {
  518.             uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
  519.             uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
  520.             uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
  521.  
  522.             const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
  523.  
  524.             int cxbits = sampInfo.mCXBits;
  525.             int cybits = sampInfo.mCYBits;
  526.             int w2 = -(-w >> cxbits);
  527.             int h2 = -(-h >> cybits);
  528.             gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2);
  529.             gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
  530.             gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2);
  531.         }
  532.         break;
  533.  
  534.     case kVDPixType_16F_16F_16F_LE:
  535.         {
  536.             uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
  537.             uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
  538.             uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
  539.  
  540.             const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
  541.  
  542.             int cxbits = sampInfo.mCXBits;
  543.             int cybits = sampInfo.mCYBits;
  544.             int w2 = -(-w >> cxbits);
  545.             int h2 = -(-h >> cybits);
  546.             gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 2);
  547.             gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
  548.             gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 2);
  549.         }
  550.         break;
  551.  
  552.     case kVDPixType_32F_32F_32F_LE:
  553.         {
  554.             uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
  555.             uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
  556.             uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
  557.  
  558.             const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
  559.  
  560.             int cxbits = sampInfo.mCXBits;
  561.             int cybits = sampInfo.mCYBits;
  562.             int w2 = -(-w >> cxbits);
  563.             int h2 = -(-h >> cybits);
  564.             gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 4);
  565.             gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
  566.             gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 4);
  567.         }
  568.         break;
  569.  
  570.     case kVDPixType_V210:
  571.         gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 5) / 6) * 4);
  572.         break;
  573.  
  574.     case kVDPixType_8_B8R8:
  575.         {
  576.             uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
  577.             uint32 ctoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_B8R8;
  578.  
  579.             const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
  580.  
  581.             int cxbits = sampInfo.mCXBits;
  582.             int cybits = sampInfo.mCYBits;
  583.             int w2 = -(-w >> cxbits);
  584.             int h2 = -(-h >> cybits);
  585.             gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
  586.             gen.ldsrc(0, 1, 0, 0, w2, h2, ctoken, w2*2);
  587.         }
  588.         break;
  589.  
  590.     default:
  591.         VDASSERT(false);
  592.     }
  593.  
  594.     // check if we need a color space change
  595.     if ((srcToken ^ dstToken) & kVDPixSpace_Mask) {
  596.         // first, if we're dealing with an interleaved format, deinterleave it
  597.         switch(srcToken & kVDPixType_Mask) {
  598.         case kVDPixType_B8G8_R8G8:
  599.             gen.dup();
  600.             gen.dup();
  601.             gen.extract_8in32(2, (w + 1) >> 1, h);
  602.             gen.swap(2);
  603.             gen.extract_8in16(1, w, h);
  604.             gen.swap(1);
  605.             gen.extract_8in32(0, (w + 1) >> 1, h);
  606.             srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
  607.             break;
  608.  
  609.         case kVDPixType_G8B8_G8R8:
  610.             gen.dup();
  611.             gen.dup();
  612.             gen.extract_8in32(3, (w + 1) >> 1, h);
  613.             gen.swap(2);
  614.             gen.extract_8in16(0, w, h);
  615.             gen.swap(1);
  616.             gen.extract_8in32(1, (w + 1) >> 1, h);
  617.             srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
  618.             break;
  619.  
  620.         case kVDPixType_8_B8R8:
  621.             gen.dup();
  622.             gen.extract_8in16(1, (w + 1) >> 1, (h + 1) >> 1);
  623.             gen.swap(2);
  624.             gen.swap(1);
  625.             gen.extract_8in16(0, (w + 1) >> 1, (h + 1) >> 1);
  626.             srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
  627.             break;
  628.  
  629.         case kVDPixType_V210:
  630.             gen.conv_V210_to_32F();
  631.             srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
  632.             break;
  633.         }
  634.  
  635.         // if the source is subsampled, converge on 4:4:4 subsampling, but only if we actually need
  636.         // the auxiliary channels
  637.         const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
  638.  
  639.         if ((dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_601 && (dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_709) {
  640.             if (sampInfo.mCXBits | sampInfo.mCYBits | sampInfo.mCXOffset16 | sampInfo.mCbYOffset16 | sampInfo.mCrYOffset16)
  641.                 srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
  642.         }
  643.  
  644.         // change color spaces
  645.         uint32 dstSpace = dstToken & kVDPixSpace_Mask;
  646.         while((srcToken ^ dstToken) & kVDPixSpace_Mask) {
  647.             uint32 srcSpace = srcToken & kVDPixSpace_Mask;
  648.             uint32 targetSpace = dstSpace;
  649.  
  650. space_reconvert:
  651.             switch(targetSpace) {
  652.                 case kVDPixSpace_BGR:
  653.                     switch(srcSpace) {
  654.                     case kVDPixSpace_YCC_709:
  655.                         switch(srcToken & kVDPixType_Mask) {
  656.                             case kVDPixType_8_8_8:
  657.                                 gen.ycbcr709_to_rgb32();
  658.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  659.                                 break;
  660.  
  661.                             case kVDPixType_16F_16F_16F_LE:
  662.                                 srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
  663.                                 gen.ycbcr709_to_rgb32_32f();
  664.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
  665.                                 break;
  666.  
  667.                             case kVDPixType_32F_32F_32F_LE:
  668.                                 gen.ycbcr709_to_rgb32_32f();
  669.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
  670.                                 break;
  671.  
  672.                             default:
  673.                                 VDASSERT(false);
  674.                                 break;
  675.                         }
  676.                         break;
  677.  
  678.                     case kVDPixSpace_YCC_601:
  679.                         switch(srcToken & kVDPixType_Mask) {
  680.                             case kVDPixType_8_8_8:
  681.                                 gen.ycbcr601_to_rgb32();
  682.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  683.                                 break;
  684.  
  685.                             case kVDPixType_16F_16F_16F_LE:
  686.                                 srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
  687.                                 gen.ycbcr601_to_rgb32_32f();
  688.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
  689.                                 break;
  690.  
  691.                             case kVDPixType_32F_32F_32F_LE:
  692.                                 gen.ycbcr601_to_rgb32_32f();
  693.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
  694.                                 break;
  695.  
  696.                             default:
  697.                                 VDASSERT(false);
  698.                                 break;
  699.                         }
  700.                         break;
  701.  
  702.                     case kVDPixSpace_Y_601:
  703.                         targetSpace = kVDPixSpace_YCC_601;
  704.                         goto space_reconvert;
  705.  
  706.                     case kVDPixSpace_Pal:
  707.                         switch(srcToken & kVDPixType_Mask) {
  708.                             case kVDPixType_1:
  709.                                 gen.conv_Pal1_to_8888(0);
  710.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  711.                                 break;
  712.  
  713.                             case kVDPixType_2:
  714.                                 gen.conv_Pal2_to_8888(0);
  715.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  716.                                 break;
  717.  
  718.                             case kVDPixType_4:
  719.                                 gen.conv_Pal4_to_8888(0);
  720.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  721.                                 break;
  722.  
  723.                             case kVDPixType_8:
  724.                                 gen.conv_Pal8_to_8888(0);
  725.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
  726.                                 break;
  727.  
  728.                             default:
  729.                                 VDASSERT(false);
  730.                                 break;
  731.                         }
  732.                         break;
  733.  
  734.                     default:
  735.                         VDASSERT(false);
  736.                         break;
  737.                     }
  738.                     break;
  739.                 case kVDPixSpace_Y_601:
  740.                     if (srcSpace == kVDPixSpace_YCC_601) {
  741.                         gen.pop();
  742.                         gen.swap(1);
  743.                         gen.pop();
  744.                         switch(srcToken & kVDPixType_Mask) {
  745.                             case kVDPixType_32F_32F_32F_LE:
  746.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_32F_LE;
  747.                                 break;
  748.                             case kVDPixType_16F_16F_16F_LE:
  749.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_16F_LE;
  750.                                 break;
  751.                             case kVDPixType_8_8_8:
  752.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_8;
  753.                                 break;
  754.  
  755.                             default:
  756.                                 VDASSERT(false);
  757.                         }
  758.                         srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
  759.                         break;
  760.                     } else if (srcSpace == kVDPixSpace_YCC_709) {
  761.                         gen.pop();
  762.                         gen.swap(1);
  763.                         gen.pop();
  764.                         switch(srcToken & kVDPixType_Mask) {
  765.                             case kVDPixType_32F_32F_32F_LE:
  766.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_32F_LE;
  767.                                 break;
  768.                             case kVDPixType_16F_16F_16F_LE:
  769.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_16F_LE;
  770.                                 break;
  771.                             case kVDPixType_8_8_8:
  772.                                 srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_8;
  773.                                 break;
  774.  
  775.                             default:
  776.                                 VDASSERT(false);
  777.                         }
  778.                         srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
  779.                         break;
  780.                     }
  781.                     // fall through
  782.                 case kVDPixSpace_YCC_601:
  783.                     switch(srcSpace) {
  784.                     case kVDPixSpace_BGR:
  785.                         srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
  786.                         gen.rgb32_to_ycbcr601();
  787.                         srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8_8_8;
  788.                         break;
  789.                     case kVDPixSpace_Y_601:
  790.                     case kVDPixSpace_Y_709:
  791.                         srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8;
  792.  
  793.                         {
  794.                             const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
  795.                             int cw = ((w - 1) >> sinfo.mCXBits) + 1;
  796.                             int ch = ((h - 1) >> sinfo.mCYBits) + 1;
  797.  
  798.                             gen.ldconst(0x80, cw, cw, ch, srcToken);
  799.                         }
  800.  
  801.                         gen.dup();
  802.                         gen.swap(2);
  803.                         gen.swap(1);
  804.                         srcToken = kVDPixSpace_YCC_601 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
  805.                         break;
  806.                     case kVDPixSpace_YCC_709:
  807.                         VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8);
  808.                         gen.ycbcr709_to_ycbcr601();
  809.                         srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_601;
  810.                         break;
  811.  
  812.                     case kVDPixSpace_Pal:
  813.                         targetSpace = kVDPixSpace_BGR;
  814.                         goto space_reconvert;
  815.  
  816.                     default:
  817.                         VDASSERT(false);
  818.                         break;
  819.                     }
  820.                     break;
  821.                 case kVDPixSpace_YCC_709:
  822.                     switch(srcSpace) {
  823.                     case kVDPixSpace_BGR:
  824.                         srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
  825.                         gen.rgb32_to_ycbcr709();
  826.                         srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8_8_8;
  827.                         break;
  828.                     case kVDPixSpace_Y_709:
  829.                     case kVDPixSpace_Y_601:
  830.                         srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8;
  831.  
  832.                         {
  833.                             const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
  834.                             int cw = ((w - 1) >> sinfo.mCXBits) + 1;
  835.                             int ch = ((h - 1) >> sinfo.mCYBits) + 1;
  836.  
  837.                             gen.ldconst(0x80, cw, cw, ch, srcToken);
  838.                         }
  839.  
  840.                         gen.dup();
  841.                         gen.swap(2);
  842.                         gen.swap(1);
  843.                         srcToken = kVDPixSpace_YCC_709 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
  844.                         break;
  845.                     case kVDPixSpace_YCC_601:
  846.                         VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8 || (srcToken & kVDPixType_Mask) == kVDPixType_32F_32F_32F_LE);
  847.                         gen.ycbcr601_to_ycbcr709();
  848.                         srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_709;
  849.                         break;
  850.                     case kVDPixSpace_Pal:
  851.                         targetSpace = kVDPixSpace_BGR;
  852.                         goto space_reconvert;
  853.                     default:
  854.                         VDASSERT(false);
  855.                         break;
  856.                     }
  857.                     break;
  858.  
  859.                 default:
  860.                     VDASSERT(false);
  861.                     break;
  862.             }
  863.         }
  864.     }
  865.  
  866.     // check if we need a type change
  867.     //
  868.     // Note: If the sampling is also different, we have to be careful about what types we
  869.     // target. The type conversion may itself involve a sampling conversion, so things get
  870.     // VERY tricky here.
  871.     if ((srcToken ^ dstToken) & kVDPixType_Mask) {
  872.         bool samplingDifferent = 0 != ((srcToken ^ dstToken) & kVDPixSamp_Mask);
  873.         uint32 intermediateTypeToken = dstToken & kVDPixType_Mask;
  874.  
  875.         if (samplingDifferent) {
  876.             switch(dstToken & kVDPixType_Mask) {
  877.                 case kVDPixType_16F_16F_16F_LE:
  878.                     intermediateTypeToken = kVDPixType_32F_32F_32F_LE;
  879.                     break;
  880.                 case kVDPixType_8_B8R8:
  881.                     intermediateTypeToken = kVDPixType_8_8_8;
  882.                     break;
  883.             }
  884.         }
  885.  
  886.         srcToken = BlitterConvertType(gen, srcToken, (dstToken & ~kVDPixType_Mask) | intermediateTypeToken, w, h);
  887.     }
  888.  
  889.     // convert subsampling if necessary
  890.     switch(srcToken & kVDPixType_Mask) {
  891.         case kVDPixType_8_8_8:
  892.         case kVDPixType_16F_16F_16F_LE:
  893.         case kVDPixType_32F_32F_32F_LE:
  894.             if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
  895.                 srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
  896.             break;
  897.     }
  898.  
  899.     // check if we need a type change (possible with 16F)
  900.     srcToken = BlitterConvertType(gen, srcToken, dstToken, w, h);
  901.  
  902.     return gen.create();
  903. }
  904.