home *** CD-ROM | disk | FTP | other *** search
Text File | 1999-01-14 | 31.4 KB | 1,313 lines | [TEXT/CWIE] |
- //--------------------------------------------------------------
- // BlitPixieDoubleRect, v 4.4
- // by Anders Björklund, May 1998
- //
- // This file contains the DrawProcs for use with the
- // SWSetSpriteWorldDoubleRectDrawProc function in Scrolling.c.
- //--------------------------------------------------------------
-
- #include <SWIncludes.h>
-
- //#define THEORY // uncomment this to use the "theoretical" version
- //#define USE_C // uncomment this to use the C version of the blitter
-
// Byte distances BlitDoubleRects adds to its source/destination pointers when
// it hops between the two rects of a scan line.
//
// NOTE: the assembly versions of BlitDoubleRects read these fields at fixed
// byte offsets (0, 4, 8 and 12), so the order of the fields must not change.
struct OffsetInfo
{
	long	srcOffsetAtoB;	// source: from right edge of rect A to left edge of rect B
	long	srcOffsetBtoA;	// source: from right edge of rect B to left edge of rect A, in next row
	long	dstOffsetAtoB;	// destination: from right edge of rect A to left edge of rect B
	long	dstOffsetBtoA;	// destination: from right edge of rect B to left edge of rect A, in next row
};

typedef struct OffsetInfo	OffsetInfo;
typedef OffsetInfo			*OffsetInfoPtr;
-
- static void BlitDoubleRects(
- char *src,
- char *dst,
- unsigned long rows,
- unsigned long bytesA,
- unsigned long bytesB,
- OffsetInfoPtr info
- );
-
- //--------------------------------------------------------------------------------------
- #pragma mark [Macros]
-
// Note: this version of CLIP_RECT is different from the BP_CLIP_RECT in BlitPixie.h!
// This version is designed to handle DoubleRect DrawProcs.
//
// CLIP_RECT(r, r1, r2, interlaced)
//	r			the clip rect
//	r1			the source rect; adjusted by whatever gets clipped off r2
//	r2			the destination rect; clipped against r
//	interlaced	when true, a first line on an odd row (relative to r.top) is skipped
//
// If the clipped height is empty the macro RETURNS from the calling function,
// so it may only be used inside functions returning void. An empty width does
// not return: both rects are collapsed to zero width instead.
//
// The expansion is a single braced block, so it is safe as the body of an
// if/else, and it works with or without a trailing semicolon.

#define CLIP_RECT(r, r1, r2, interlaced) \
{ \
	/* clip off the top so we don't write into random memory */ \
	if ((r2).top < (r).top) { \
		(r1).top += (r).top - (r2).top; \
		(r2).top = (r).top; \
	} \
	/* clip off the bottom */ \
	if ((r2).bottom > (r).bottom) { \
		(r1).bottom -= (r2).bottom - (r).bottom; \
		(r2).bottom = (r).bottom; \
	} \
	/* clip off the left */ \
	if ((r2).left < (r).left) { \
		(r1).left += (r).left - (r2).left; \
		(r2).left = (r).left; \
	} \
	/* clip off the right */ \
	if ((r2).right > (r).right) { \
		(r1).right -= (r2).right - (r).right; \
		(r2).right = (r).right; \
	} \
	if (interlaced) { \
		/* If first line is not on an even number, then skip it. */ \
		if (((r2).top - (r).top) & 1) { \
			(r1).top++; \
			(r2).top++; \
		} \
	} \
	/* Make sure height is valid */ \
	if ((r2).bottom <= (r2).top) \
		return; \
	/* Make sure width is valid */ \
	/* Instead of returning, we set the width to 0, so the other rect can be drawn */ \
	if ((r2).right < (r2).left) { \
		(r1).right = (r1).left; \
		(r2).right = (r2).left; \
	} \
}
-
-
- extern SInt8 gSWmmuMode;
-
-
- ///--------------------------------------------------------------------------------------
- // BlitPixie8BitDoubleRectDrawProc
- ///--------------------------------------------------------------------------------------
-
- SW_FUNC void BlitPixie8BitDoubleRectDrawProc(
- FramePtr srcFrameP,
- FramePtr dstFrameP,
- Rect* srcRectA,
- Rect* dstRectA,
- Rect* srcRectB,
- Rect* dstRectB)
- {
- OffsetInfo info;
- Rect srcBlitRectA = *srcRectA,
- dstBlitRectA = *dstRectA,
- srcBlitRectB = *srcRectB,
- dstBlitRectB = *dstRectB;
-
- SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
- SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 8);
- SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 8);
-
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectA, dstBlitRectA, false)
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectB, dstBlitRectB, false)
-
- info.srcOffsetAtoB = srcBlitRectB.left - srcBlitRectA.right;
- info.srcOffsetBtoA = (srcBlitRectA.left - srcBlitRectB.right) +
- srcFrameP->frameRowBytes;
- info.dstOffsetAtoB = dstBlitRectB.left - dstBlitRectA.right;
- info.dstOffsetBtoA = (dstBlitRectA.left - dstBlitRectB.right) +
- dstFrameP->frameRowBytes;
-
- START_32_BIT_MODE
-
- BlitDoubleRects(
- // calculate the address of the first byte of the source
- (srcFrameP->frameBaseAddr +
- (srcFrameP->scanLinePtrArray[srcBlitRectA.top - srcFrameP->frameRect.top]) +
- srcBlitRectA.left),
-
- // calculate the address of the first byte of the destination
- (dstFrameP->frameBaseAddr +
- (dstFrameP->scanLinePtrArray[dstBlitRectA.top]) +
- dstBlitRectA.left),
-
- // calculate the number of rows to blit
- dstBlitRectA.bottom - dstBlitRectA.top,
- // == dstBlitRectB.bottom - dstBlitRectB.top
-
- // pass rect widths and offset info
- dstBlitRectA.right - dstBlitRectA.left,
- dstBlitRectB.right - dstBlitRectB.left,
- &info
- );
-
- END_32_BIT_MODE
-
- }
-
- ///--------------------------------------------------------------------------------------
- // BP8BitInterlacedDoubleRectDrawProc
- ///--------------------------------------------------------------------------------------
-
- SW_FUNC void BP8BitInterlacedDoubleRectDrawProc(
- FramePtr srcFrameP,
- FramePtr dstFrameP,
- Rect* srcRectA,
- Rect* dstRectA,
- Rect* srcRectB,
- Rect* dstRectB)
- {
- OffsetInfo info;
- Rect srcBlitRectA = *srcRectA,
- dstBlitRectA = *dstRectA,
- srcBlitRectB = *srcRectB,
- dstBlitRectB = *dstRectB;
- int numRowsToCopy;
-
- SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
- SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 8);
- SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 8);
-
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectA, dstBlitRectA, true)
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectB, dstBlitRectB, true)
-
- numRowsToCopy = dstBlitRectA.bottom - dstBlitRectA.top;
- // Is destBlitRect height an even number?
- if ( ! (numRowsToCopy & 1) )
- numRowsToCopy >>= 1;
- else
- numRowsToCopy = (numRowsToCopy>>1) + 1;
- if (numRowsToCopy < 1)
- return;
-
- info.srcOffsetAtoB = srcBlitRectB.left - srcBlitRectA.right;
- info.srcOffsetBtoA = (srcBlitRectA.left - srcBlitRectB.right) +
- (srcFrameP->frameRowBytes << 1);
- info.dstOffsetAtoB = dstBlitRectB.left - dstBlitRectA.right;
- info.dstOffsetBtoA = (dstBlitRectA.left - dstBlitRectB.right) +
- (dstFrameP->frameRowBytes << 1);
-
- START_32_BIT_MODE
-
- BlitDoubleRects(
- // calculate the address of the first byte of the source
- (srcFrameP->frameBaseAddr +
- (srcFrameP->scanLinePtrArray[srcBlitRectA.top - srcFrameP->frameRect.top]) +
- srcBlitRectA.left),
-
- // calculate the address of the first byte of the destination
- (dstFrameP->frameBaseAddr +
- (dstFrameP->scanLinePtrArray[dstBlitRectA.top]) +
- dstBlitRectA.left),
-
- // calculate the number of rows to blit
- numRowsToCopy,
-
- // pass rect widths and offset info
- SW_MAX(dstBlitRectA.right - dstBlitRectA.left,0),
- SW_MAX(dstBlitRectB.right - dstBlitRectB.left,0),
- &info
- );
-
- END_32_BIT_MODE
- }
-
- ///--------------------------------------------------------------------------------------
- // BlitPixie16BitDoubleRectDrawProc
- ///--------------------------------------------------------------------------------------
-
- SW_FUNC void BlitPixie16BitDoubleRectDrawProc(
- FramePtr srcFrameP,
- FramePtr dstFrameP,
- Rect* srcRectA,
- Rect* dstRectA,
- Rect* srcRectB,
- Rect* dstRectB)
- {
- OffsetInfo info;
- Rect srcBlitRectA = *srcRectA,
- dstBlitRectA = *dstRectA,
- srcBlitRectB = *srcRectB,
- dstBlitRectB = *dstRectB;
-
- SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
- SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 16);
- SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 16);
-
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectA, dstBlitRectA, false)
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectB, dstBlitRectB, false)
-
- info.srcOffsetAtoB = (srcBlitRectB.left - srcBlitRectA.right) << 1;
- info.srcOffsetBtoA = ((srcBlitRectA.left - srcBlitRectB.right) << 1 ) +
- srcFrameP->frameRowBytes;
- info.dstOffsetAtoB = (dstBlitRectB.left - dstBlitRectA.right) << 1;
- info.dstOffsetBtoA = ((dstBlitRectA.left - dstBlitRectB.right) << 1) +
- dstFrameP->frameRowBytes;
-
- START_32_BIT_MODE
-
- BlitDoubleRects(
- // calculate the address of the first byte of the source
- (srcFrameP->frameBaseAddr +
- (srcFrameP->scanLinePtrArray[srcBlitRectA.top - srcFrameP->frameRect.top]) +
- (srcBlitRectA.left << 1)),
-
- // calculate the address of the first byte of the destination
- (dstFrameP->frameBaseAddr +
- (dstFrameP->scanLinePtrArray[dstBlitRectA.top]) +
- (dstBlitRectA.left << 1)),
-
- // calculate the number of rows to blit
- dstBlitRectA.bottom - dstBlitRectA.top,
- // == dstBlitRectB.bottom - dstBlitRectB.top
-
- // pass rect widths and offset info
- SW_MAX(dstBlitRectA.right - dstBlitRectA.left,0) << 1,
- SW_MAX(dstBlitRectB.right - dstBlitRectB.left,0) << 1,
- &info
- );
-
- END_32_BIT_MODE
- }
-
-
- ///--------------------------------------------------------------------------------------
- // BP16BitInterlacedDoubleRectDrawProc
- ///--------------------------------------------------------------------------------------
-
- SW_FUNC void BP16BitInterlacedDoubleRectDrawProc(
- FramePtr srcFrameP,
- FramePtr dstFrameP,
- Rect* srcRectA,
- Rect* dstRectA,
- Rect* srcRectB,
- Rect* dstRectB)
- {
- OffsetInfo info;
- Rect srcBlitRectA = *srcRectA,
- dstBlitRectA = *dstRectA,
- srcBlitRectB = *srcRectB,
- dstBlitRectB = *dstRectB;
- int numRowsToCopy;
-
- SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
- SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 16);
- SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 16);
-
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectA, dstBlitRectA, true)
- CLIP_RECT(dstFrameP->frameRect, srcBlitRectB, dstBlitRectB, true)
-
- numRowsToCopy = dstBlitRectA.bottom - dstBlitRectA.top;
- // Is destBlitRect height an even number?
- if ( ! (numRowsToCopy & 1) )
- numRowsToCopy >>= 1;
- else
- numRowsToCopy = (numRowsToCopy>>1) + 1;
- if (numRowsToCopy < 1)
- return;
-
- info.srcOffsetAtoB = (srcBlitRectB.left - srcBlitRectA.right) << 1;
- info.srcOffsetBtoA = ((srcBlitRectA.left - srcBlitRectB.right) << 1 ) +
- (srcFrameP->frameRowBytes << 1);
- info.dstOffsetAtoB = (dstBlitRectB.left - dstBlitRectA.right) << 1;
- info.dstOffsetBtoA = ((dstBlitRectA.left - dstBlitRectB.right) << 1 ) +
- (dstFrameP->frameRowBytes << 1);
-
- START_32_BIT_MODE
-
- BlitDoubleRects(
- // calculate the address of the first byte of the source
- (srcFrameP->frameBaseAddr +
- (srcFrameP->scanLinePtrArray[srcBlitRectA.top - srcFrameP->frameRect.top]) +
- (srcBlitRectA.left << 1)),
-
- // calculate the address of the first byte of the destination
- (dstFrameP->frameBaseAddr +
- (dstFrameP->scanLinePtrArray[dstBlitRectA.top]) +
- (dstBlitRectA.left << 1)),
-
- // calculate the number of rows to blit
- numRowsToCopy,
-
- // pass rect widths and offset info
- SW_MAX(dstBlitRectA.right - dstBlitRectA.left,0) << 1,
- SW_MAX(dstBlitRectB.right - dstBlitRectB.left,0) << 1,
- &info
- );
-
- END_32_BIT_MODE
- }
-
-
- #pragma mark -
-
- ///--------------------------------------------------------------------------------------
- // BlitDoubleRects
- //
- // a blitter to merge two offscreen areas into one onscreen area
- // NOTE: This implementation _always_ blits aligned doubles to the screen
- ///--------------------------------------------------------------------------------------
-
- #pragma mark *** Theory:
- #ifdef THEORY
-
- #include <Memory.h>
-
- #if SW_PPC
- #define BlockMoveFunction BlockMoveDataUncached
- // BlockMoveDataUncached is implemented in "DriverServicesLib" (strangely enough).
- // Even more strangely, DriverServicesLib is only available on PCI-based Macs -
- // making it kinda hard to run on others. Just include this library in your project,
- // or go with BlockMoveData/memcpy (and suffer the consequences). -- AFB
- //
- // "the difference between theory and practice is greater in practice than in theory"
- #else
- #define BlockMoveFunction BlockMoveData
- #endif
-
- void BlitDoubleRects(
- char *src,
- char *dst,
- unsigned long rows,
- unsigned long bytesA,
- unsigned long bytesB,
- OffsetInfoPtr info)
- {
- int y;
-
- for ( y = 0; y < rows; y++ )
- {
- BlockMoveFunction( src, dst , bytesA );
- src += bytesA;
- dst += bytesA;
-
- src += info->srcOffsetAtoB;
- dst += info->dstOffsetAtoB;
-
- BlockMoveFunction( src, dst , bytesB );
- src += bytesB;
- dst += bytesB;
-
- src += info->srcOffsetBtoA;
- dst += info->dstOffsetBtoA;
- }
- }
-
- #pragma mark *** C (optimized):
- #elif defined(USE_C)
-
- #if THINK_C
- // NOTE: This code will not compile on THINK C, because it doesn't have decent pointers.
- // Use a real compiler instead, or switch back to the assembly. Thank you.
- #error
- #endif
-
- // This implementation is
- // ©1998 Anders Fredrik Björklund. All rights reserved.
-
- #define srcL ((long *) src)
- #define dstL ((long *) dst)
- #define bufferL ((long *) buffer)
- #define srcD ((double *) src)
- #define dstD ((double *) dst)
- #define bufferD ((double *) buffer)
-
- #if SW_PPC
- #define kAlignmentMask 7
- #else
- #define kAlignmentMask 3
- #endif
-
- void BlitDoubleRects(
- char *src,
- char *dst,
- unsigned long rows,
- unsigned long bytesA,
- unsigned long bytesB,
- OffsetInfoPtr info)
- {
- char *buffer;
- unsigned int x;
-
- unsigned int leftblocks,leftwords,left;
- unsigned int rightblocks,rightwords,right;
- unsigned int alignA,alignB;
- long srcOffsetBtoA,dstOffsetBtoA,srcOffsetAtoB,dstOffsetAtoB;
- #if SW_PPC
- Boolean useDoublesA,useDoublesB;
- char DoubleBuffer[64];
- #endif
-
- // load stuff from struct
- srcOffsetAtoB = info->srcOffsetAtoB;
- dstOffsetAtoB = info->dstOffsetAtoB;
- srcOffsetBtoA = info->srcOffsetBtoA;
- dstOffsetBtoA = info->dstOffsetBtoA;
-
- // alignment offset for rect A
- alignA = (-((long) dst )) & kAlignmentMask;
- if ( alignA > bytesA) alignA = bytesA;
- bytesA -= alignA;
-
- // alignment offset for rect B
- alignB = (-((long) dst + bytesA + dstOffsetAtoB)) & kAlignmentMask;
- if ( alignB > bytesB) alignB = bytesB;
- bytesB -= alignB;
-
- //pre-calculate transfer sizes
- leftblocks = bytesA >> 5;
- left = bytesA & 31;
- rightblocks = bytesB >> 5;
- right = bytesB & 31;
-
- #if SW_PPC
-
- // alignment for source (can use doubles if word-aligned)
- useDoublesA = (alignA & 3) == ((-((long) src )) & 3);
- useDoublesB = (alignB & 3) == ((-((long) src + bytesA + srcOffsetAtoB)) & 3);
-
- // align buffer to 32-byte boundary (cache line)
- buffer = (char *) (( (long) DoubleBuffer + 32) & ~31L);
-
- #define COPY_BLOCKS_LONG(blocks) \
- for ( x = 0; x < blocks; x++) \
- { register long t1,t2,t3,t4; \
- register double f1,f2,f3,f4; \
- t1 = srcL[0]; t2 = srcL[1]; t3 = srcL[2]; t4 = srcL[3]; \
- bufferL[0] = t1; bufferL[1] = t2; bufferL[2] = t3; bufferL[3] = t4; \
- t1 = srcL[4]; t2 = srcL[5]; t3 = srcL[6]; t4 = srcL[7]; srcL += 8; \
- f1 = bufferD[0]; f2 = bufferD[1]; \
- bufferL[4] = t1; bufferL[5] = t2; bufferL[6] = t3; bufferL[7] = t4; \
- f3 = bufferD[2]; f4 = bufferD[3]; \
- dstD[0] = f1; dstD[1] = f2; dstD[2] = f3; dstD[3] = f4; dstD += 4; \
- }
-
- #define COPY_LEFTOVER_LONG(longs,bytes) \
- for ( x = 0; x < longs; x++) *dstL++ = *srcL++; \
- if ( bytes & 2) *((short *)dst)++ = *((short *)src)++; \
- if ( bytes & 1) *((char *)dst)++ = *((char *)src)++;
-
- #define COPY_BLOCKS_DOUBLE(blocks) \
- for ( x = 0; x < blocks; x++) \
- { register double t1,t2,t3,t4; \
- t1 = srcD[0]; t2 = srcD[1]; t3 = srcD[2]; t4 = srcD[3]; srcD += 4; \
- dstD[0] = t1; dstD[1] = t2; dstD[2] = t3; dstD[3] = t4; dstD += 4; \
- }
- #define COPY_LEFTOVER_DOUBLE(doubles,bytes) \
- for ( x = 0; x < doubles; x++) *dstD++ = *srcD++; \
- if ( bytes & 4) *((long *)dst)++ = *((long *)src)++; \
- if ( bytes & 2) *((short *)dst)++ = *((short *)src)++; \
- if ( bytes & 1) *((char *)dst)++ = *((char *)src)++;
-
- #define COPY_ALIGN(align) \
- if ( align & 4) *((long *)dst)++ = *((long *)src)++; \
- if ( align & 1) *((char *)dst)++ = *((char *)src)++; \
- if ( align & 2) *((short *)dst)++ = *((short *)src)++;
-
- // ----------------------------------------------------------------------------
-
- if ( useDoublesA && useDoublesB ) // Both rects aligned
- {
- leftwords = left >> 3;
- rightwords = right >> 3;
-
- do
- {
- COPY_ALIGN(alignA)
- COPY_BLOCKS_DOUBLE(leftblocks)
- COPY_LEFTOVER_DOUBLE(leftwords,left)
- src += srcOffsetAtoB;
- dst += dstOffsetAtoB;
-
- COPY_ALIGN(alignB)
- COPY_BLOCKS_DOUBLE(rightblocks)
- COPY_LEFTOVER_DOUBLE(rightwords,right)
- src += srcOffsetBtoA;
- dst += dstOffsetBtoA;
- }
- while (--rows);
- }
- else if ( useDoublesA ) // Left rect aligned
- {
- leftwords = left >> 3;
- rightwords = right >> 2;
-
- do
- {
- COPY_ALIGN(alignA)
- COPY_BLOCKS_DOUBLE(leftblocks)
- COPY_LEFTOVER_DOUBLE(leftwords,left)
- src += srcOffsetAtoB;
- dst += dstOffsetAtoB;
-
- COPY_ALIGN(alignB)
- COPY_BLOCKS_LONG(rightblocks)
- COPY_LEFTOVER_LONG(rightwords,right)
- src += srcOffsetBtoA;
- dst += dstOffsetBtoA;
- }
- while (--rows);
- }
- else if ( useDoublesB ) // Right rect aligned
- {
- leftwords = left >> 2;
- rightwords = right >> 3;
-
- do
- {
- COPY_ALIGN(alignA)
- COPY_BLOCKS_LONG(leftblocks)
- COPY_LEFTOVER_LONG(leftwords,left)
- src += srcOffsetAtoB;
- dst += dstOffsetAtoB;
-
- COPY_ALIGN(alignB)
- COPY_BLOCKS_DOUBLE(rightblocks)
- COPY_LEFTOVER_DOUBLE(rightwords,right)
- src += srcOffsetBtoA;
- dst += dstOffsetBtoA;
- }
- while (--rows);
- }
- else // None of the rects aligned
- {
- leftwords = left >> 2;
- rightwords = right >> 2;
-
- do
- {
- COPY_ALIGN(alignA)
- COPY_BLOCKS_LONG(leftblocks)
- COPY_LEFTOVER_LONG(leftwords,left)
- src += srcOffsetAtoB;
- dst += dstOffsetAtoB;
-
- COPY_ALIGN(alignB)
- COPY_BLOCKS_LONG(rightblocks)
- COPY_LEFTOVER_LONG(rightwords,right)
- src += srcOffsetBtoA;
- dst += dstOffsetBtoA;
- }
- while (--rows);
- }
-
- #else // ! SW_PPC
-
- leftwords = left >> 2;
- rightwords = right >> 2;
- do
- {
- // align destination
- if ( alignA & 1)
- *((char *)dst)++ = *((char *)src)++;
- if ( alignA & 2)
- *((short *)dst)++ = *((short *)src)++;
-
- // copy 32 byte blocks
- for ( x = 0; x < leftblocks; x++)
- {
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- }
- // copy left-over bytes (<32)
- for ( x = 0; x < leftwords; x++)
- *((long *)dst)++ = *((long *)src)++;
- if ( left & 2)
- *((short *)dst)++ = *((short *)src)++;
- if ( left & 1)
- *((char *)dst)++ = *((char *)src)++;
-
- src += srcOffsetAtoB;
- dst += dstOffsetAtoB;
-
- // align destination
- if ( alignB & 1)
- *((char *)dst)++ = *((char *)src)++;
- if ( alignB & 2)
- *((short *)dst)++ = *((short *)src)++;
-
- // copy 32 byte blocks
- for ( x = 0; x < rightblocks; x++)
- {
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- *dstL++ = *srcL++;
- }
-
- // copy left-over bytes (<32)
- for ( x = 0; x < rightwords; x++)
- *((long *)dst)++ = *((long *)src)++;
- if ( right & 2)
- *((short *)dst)++ = *((short *)src)++;
- if ( right & 1)
- *((char *)dst)++ = *((char *)src)++;
-
- src += srcOffsetBtoA;
- dst += dstOffsetBtoA;
-
- } while (--rows);
-
- #endif // SW_PPC
-
- }
- #undef srcL
- #undef dstL
- #undef bufferL
- #undef srcD
- #undef dstD
- #undef bufferD
-
- #else // !USE_C == USE_ASM
-
- #pragma mark *** 68k asm:
- #if !SW_PPC
-
- // This 680X0 asm implementation is
- // ©1997 Anders Fredrik Björklund. All rights reserved.
- // mailto:coderonin@geocities.com ¡¡¡ FRONT LINE ASSEMBLER !!!
-
// 68k assembly version of the blitter.
//
// For every row: copy up to 3 bytes so the destination is long-aligned, copy
// rect A with MOVE.L (64-byte unrolled blocks, entered part-way through on
// the first pass to take care of the left-over longs), copy the last 0-3
// bytes, hop to rect B, do the same for rect B, then hop to rect A of the
// next row.
//
// NOTE: the row count and both byte counts are handled with word-sized
// instructions, so only their low 16 bits are used.
- SW_ASM_FUNC void BlitDoubleRects(
- char *src,
- char *dst,
- unsigned long rows,
- unsigned long bytesA,
- unsigned long bytesB,
- OffsetInfoPtr info)
- {
- // VARIABLE(S) REGISTER
- // *temp* D0
- // *temp*,y D1
- // bytesA,left D2
- // leftblocks D3
- // bytesB,right D4
- // rightblocks D5
- // alignA D6
- // alignB D7
- // src A0
- // dst A1
- // startA A2
- // startB A3
- // offsetinfoptr A4
-
- SW_ASM_BEGIN
-
- #if __MWERKS__
- fralloc
- #endif
-
- MOVEM.L D3-D7/A2-A4,-(SP) // save the registers used below
- MOVEA.L src,A0
- MOVEA.L dst,A1
- MOVE.L bytesA,D2
- MOVE.L bytesB,D4
-
- // srcOffsetAtoB = info->srcOffsetAtoB;
- // srcOffsetBtoA = info->srcOffsetBtoA;
- // dstOffsetAtoB = info->dstOffsetAtoB;
- // dstOffsetBtoA = info->dstOffsetBtoA;
- MOVEA.L info,A4
- #define srcOffsetAtoB (A4)
- #define srcOffsetBtoA 4(A4)
- #define dstOffsetAtoB 8(A4)
- #define dstOffsetBtoA 12(A4)
-
- // alignment offset for rect A
- // align = (-((long) dst )) & 3
- MOVE.L A1,D6
- MOVEQ #3,D1 // alignment mask, used for both rects
- NEG.L D6
- AND.W D1,D6
-
- // alignment offset for rect B
- // align = (-((long) dst + bytesA + dstOffsetAtoB)) & 3
- MOVE.L A1,D7
- ADD.L D2,D7 // D2 still holds the full width of rect A here
- ADD.L dstOffsetAtoB,D7
- NEG.L D7
- AND.W D1,D7
-
- // if ( alignA > bytesA) alignA = bytesA;
- CMP.W D2,D6
- BLE.S @alignAok
- MOVE.W D2,D6
- @alignAok:
- // bytesA -= alignA;
- SUB.W D6,D2
-
- // if ( alignB > bytesB) alignB = bytesB;
- CMP.W D4,D7
- BLE.S @alignBok
- MOVE.W D4,D7
- @alignBok:
- // bytesB -= alignB;
- SUB.W D7,D4
-
- //pre-calculate transfer sizes
- MOVE.W #15,D0 // mask for the number of longs outside the 64-byte blocks
- // leftblocks = bytesA >> 6;
- // left = bytesA & 63;
- MOVE.W D2,D3
- LSR.W #6,D3
-
- // calculate words outside blocks
- MOVE.W D2,D1
- ANDI.W #3,D2 // D2 = bytes left after the last long (0-3)
- LSR.W #2,D1 // / sizeof(long)
- AND.W D0,D1
- ADD.W D1,D1 // * sizeof(MOVE.L (A0)+,(A1)+)
- LEA @leftloopend,A2
- SUBA.L D1,A2 // A2 = entry point that many MOVE.Ls before @leftloopend
-
- // rightblocks = bytesB >> 6;
- // right = bytesB & 63;
- MOVE.W D4,D5
- LSR.W #6,D5
-
- // calculate words outside blocks
- MOVE.W D4,D1
- ANDI.W #3,D4 // D4 = bytes left after the last long (0-3)
- LSR.W #2,D1 // / sizeof(long)
- AND.W D0,D1
- ADD.W D1,D1 // * sizeof(MOVE.L (A0)+,(A1)+)
- LEA @rightloopend,A3
- SUBA.L D1,A3 // A3 = entry point that many MOVE.Ls before @rightloopend
-
- // if (rows) do
- // {
- MOVE.L rows,D1
- TST.W D1
- BEQ @end
- @rowloop:
-
- MOVE.W D6,D0 // align destination to 4-byte boundary (rect A)
- ANDI.W #1,D0
- BEQ.S @skipalignAbyte
- MOVE.B (A0)+,(A1)+
- @skipalignAbyte:
- MOVE.W D6,D0
- ANDI.W #2,D0
- BEQ.S @skipalignAword
- MOVE.W (A0)+,(A1)+
- @skipalignAword:
-
- // copy 64 byte blocks
- MOVE.W D3,D0
- JMP (A2) // first pass copies only the left-over longs
- @leftloop:
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- @leftloopend:
- DBRA D0,@leftloop // one full 64-byte pass per block
-
- MOVE.W D2,D0 // copy the last 0-3 bytes of rect A: a word, then a byte
- BEQ.S @skipleftbyte
- SUBQ.W #2,D0
- BMI.S @leftbyte
- MOVE.W (A0)+,(A1)+
- TST D0
- BEQ.S @skipleftbyte
- @leftbyte:
- MOVE.B (A0)+,(A1)+
- @skipleftbyte:
-
- // src += srcOffsetAtoB;
- // dst += dstOffsetAtoB;
- ADDA.L srcOffsetAtoB,A0
- ADDA.L dstOffsetAtoB,A1
-
- // align destination to 4-byte boundary
- MOVE.W D7,D0
- ANDI.W #1,D0
- BEQ.S @skipalignBbyte
- MOVE.B (A0)+,(A1)+
- @skipalignBbyte:
- MOVE.W D7,D0
- ANDI.W #2,D0
- BEQ.S @skipalignBword
- MOVE.W (A0)+,(A1)+
- @skipalignBword:
-
- // copy 64 byte blocks
- MOVE.W D5,D0
- JMP (A3) // first pass copies only the left-over longs
- @rightloop:
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
-
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- MOVE.L (A0)+,(A1)+
- @rightloopend:
- DBRA D0,@rightloop // one full 64-byte pass per block
-
- MOVE.W D4,D0 // copy the last 0-3 bytes of rect B: a word, then a byte
- BEQ.S @skiprightbyte
- SUBQ.W #2,D0
- BMI.S @rightbyte
- MOVE.W (A0)+,(A1)+
- TST D0
- BEQ.S @skiprightbyte
- @rightbyte:
- MOVE.B (A0)+,(A1)+
- @skiprightbyte:
-
- // src += srcOffsetBtoA;
- // dst += dstOffsetBtoA;
- ADDA.L srcOffsetBtoA,A0
- ADDA.L dstOffsetBtoA,A1
-
- // } while (--rows);
- SUBQ.W #1,D1
- BNE @rowloop
-
- @end:
- #undef srcOffsetAtoB
- #undef srcOffsetBtoA
- #undef dstOffsetAtoB
- #undef dstOffsetBtoA
- MOVEM.L (SP)+,D3-D7/A2-A4 // restore the saved registers
-
- #if __MWERKS__
- frfree
- #endif
-
- SW_ASM_END
- }
-
- #endif //#if !SW_PPC
-
- #pragma mark *** PowerPC asm:
- #if SW_PPC
-
- // This PowerPC asm implementation is
- // ©1997-98 Anders Fredrik Björklund. All rights reserved.
- // mailto:coderonin@geocities.com ¡¡¡ RISC RULEZ !!!
-
- asm void BlitDoubleRects(
- register char *src,
- register char *dst,
- register unsigned long rows,
- register unsigned long bytesA,
- register unsigned long bytesB,
- register OffsetInfoPtr info)
- {
- // LOCAL VARIABLES:
- #define y r31
- #define alignA r30
- #define alignB r29
- #define leftdoubles r30 // recycled
- #define rightdoubles r29 // recycled
-
- #define srcOffsetAtoB r28
- #define srcOffsetBtoA r27
- #define dstOffsetAtoB r26
- #define dstOffsetBtoA r25
-
- #define leftblocks r24
- #define left r23
- #define rightblocks r22
- #define right r21
-
- #define buffer r20
- #define offset r19
-
- #define kRegisterSaveStack (13 * 4)
-
- stmw r19,-kRegisterSaveStack(SP) // save registers on stack (in the "red zone")
-
- // PARAMETERS:
- // src r3
- // dst r4
- #define rows r5
- #define bytesA r6
- #define bytesB r7
- #define info r8
-
- // srcOffsetAtoB = info->srcOffsetAtoB;
- // srcOffsetBtoA = info->srcOffsetBtoA;
- // dstOffsetAtoB = info->dstOffsetAtoB;
- // dstOffsetBtoA = info->dstOffsetBtoA;
- lwz srcOffsetAtoB,0(info)
- lwz srcOffsetBtoA,4(info)
- lwz dstOffsetAtoB,8(info)
- lwz dstOffsetBtoA,12(info)
-
- // get a cache-block aligned stack storage for buffer
- addi buffer,SP,-(kRegisterSaveStack + 32) // (still in the red zone)
- rlwinm buffer,buffer,0,0,26
- dcbtst r0,buffer
-
- // alignment offset for rect A
- // align = (-((long) dst )) & 7
- neg alignA,r4
- rlwinm alignA,alignA,0,29,31
-
- // if ( alignA > bytesA) alignA = bytesA;
- // bytesA -= alignA;
- cmplw alignA,bytesA
- ble @alignAok
- mr alignA,bytesA
- @alignAok:
- sub bytesA,bytesA,alignA
-
- neg r9,r3
- rlwinm r9,r9,0,30,31
-
- // alignment offset for rect B
- // align = (-((long) dst + bytesA + dstOffsetAtoB)) & 7
- add r0,bytesA,dstOffsetAtoB
- add alignB,r4,r0
- neg alignB,alignB
- rlwinm alignB,alignB,0,29,31
-
- // if ( alignB > bytesB) alignB = bytesB;
- // bytesB -= alignB
- cmplw alignB,bytesB
- ble @alignBok
- mr alignB,bytesB
- @alignBok:
- sub bytesB,bytesB,alignB
-
- add r0,bytesA,srcOffsetAtoB
- add r10,r3,r0
- neg r10,r10
- rlwinm r10,r10,0,30,31
-
- // ———————————————————————————————————————————————————————————————————————————
-
- //pre-calculate transfer sizes
- rlwinm leftblocks,bytesA,27,5,31 // leftblocks = bytesA / 32;
- rlwinm left,bytesA,0,27,31 // left = bytesA % 32;
- rlwinm rightblocks,bytesB,27,5,31 // rightblocks = bytesB / 32;
- rlwinm right,bytesB,0,27,31 // right = bytesB % 32;
-
- mr. y,rows
- ble @gohome
-
- mfcr r0 // save CR in r0
- // NOTE : don't use r0 from here and below (except as zero)
-
- rlwinm alignA,alignA,5*4,9,11
- mtcrf 32,alignA // cr2 = alignA & 7
-
- rlwinm alignB,alignB,2*4,21,23
- mtcrf 4,alignB // cr5 = alignB & 7
-
- rlwinm alignA,alignA,12,29,31 // shift back, and with 3
- rlwinm alignB,alignB,24,29,31
- cmplw cr0,alignA,r9
- cmplw cr1,alignB,r10
-
- cmplwi cr3,leftblocks,0
- cmplwi cr4,left,0
- cmplwi cr6,rightblocks,0
- cmplwi cr7,right,0
-
- bne cr0,@leftNotAligned
- rlwinm. leftdoubles,left,29,30,31
- rlwinm left,left,3*4,17,19
- creqv 12,12,12 // crset cr3_LT
- mtcrf 8,left // cr4 = left & 7
- crnor 16,2,2 // cr4_LT = leftdoubles > 0
- @leftNotAligned:
-
- bne cr1,@rightNotAligned
- rlwinm. rightdoubles,right,29,30,31
- rlwinm right,right,0*4,29,31
- creqv 24,24,24 // crset cr6_LT
- mtcrf 1,right // cr7 = right & 7
- crnor 28,2,2 // cr7_LT = rightdoubles > 0
- @rightNotAligned:
-
- li offset,32
- sub r3,r3,offset
- sub r4,r4,offset
-
- @rowloop:
- // ———————————————————————————————————————————————————————————————————————————
- // NOTE: from here on, regs r5-r12 are scratch!
- // NOTE : cr0 is used for rows (y), cr1 is free
-
- // align destination
- bns cr2,@skipalignAByte
- lbz r5,32(r3)
- addi r3,r3,1
- stb r5,32(r4)
- addi r4,r4,1
- @skipalignAByte:
- bne cr2,@skipalignAWord
- lhz r5,32(r3)
- addi r3,r3,2
- sth r5,32(r4)
- addi r4,r4,2
- @skipalignAWord:
- bng cr2,@skipalignALong
- lwz r5,32(r3)
- addi r3,r3,4
- stw r5,32(r4)
- addi r4,r4,4
- @skipalignALong:
-
- // copy 32 byte blocks
- blt cr3,@leftAligned
- beq cr3,@skipleft
- mtctr leftblocks
- @leftloop:
- lwzu r5,32(r3)
- lwz r6,4(r3)
- lwz r7,8(r3)
- lwz r8,12(r3)
- stw r5,0(buffer)
- stw r6,4(buffer)
- stw r7,8(buffer)
- stw r8,12(buffer)
- lwz r9,16(r3)
- lwz r10,20(r3)
- lwz r11,24(r3)
- lwz r12,28(r3)
- lfd fp1,0(buffer)
- lfd fp2,8(buffer)
- stw r9,16(buffer)
- stw r10,20(buffer)
- stw r11,24(buffer)
- stw r12,28(buffer)
- lfd fp3,16(buffer)
- lfd fp4,24(buffer)
- stfdu fp1,32(r4)
- stfd fp2,8(r4)
- stfd fp3,16(r4)
- stfd fp4,24(r4)
- bdnz @leftloop
- @skipleft:
-
- // copy left-over bytes (<32)
- beq cr4,@endleft
- mtxer left
- lswx r5,offset,r3
- add r3,r3,left
- stswx r5,offset,r4
- add r4,r4,left
- b @endleft
-
- @leftAligned:
- beq cr3,@skipleftAligned
- mtctr leftblocks
- @leftloopAligned:
- lfdu fp1,32(r3)
- lfd fp2,8(r3)
- lfd fp3,16(r3)
- lfd fp4,24(r3)
- stfdu fp1,32(r4)
- stfd fp2,8(r4)
- stfd fp3,16(r4)
- stfd fp4,24(r4)
- bdnz @leftloopAligned
- @skipleftAligned:
-
- bnl cr4,@skipADoubles
- mtctr leftdoubles
- @leftloopDouble:
- lfd fp0,32(r3)
- addi r3,r3,8
- stfd fp0,32(r4)
- addi r4,r4,8
- bdnz @leftloopDouble
- @skipADoubles:
- bng cr4,@skipALong
- lwz r5,32(r3)
- addi r3,r3,4
- stw r5,32(r4)
- addi r4,r4,4
- @skipALong:
- bne cr4,@skipAWord
- lhz r5,32(r3)
- addi r3,r3,2
- sth r5,32(r4)
- addi r4,r4,2
- @skipAWord:
- bns cr4,@skipAByte
- lbz r5,32(r3)
- addi r3,r3,1
- stb r5,32(r4)
- addi r4,r4,1
- @skipAByte:
-
- @endleft:
- add r3,r3,srcOffsetAtoB
- add r4,r4,dstOffsetAtoB
-
- // ———————————————————————————————————————————————————————————————————————————
-
- // align destination
- bns cr5,@skipalignBByte
- lbz r5,32(r3)
- addi r3,r3,1
- stb r5,32(r4)
- addi r4,r4,1
- @skipalignBByte:
- bne cr5,@skipalignBWord
- lhz r5,32(r3)
- addi r3,r3,2
- sth r5,32(r4)
- addi r4,r4,2
- @skipalignBWord:
- bng cr5,@skipalignBLong
- lwz r5,32(r3)
- addi r3,r3,4
- stw r5,32(r4)
- addi r4,r4,4
- @skipalignBLong:
-
- // copy 32 byte blocks
- blt cr6,@rightAligned
- beq cr6,@skipright
- mtctr rightblocks
- @rightloop:
- lwzu r5,32(r3)
- lwz r6,4(r3)
- lwz r7,8(r3)
- lwz r8,12(r3)
- stw r5,0(buffer)
- stw r6,4(buffer)
- stw r7,8(buffer)
- stw r8,12(buffer)
- lwz r9,16(r3)
- lwz r10,20(r3)
- lwz r11,24(r3)
- lwz r12,28(r3)
- lfd fp1,0(buffer)
- lfd fp2,8(buffer)
- stw r9,16(buffer)
- stw r10,20(buffer)
- stw r11,24(buffer)
- stw r12,28(buffer)
- lfd fp3,16(buffer)
- lfd fp4,24(buffer)
- stfdu fp1,32(r4)
- stfd fp2,8(r4)
- stfd fp3,16(r4)
- stfd fp4,24(r4)
- bdnz @rightloop
- @skipright:
-
- // copy left-over bytes (<32)
- beq cr7,@endright
- mtxer right
- lswx r5,offset,r3
- add r3,r3,right
- stswx r5,offset,r4
- add r4,r4,right
- b @endright
-
- @rightAligned:
- beq cr6,@skiprightAligned
- mtctr rightblocks
- @rightloopAligned:
- lfdu fp1,32(r3)
- lfd fp2,8(r3)
- lfd fp3,16(r3)
- lfd fp4,24(r3)
- stfdu fp1,32(r4)
- stfd fp2,8(r4)
- stfd fp3,16(r4)
- stfd fp4,24(r4)
- bdnz @rightloopAligned
- @skiprightAligned:
-
- bnl cr7,@skipBDoubles
- mtctr rightdoubles
- @rightloopDouble:
- lfd fp0,32(r3)
- addi r3,r3,8
- stfd fp0,32(r4)
- addi r4,r4,8
- bdnz @rightloopDouble
- @skipBDoubles:
- bng cr7,@skipBLong
- lwz r5,32(r3)
- addi r3,r3,4
- stw r5,32(r4)
- addi r4,r4,4
- @skipBLong:
- bne cr7,@skipBWord
- lhz r5,32(r3)
- addi r3,r3,2
- sth r5,32(r4)
- addi r4,r4,2
- @skipBWord:
- bns cr7,@skipBByte
- lbz r5,32(r3)
- addi r3,r3,1
- stb r5,32(r4)
- addi r4,r4,1
- @skipBByte:
-
- @endright:
-
- // ———————————————————————————————————————————————————————————————————————————
-
- subic. y,y,1
-
- add r3,r3,srcOffsetBtoA
- add r4,r4,dstOffsetBtoA
-
- bne @rowloop
-
- @end:
- mtcrf 0xFF,r0 // restore CR (mtcr r0)
- @gohome:
- #undef y
- #undef leftblocks
- #undef left
- #undef rightblocks
- #undef right
- #undef alignA
- #undef alignB
- #undef srcOffsetAtoB
- #undef srcOffsetBtoA
- #undef dstOffsetAtoB
- #undef dstOffsetBtoA
- #undef buffer
- #undef offset
- lmw r19,-kRegisterSaveStack(SP) // restore registers from stack
- #undef rows
- #undef bytesA
- #undef bytesB
- #undef info
- blr
- }
- #endif //#if SW_PPC
-
- #endif //#if THEORY/USE_C/USE_ASM