home *** CD-ROM | disk | FTP | other *** search
- // VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
- // Copyright (C) 1999-2003 Forgotten
- // Copyright (C) 2004 Forgotten and the VBA development team
-
- // This program is free software; you can redistribute it and/or modify
- // it under the terms of the GNU General Public License as published by
- // the Free Software Foundation; either version 2, or(at your option)
- // any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of the GNU General Public License
- // along with this program; if not, write to the Free Software Foundation,
- // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #include "System.h"
- #include <stdlib.h>
- #include <memory.h>
-
- #ifdef MMX
- extern "C" bool cpu_mmx;
- #endif
-
- /*
- * Thanks to Kawaks' Mr. K for the code
-
- Incorporated into vba by Anthony Di Franco
- */
-
- static u8 *frm1 = NULL;
- static u8 *frm2 = NULL;
- static u8 *frm3 = NULL;
-
- extern int RGB_LOW_BITS_MASK;
- extern u32 qRGB_COLOR_MASK[2];
-
- static void Init()
- {
- frm1 = (u8 *)calloc(322*242,4);
- // 1 frame ago
- frm2 = (u8 *)calloc(322*242,4);
- // 2 frames ago
- frm3 = (u8 *)calloc(322*242,4);
- // 3 frames ago
- }
-
- void InterframeCleanup()
- {
- if(frm1)
- free(frm1);
- if(frm2)
- free(frm2);
- if(frm3)
- free(frm3);
- frm1 = frm2 = frm3 = NULL;
- }
-
- #ifdef MMX
- static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- u16 *src0 = (u16 *)srcPtr;
- u16 *src1 = (u16 *)frm1;
- u16 *src2 = (u16 *)frm2;
- u16 *src3 = (u16 *)frm3;
-
- int count = width >> 2;
-
- for(int i = 0; i < height; i++) {
- #ifdef __GNUC__
- asm volatile (
- "push %4\n"
- "movq 0(%5), %%mm7\n" // colorMask
- "0:\n"
- "movq 0(%0), %%mm0\n" // src0
- "movq 0(%1), %%mm1\n" // src1
- "movq 0(%2), %%mm2\n" // src2
- "movq 0(%3), %%mm3\n" // src3
- "movq %%mm0, 0(%3)\n" // src3 = src0
- "movq %%mm0, %%mm4\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
- "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
- "por %%mm5, %%mm4\n" // A | B
- "movq %%mm2, %%mm5\n"
- "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
- "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
- "por %%mm3, %%mm5\n" // C|D
- "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
- "movq %%mm0, %%mm2\n"
- "pand %%mm7, %%mm2\n" // color & colorMask
- "pand %%mm7, %%mm1\n" // src1 & colorMask
- "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
- "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
- "paddw %%mm2, %%mm1\n" // E+F
- "pand %%mm4, %%mm1\n" // (E+F) & res
- "pandn %%mm0, %%mm4\n" // color& !res
-
- "por %%mm1, %%mm4\n"
- "movq %%mm4, 0(%0)\n" // src0 = res
-
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $8, %3\n"
-
- "decl %4\n"
- "jnz 0b\n"
- "pop %4\n"
- "emms\n"
- : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
- : "r" (count), "r" (qRGB_COLOR_MASK)
- );
- #else
- __asm {
- movq mm7, qword ptr [qRGB_COLOR_MASK];
- mov eax, src0;
- mov ebx, src1;
- mov ecx, src2;
- mov edx, src3;
- mov edi, count;
- label0:
- movq mm0, qword ptr [eax]; // src0
- movq mm1, qword ptr [ebx]; // src1
- movq mm2, qword ptr [ecx]; // src2
- movq mm3, qword ptr [edx]; // src3
- movq qword ptr [edx], mm0; // src3 = src0
- movq mm4, mm0;
- movq mm5, mm1;
- pcmpeqw mm5, mm2; // src1 == src2 (A)
- pcmpeqw mm4, mm3; // src3 == src0 (B)
- por mm4, mm5; // A | B
- movq mm5, mm2;
- pcmpeqw mm5, mm0; // src0 == src2 (C)
- pcmpeqw mm3, mm1; // src1 == src3 (D)
- por mm5, mm3; // C|D
- pandn mm4, mm5; // (!(A|B))&(C|D)
- movq mm2, mm0;
- pand mm2, mm7; // color & colorMask
- pand mm1, mm7; // src1 & colorMask
- psrlw mm2, 1; // (color & colorMask) >> 1 (E)
- psrlw mm1, 1; // (src & colorMask) >> 1 (F)
- paddw mm1, mm2; // E+F
- pand mm1, mm4; // (E+F) & res
- pandn mm4, mm0; // color & !res
-
- por mm4, mm1;
- movq qword ptr [eax], mm4; // src0 = res
-
- add eax, 8;
- add ebx, 8;
- add ecx, 8;
- add edx, 8;
-
- dec edi;
- jnz label0;
- mov src0, eax;
- mov src1, ebx;
- mov src2, ecx;
- mov src3, edx;
- emms;
- }
- #endif
- src0+=2;
- src1+=2;
- src2+=2;
- src3+=2;
- }
-
- /* Swap buffers around */
- u8 *temp = frm1;
- frm1 = frm3;
- frm3 = frm2;
- frm2 = temp;
- }
- #endif
-
- void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- if(frm1 == NULL) {
- Init();
- }
- #ifdef MMX
- if(cpu_mmx) {
- SmartIB_MMX(srcPtr, srcPitch, width, height);
- return;
- }
- #endif
-
- u16 colorMask = ~RGB_LOW_BITS_MASK;
-
- u16 *src0 = (u16 *)srcPtr;
- u16 *src1 = (u16 *)frm1;
- u16 *src2 = (u16 *)frm2;
- u16 *src3 = (u16 *)frm3;
-
- int sPitch = srcPitch >> 1;
-
- int pos = 0;
- for (int j = 0; j < height; j++)
- for (int i = 0; i < sPitch; i++) {
- u16 color = src0[pos];
- src0[pos] =
- (src1[pos] != src2[pos]) &&
- (src3[pos] != color) &&
- ((color == src2[pos]) || (src1[pos] == src3[pos]))
- ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
- color;
- src3[pos] = color; /* oldest buffer now holds newest frame */
- pos++;
- }
-
- /* Swap buffers around */
- u8 *temp = frm1;
- frm1 = frm3;
- frm3 = frm2;
- frm2 = temp;
- }
-
- #ifdef MMX
- static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- u32 *src0 = (u32 *)srcPtr;
- u32 *src1 = (u32 *)frm1;
- u32 *src2 = (u32 *)frm2;
- u32 *src3 = (u32 *)frm3;
-
- int count = width >> 1;
-
- for(int i = 0; i < height; i++) {
- #ifdef __GNUC__
- asm volatile (
- "push %4\n"
- "movq 0(%5), %%mm7\n" // colorMask
- "0:\n"
- "movq 0(%0), %%mm0\n" // src0
- "movq 0(%1), %%mm1\n" // src1
- "movq 0(%2), %%mm2\n" // src2
- "movq 0(%3), %%mm3\n" // src3
- "movq %%mm0, 0(%3)\n" // src3 = src0
- "movq %%mm0, %%mm4\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
- "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
- "por %%mm5, %%mm4\n" // A | B
- "movq %%mm2, %%mm5\n"
- "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
- "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
- "por %%mm3, %%mm5\n" // C|D
- "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
- "movq %%mm0, %%mm2\n"
- "pand %%mm7, %%mm2\n" // color & colorMask
- "pand %%mm7, %%mm1\n" // src1 & colorMask
- "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
- "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
- "paddd %%mm2, %%mm1\n" // E+F
- "pand %%mm4, %%mm1\n" // (E+F) & res
- "pandn %%mm0, %%mm4\n" // color& !res
-
- "por %%mm1, %%mm4\n"
- "movq %%mm4, 0(%0)\n" // src0 = res
-
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $8, %3\n"
-
- "decl %4\n"
- "jnz 0b\n"
- "pop %4\n"
- "emms\n"
- : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
- : "r" (count), "r" (qRGB_COLOR_MASK)
- );
- #else
- __asm {
- movq mm7, qword ptr [qRGB_COLOR_MASK];
- mov eax, src0;
- mov ebx, src1;
- mov ecx, src2;
- mov edx, src3;
- mov edi, count;
- label0:
- movq mm0, qword ptr [eax]; // src0
- movq mm1, qword ptr [ebx]; // src1
- movq mm2, qword ptr [ecx]; // src2
- movq mm3, qword ptr [edx]; // src3
- movq qword ptr [edx], mm0; // src3 = src0
- movq mm4, mm0;
- movq mm5, mm1;
- pcmpeqd mm5, mm2; // src1 == src2 (A)
- pcmpeqd mm4, mm3; // src3 == src0 (B)
- por mm4, mm5; // A | B
- movq mm5, mm2;
- pcmpeqd mm5, mm0; // src0 == src2 (C)
- pcmpeqd mm3, mm1; // src1 == src3 (D)
- por mm5, mm3; // C|D
- pandn mm4, mm5; // (!(A|B))&(C|D)
- movq mm2, mm0;
- pand mm2, mm7; // color & colorMask
- pand mm1, mm7; // src1 & colorMask
- psrld mm2, 1; // (color & colorMask) >> 1 (E)
- psrld mm1, 1; // (src & colorMask) >> 1 (F)
- paddd mm1, mm2; // E+F
- pand mm1, mm4; // (E+F) & res
- pandn mm4, mm0; // color & !res
-
- por mm4, mm1;
- movq qword ptr [eax], mm4; // src0 = res
-
- add eax, 8;
- add ebx, 8;
- add ecx, 8;
- add edx, 8;
-
- dec edi;
- jnz label0;
- mov src0, eax;
- mov src1, ebx;
- mov src2, ecx;
- mov src3, edx;
- emms;
- }
- #endif
-
- src0++;
- src1++;
- src2++;
- src3++;
- }
- /* Swap buffers around */
- u8 *temp = frm1;
- frm1 = frm3;
- frm3 = frm2;
- frm2 = temp;
- }
- #endif
-
- void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- if(frm1 == NULL) {
- Init();
- }
- #ifdef MMX
- if(cpu_mmx) {
- SmartIB32_MMX(srcPtr, srcPitch, width, height);
- return;
- }
- #endif
-
- u32 *src0 = (u32 *)srcPtr;
- u32 *src1 = (u32 *)frm1;
- u32 *src2 = (u32 *)frm2;
- u32 *src3 = (u32 *)frm3;
-
- u32 colorMask = 0xfefefe;
-
- int sPitch = srcPitch >> 2;
- int pos = 0;
-
- for (int j = 0; j < height; j++)
- for (int i = 0; i < sPitch; i++) {
- u32 color = src0[pos];
- src0[pos] =
- (src1[pos] != src2[pos]) &&
- (src3[pos] != color) &&
- ((color == src2[pos]) || (src1[pos] == src3[pos]))
- ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
- color;
- src3[pos] = color; /* oldest buffer now holds newest frame */
- pos++;
- }
-
- /* Swap buffers around */
- u8 *temp = frm1;
- frm1 = frm3;
- frm3 = frm2;
- frm2 = temp;
- }
-
- #ifdef MMX
- static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- u16 *src0 = (u16 *)srcPtr;
- u16 *src1 = (u16 *)frm1;
-
- int count = width >> 2;
-
- for(int i = 0; i < height; i++) {
- #ifdef __GNUC__
- asm volatile (
- "push %2\n"
- "movq 0(%3), %%mm7\n" // colorMask
- "0:\n"
- "movq 0(%0), %%mm0\n" // src0
- "movq 0(%1), %%mm1\n" // src1
- "movq %%mm0, 0(%1)\n" // src1 = src0
- "pand %%mm7, %%mm0\n" // color & colorMask
- "pand %%mm7, %%mm1\n" // src1 & colorMask
- "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
- "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
- "paddw %%mm1, %%mm0\n" // E+F
-
- "movq %%mm0, 0(%0)\n" // src0 = res
-
- "addl $8, %0\n"
- "addl $8, %1\n"
-
- "decl %2\n"
- "jnz 0b\n"
- "pop %2\n"
- "emms\n"
- : "+r" (src0), "+r" (src1)
- : "r" (count), "r" (qRGB_COLOR_MASK)
- );
- #else
- __asm {
- movq mm7, qword ptr [qRGB_COLOR_MASK];
- mov eax, src0;
- mov ebx, src1;
- mov edi, count;
- label0:
- movq mm0, qword ptr [eax]; // src0
- movq mm1, qword ptr [ebx]; // src1
- movq qword ptr [ebx], mm0; // src1 = src0
- pand mm0, mm7; // color & colorMask
- pand mm1, mm7; // src1 & colorMask
- psrlw mm0, 1; // (color & colorMask) >> 1 (E)
- psrlw mm1, 1; // (src & colorMask) >> 1 (F)
- paddw mm0, mm1; // E+F
-
- movq qword ptr [eax], mm0; // src0 = res
-
- add eax, 8;
- add ebx, 8;
-
- dec edi;
- jnz label0;
- mov src0, eax;
- mov src1, ebx;
- emms;
- }
- #endif
- src0+=2;
- src1+=2;
- }
- }
- #endif
-
- void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- if(frm1 == NULL) {
- Init();
- }
-
- #ifdef MMX
- if(cpu_mmx) {
- MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
- return;
- }
- #endif
-
- u16 colorMask = ~RGB_LOW_BITS_MASK;
-
- u16 *src0 = (u16 *)srcPtr;
- u16 *src1 = (u16 *)frm1;
-
- int sPitch = srcPitch >> 1;
-
- int pos = 0;
- for (int j = 0; j < height; j++)
- for (int i = 0; i < sPitch; i++) {
- u16 color = src0[pos];
- src0[pos] =
- (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
- src1[pos] = color;
- pos++;
- }
- }
-
- #ifdef MMX
- static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- u32 *src0 = (u32 *)srcPtr;
- u32 *src1 = (u32 *)frm1;
-
- int count = width >> 1;
-
- for(int i = 0; i < height; i++) {
- #ifdef __GNUC__
- asm volatile (
- "push %2\n"
- "movq 0(%3), %%mm7\n" // colorMask
- "0:\n"
- "movq 0(%0), %%mm0\n" // src0
- "movq 0(%1), %%mm1\n" // src1
- "movq %%mm0, 0(%1)\n" // src1 = src0
- "pand %%mm7, %%mm0\n" // color & colorMask
- "pand %%mm7, %%mm1\n" // src1 & colorMask
- "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
- "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
- "paddd %%mm1, %%mm0\n" // E+F
-
- "movq %%mm0, 0(%0)\n" // src0 = res
-
- "addl $8, %0\n"
- "addl $8, %1\n"
-
- "decl %2\n"
- "jnz 0b\n"
- "pop %2\n"
- "emms\n"
- : "+r" (src0), "+r" (src1)
- : "r" (count), "r" (qRGB_COLOR_MASK)
- );
- #else
- __asm {
- movq mm7, qword ptr [qRGB_COLOR_MASK];
- mov eax, src0;
- mov ebx, src1;
- mov edi, count;
- label0:
- movq mm0, qword ptr [eax]; // src0
- movq mm1, qword ptr [ebx]; // src1
- movq qword ptr [ebx], mm0; // src1 = src0
- pand mm0, mm7; // color & colorMask
- pand mm1, mm7; // src1 & colorMask
- psrld mm0, 1; // (color & colorMask) >> 1 (E)
- psrld mm1, 1; // (src & colorMask) >> 1 (F)
- paddd mm0, mm1; // E+F
-
- movq qword ptr [eax], mm0; // src0 = res
-
- add eax, 8;
- add ebx, 8;
-
- dec edi;
- jnz label0;
- mov src0, eax;
- mov src1, ebx;
- emms;
- }
- #endif
- src0++;
- src1++;
- }
- }
- #endif
-
- void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- if(frm1 == NULL) {
- Init();
- }
-
- #ifdef MMX
- if(cpu_mmx) {
- MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
- return;
- }
- #endif
-
- u32 *src0 = (u32 *)srcPtr;
- u32 *src1 = (u32 *)frm1;
-
- u32 colorMask = 0xfefefe;
-
- int sPitch = srcPitch >> 2;
- int pos = 0;
-
- for (int j = 0; j < height; j++)
- for (int i = 0; i < sPitch; i++) {
- u32 color = src0[pos];
- src0[pos] = (((color & colorMask) >> 1) +
- ((src1[pos] & colorMask) >> 1));
- src1[pos] = color;
- pos++;
- }
- }
-
- static int count = 0;
-
- void InterlaceIB(u8 *srcPtr, u32 srcPitch, int width, int height)
- {
- if(frm1 == NULL) {
- Init();
- }
-
- u16 colorMask = ~RGB_LOW_BITS_MASK;
-
- u16 *src0 = (u16 *)srcPtr;
- u16 *src1 = (u16 *)frm1;
-
- int sPitch = srcPitch >> 1;
-
- int pos = 0;
- for (int j = 0; j < height; j++) {
- bool render = count ? (j & 1) != 0 : (j & 1) == 0;
- if(render) {
- for (int i = 0; i < sPitch; i++) {
- u16 color = src0[pos];
- src0[pos] =
- (((color & colorMask) >> 1) + ((((src1[pos] & colorMask) >> 1) & colorMask) >> 1));
- src1[pos] = color;
- pos++;
- }
- } else {
- for (int i = 0; i < sPitch; i++) {
- u16 color = src0[pos];
- src0[pos] =
- (((((color & colorMask) >> 1) & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
- src1[pos] = color;
- pos++;
- }
- }
- }
- count = count ^ 1;
- }
-