home *** CD-ROM | disk | FTP | other *** search
segment .rdata, align=16

; 64-bit constants for the MMX routines in this file. Each value is the
; same bit pattern repeated across the quadword so that a single
; pand/movq applies it to every packed pixel at once.

x0020w          dq      0x0020002000200020      ; 0x0020 in each 16-bit word
rb_mask_555     dq      0x7c1f7c1f7c1f7c1f      ; bits 0-4 and 10-14 of each word
g_mask_555      dq      0x03e003e003e003e0      ; bits 5-9 of each word
rb_mask_888     dq      0x00ff00ff00ff00ff      ; low byte of each 16-bit word
g_mask_888      dq      0x0000ff000000ff00      ; second byte of each 32-bit dword
-
segment .text

; Layout of the parameter block whose address is passed to
; _vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX. Every field is one dword.
; The field notes describe how that routine uses each value.
struc VDPixmapReferenceStretchBltBilinearParameters
        .dst:           resd    1       ; address where the interpolated span starts
        .src:           resd    1       ; source row address
        .u:             resd    1       ; initial 32-bit fractional position
        .uinc:          resd    1       ; whole source pixels advanced per output pixel
        .dudx:          resd    1       ; fractional position advanced per output pixel

        .xprepos:       resd    1       ; byte offset from src of the pixel used for the leading fill
        .xpostpos:      resd    1       ; byte offset from src of the pixel used for the trailing fill
        .xprecopy:      resd    1       ; dwords filled immediately before dst
        .xpostcopy:     resd    1       ; dwords filled right after the interpolated span
        .xmidsize:      resd    1       ; number of interpolated output pixels
endstruc
-
-
-
;-----------------------------------------------------------------------
; _vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX
;
; Blends two rows of 16-bit pixels into a destination row with a single
; blend weight. The bit fields selected by rb_mask_555 and g_mask_555
; are blended separately and then recombined.
;
; Arguments: five dwords on the stack, offsets relative to esp on entry.
; The names below are descriptive labels chosen for this comment.
;   [esp+4]   dst    destination row
;   [esp+8]   src1   first source row
;   [esp+12]  src2   second source row
;   [esp+16]  count  number of 16-bit pixels in each row
;   [esp+20]  frac   weight of src2 as a 32-bit fraction; only the top
;                    five bits are used
;
; Per field: dst[i] ~= src1[i] + (src2[i] - src1[i]) * frac / 2^32.
; A row whose byte count is zero returns without touching memory.
;
; Preserves ebx, esi, edi, ebp. Overwrites eax, ecx, edx, flags and
; mm0-mm4, mm6, mm7. Executes emms and returns with a plain ret.
;-----------------------------------------------------------------------
        global  _vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX
_vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX:
        push    ebp
        push    edi
        push    esi
        push    ebx

        ; Build the signed 16-bit multiplier w and choose the row order.
        ; The blend below computes a - (((b - a) * w) >> 16) with a signed
        ; multiply, so w has to be the negated weight of row b:
        ;   frac == 0       : w = 0, rows in argument order
        ;   top bit set     : the 16-bit value already reads as a negative
        ;                     number when signed, so swap the rows and use it
        ;   otherwise       : negate it and keep the argument order
        mov     eax, [esp+20+16]
        and     eax, 0f8000000h         ; ZF/SF from here drive both branches
        mov     ebx, [esp+8+16]         ; row a
        mov     ecx, [esp+12+16]        ; row b
        jz      .weight_ready
        xchg    ebx, ecx                ; xchg does not modify flags
        js      .weight_ready
        neg     eax
        xchg    ebx, ecx                ; undo the swap
.weight_ready:
        shr     eax, 16
        movd    mm4, eax
        punpcklwd mm4, mm4
        punpckldq mm4, mm4              ; mm4 = w in all four words

        mov     edx, [esp+4+16]
        mov     eax, [esp+16+16]
        add     eax, eax                ; eax = row size in bytes
        jz      .done                   ; empty row: the loops below would overrun

        ; Point all three rows 6 bytes before their end and index them
        ; with a negative offset that counts up towards zero.
        lea     ebx, [ebx+eax-6]
        lea     ecx, [ecx+eax-6]
        lea     edx, [edx+eax-6]
        neg     eax

        movq    mm6, [rb_mask_555]
        movq    mm7, [g_mask_555]

        add     eax, 6                  ; eax = 6 - bytes
        jbe     .tail                   ; CF or ZF set: fewer than four pixels

        ; Four pixels (8 bytes) per iteration.
.quad:
        movq    mm0, [ebx+eax]          ; a
        movq    mm1, [ecx+eax]          ; b
        movq    mm2, mm7
        movq    mm3, mm7

        pand    mm2, mm0                ; g_mask field of a
        pand    mm3, mm1                ; g_mask field of b
        pand    mm0, mm6                ; rb_mask fields of a
        pand    mm1, mm6                ; rb_mask fields of b

        psubw   mm3, mm2                ; b - a
        psubw   mm1, mm0

        pmulhw  mm3, mm4                ; high word of (b - a) * w
        pmulhw  mm1, mm4

        psubw   mm0, mm1                ; a - that product
        psubw   mm2, mm3

        pand    mm0, mm6                ; drop bits that strayed outside the fields
        pand    mm2, mm7

        paddw   mm0, mm2                ; recombine the two field groups

        movq    [edx+eax], mm0
        add     eax, 8
        jnc     .quad                   ; carry out means fewer than 8 bytes remain

.tail:
        sub     eax, 6                  ; eax = -(bytes still to do), 0 to -6
        jz      .done

        ; One pixel per iteration for the remaining one to three pixels.
.single:
        movzx   esi, word [ebx+eax+6]
        movd    mm0, esi
        movzx   esi, word [ecx+eax+6]
        movd    mm1, esi
        movq    mm2, mm7
        movq    mm3, mm7

        pand    mm2, mm0
        pand    mm3, mm1
        pand    mm0, mm6
        pand    mm1, mm6

        psubw   mm3, mm2
        psubw   mm1, mm0

        pmulhw  mm3, mm4
        pmulhw  mm1, mm4

        psubw   mm0, mm1
        psubw   mm2, mm3

        pand    mm0, mm6
        pand    mm2, mm7

        paddw   mm0, mm2

        movd    esi, mm0
        mov     [edx+eax+6], si
        add     eax, 2
        jne     .single

.done:
        emms
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
-
-
;-----------------------------------------------------------------------
; _vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX
;
; Produces one row of 32-bit pixels by linear interpolation between
; adjacent source pixels, with optional constant fills before and after
; the interpolated span.
;
; Argument: [esp+4] on entry = address of a
; VDPixmapReferenceStretchBltBilinearParameters block.
;
; Steps:
;   1. If xprecopy != 0, the dword at src + xprepos is stored xprecopy
;      times into the dwords immediately before dst.
;   2. xmidsize pixels are written starting at dst. Each one blends the
;      source dword at the current position with the dword after it,
;      weighted by the top 16 bits of the fractional position. The
;      position then advances by uinc whole pixels plus the carry out of
;      (fraction += dudx). An xmidsize of zero skips this step.
;   3. If xpostcopy != 0, the dword at src + xpostpos is stored
;      xpostcopy times right after the interpolated span.
;
; The source address is shifted right by two to form a pixel index, so
; its low two bits are discarded. rep stosd is used for the fills, so the
; direction flag is assumed to be clear on entry.
;
; Preserves ebx, esi, edi, ebp. Overwrites eax, ecx, edx, flags and
; mm0-mm5, mm7. Executes emms and returns with a plain ret.
;-----------------------------------------------------------------------
        global  _vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX
_vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX:
        push    ebp
        push    edi
        push    esi
        push    ebx

        mov     edx, [esp+4+16]         ; edx = parameter block

        mov     ebx, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
        mov     edi, [edx+VDPixmapReferenceStretchBltBilinearParameters.dst]

        ; Leading fill: xprecopy copies of one source pixel, ending at dst.
        mov     ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprecopy]
        test    ecx, ecx
        jz      .noprecopy
        mov     eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprepos]
        mov     eax, [ebx+eax]
        lea     ebp, [ecx*4]
        sub     edi, ebp                ; back up so the fill finishes at dst
        rep stosd
.noprecopy:
        mov     ebp, [edx+VDPixmapReferenceStretchBltBilinearParameters.xmidsize]
        shl     ebp, 2                  ; ebp = interpolated span in bytes
        jz      .nomid                  ; empty span: the loop below would never terminate
        add     edi, ebp                ; edi = end of the span
        neg     ebp                     ; negative offset counting up to zero

        mov     esi, [edx+VDPixmapReferenceStretchBltBilinearParameters.u]
        mov     eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.dudx]
        mov     edx, [edx+VDPixmapReferenceStretchBltBilinearParameters.uinc]
        movd    mm2, esi                ; mm2 tracks the same fraction as esi
        movd    mm3, eax
        shr     ebx, 2                  ; ebx = source address / 4 (pixel index)
        pxor    mm7, mm7                ; zero, for byte -> word unpacking

.xloop:
        ; mm5 = top 16 bits of the fraction in every word;
        ; mm4 = all-ones in every word when that value has its top bit set.
        movq    mm5, mm2
        punpcklwd mm5, mm5
        punpckhdq mm5, mm5
        movq    mm4, mm5
        psraw   mm4, 15

        movd    mm0, dword [ebx*4]      ; left pixel
        movd    mm1, dword [ebx*4+4]    ; right pixel
        punpcklbw mm0, mm7
        punpcklbw mm1, mm7
        psubw   mm1, mm0                ; right - left, per channel
        pand    mm4, mm1                ; correction term, see below
        pmulhw  mm1, mm5                ; signed multiply: a weight with its top bit
                                        ; set acts as (weight - 65536)
        paddw   mm1, mm4                ; so add the difference back in that case
        paddw   mm0, mm1                ; left + difference * weight / 65536
        packuswb mm0, mm0
        movd    dword [edi+ebp], mm0

        add     esi, eax                ; advance the fraction ...
        adc     ebx, edx                ; ... and carry into the pixel index
        paddd   mm2, mm3                ; keep the MMX copy of the fraction in step

        add     ebp, 4
        jnz     .xloop

.nomid:
        ; Trailing fill: xpostcopy copies of one source pixel after the span.
        mov     edx, [esp+4+16]
        mov     ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostcopy]
        test    ecx, ecx
        jz      .nopostcopy
        mov     eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostpos]
        add     eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
        mov     eax, [eax]
        rep stosd
.nopostcopy:

        emms
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
-
;-----------------------------------------------------------------------
; _vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX
;
; Blends two rows of 32-bit pixels into a destination row with a single
; blend weight. All four bytes of each pixel are blended: the even bytes
; are isolated with rb_mask_888 and the odd bytes with an 8-bit shift.
;
; Arguments: five dwords on the stack, offsets relative to esp on entry.
; The names below are descriptive labels chosen for this comment.
;   [esp+4]   dst    destination row
;   [esp+8]   src1   first source row
;   [esp+12]  src2   second source row
;   [esp+16]  count  number of 32-bit pixels in each row
;   [esp+20]  frac   weight of src2 as a 32-bit fraction; only the top
;                    eight bits are used
;
; Per byte: dst[i] ~= src1[i] + (src2[i] - src1[i]) * frac / 2^32.
; A row whose byte count is zero returns without touching memory.
;
; Preserves ebx, esi, edi, ebp. Overwrites eax, ecx, edx, flags and
; mm0-mm4, mm6. Executes emms and returns with a plain ret.
;-----------------------------------------------------------------------
        global  _vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX
_vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX:
        push    ebp
        push    edi
        push    esi
        push    ebx

        ; Build the signed 16-bit multiplier w and choose the row order.
        ; The blend below computes a - (((b - a) * w) >> 16) with a signed
        ; multiply, so w has to be the negated weight of row b:
        ;   frac == 0       : w = 0, rows in argument order
        ;   top bit set     : the 16-bit value already reads as a negative
        ;                     number when signed, so swap the rows and use it
        ;   otherwise       : negate it and keep the argument order
        mov     eax, [esp+20+16]
        and     eax, 0ff000000h         ; ZF/SF from here drive both branches
        mov     ebx, [esp+8+16]         ; row a
        mov     ecx, [esp+12+16]        ; row b
        jz      .weight_ready
        xchg    ebx, ecx                ; xchg does not modify flags
        js      .weight_ready
        neg     eax
        xchg    ebx, ecx                ; undo the swap
.weight_ready:
        shr     eax, 16
        movd    mm4, eax
        punpcklwd mm4, mm4
        punpckldq mm4, mm4              ; mm4 = w in all four words

        mov     edx, [esp+4+16]
        mov     eax, [esp+16+16]
        shl     eax, 2                  ; eax = row size in bytes
        jz      .done                   ; empty row: the loop below would overrun

        ; Point all three rows at their last pixel and index them with a
        ; negative offset that counts up towards zero.
        lea     ebx, [ebx+eax-4]
        lea     ecx, [ecx+eax-4]
        lea     edx, [edx+eax-4]
        neg     eax

        movq    mm6, [rb_mask_888]

        add     eax, 4                  ; eax = 4 - bytes
        jbe     .tail                   ; CF or ZF set: only one pixel in the row

        ; Two pixels (8 bytes) per iteration.
.pair:
        movq    mm0, [ebx+eax]          ; a
        movq    mm1, [ecx+eax]          ; b
        movq    mm2, mm0
        movq    mm3, mm1
        psrlw   mm2, 8                  ; odd bytes of a, moved down to the low byte
        psrlw   mm3, 8                  ; odd bytes of b
        pand    mm0, mm6                ; even bytes of a
        pand    mm1, mm6                ; even bytes of b

        psubw   mm3, mm2                ; b - a
        psubw   mm1, mm0

        pmulhw  mm3, mm4                ; high word of (b - a) * w
        pmulhw  mm1, mm4

        psubw   mm0, mm1                ; a - that product
        psubw   mm2, mm3

        pand    mm0, mm6

        psllw   mm2, 8                  ; odd bytes back into the high byte

        paddw   mm0, mm2

        movq    qword [edx+eax], mm0
        add     eax, 8
        jnc     .pair                   ; carry out means fewer than 8 bytes remain

.tail:
        sub     eax, 4                  ; zero when no pixel is left over
        jz      .done

        ; At most one pixel remains and it is the last one in the row,
        ; which is exactly where ebx, ecx and edx point.
        movd    mm0, dword [ebx]
        movd    mm1, dword [ecx]
        movq    mm2, mm0
        movq    mm3, mm1
        psrlw   mm2, 8
        psrlw   mm3, 8
        pand    mm0, mm6
        pand    mm1, mm6

        psubw   mm3, mm2
        psubw   mm1, mm0

        pmulhw  mm3, mm4
        pmulhw  mm1, mm4

        psubw   mm0, mm1
        psubw   mm2, mm3

        pand    mm0, mm6

        psllw   mm2, 8

        paddw   mm0, mm2

        movd    dword [edx], mm0

.done:
        emms
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
-
-
- end
-