home *** CD-ROM | disk | FTP | other *** search
- /*
- * FlMemcpy.h - Our own mem copy routine
- *
- * Copyright (C) Alberto Vigata - January 2000 - ultraflask@yahoo.com
- *
- * This file is part of FlasKMPEG, a free MPEG to MPEG/AVI converter
- *
- * FlasKMPEG is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * FlasKMPEG is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with FlasKMPEG; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
#ifndef FLMEMCPY_H
#define FLMEMCPY_H

/*
 * flmemcpy - copy n bytes from src to dest.
 *
 *   dest  destination buffer, at least n bytes long
 *   src   source buffer, at least n bytes long
 *   n     number of bytes to copy; n <= 0 copies nothing
 *
 * Overlapping regions are not supported: data is moved front to back in
 * chunks of up to 64 bytes.
 *
 * On 32-bit x86 builds with an MSVC-style inline assembler the copy is done
 * with MMX loads/stores.  That path overwrites mm0-mm7 (which alias the x87
 * register stack) and finishes with emms so floating-point code can run
 * afterwards.  Every other target uses a plain C byte loop.
 */
static void flmemcpy(void *dest, void *src, int n)
{
    /*
     * The assembly below treats the count as unsigned (shr), so a negative
     * n would turn into an enormous block count and run far past both
     * buffers.  Reject it, together with the trivial n == 0, up front.
     */
    if (n <= 0)
        return;

#if defined(_MSC_VER) && defined(_M_IX86)

#if 1
    /* Default variant: byte-copy the leftover, then 64 bytes per iteration. */
    __asm
    {
        mov     edi, dest
        mov     esi, src
        mov     ecx, n
        mov     edx, ecx            // edx keeps the full byte count

        // Copy the (n % 64) leftover bytes first.  No pointer alignment is
        // performed; this only makes the remaining length a multiple of 64.
        and     ecx, 63
        rep     movsb

        shr     edx, 6              // edx = number of whole 64-byte blocks
        jz      flmemcpy_done       // shr already set ZF for a zero count

    flmemcpy_block:
        movq    mm0, [esi]
        movq    mm1, [esi+8]
        movq    [edi], mm0
        movq    [edi+8], mm1
        movq    mm2, [esi+16]
        movq    mm3, [esi+24]
        movq    [edi+16], mm2
        movq    [edi+24], mm3
        movq    mm0, [esi+32]
        movq    mm1, [esi+40]
        movq    [edi+32], mm0
        movq    [edi+40], mm1
        movq    mm2, [esi+48]
        movq    mm3, [esi+56]
        movq    [edi+48], mm2
        movq    [edi+56], mm3

        add     esi, 64
        add     edi, 64
        dec     edx
        jnz     flmemcpy_block

    flmemcpy_done:
        emms                        // hand the x87/MMX registers back to FP code
    }
#else
    /*
     * Alternative variant (disabled): move the data 2 KB at a time through a
     * small bounce buffer, prefetching the source with prefetchnta and
     * writing the destination with non-temporal movntq stores.
     */
    {
        unsigned char bounce[2048];     /* staging area for one 2 KB block */
        unsigned char *tbuf = bounce;

        __asm
        {
            mov     esi, src
            mov     edi, dest
            mov     edx, tbuf           // edx = start of the bounce buffer
            mov     ebx, n
            shr     ebx, 11             // ebx = number of whole 2 KB blocks
            jz      flmemcpy_nt_tail    // fewer than 2048 bytes: tail only

        flmemcpy_nt_2k:
            mov     eax, edx            // eax walks the bounce buffer
            mov     ecx, 32             // 2048 / 64 chunks per block

        flmemcpy_nt_fill:               // source -> bounce buffer
            prefetchnta [esi+64]        // prefetch next chunk, non-temporal
            prefetchnta [esi+96]

            movq    mm1, [esi]
            movq    mm2, [esi+8]
            movq    mm3, [esi+16]
            movq    mm4, [esi+24]
            movq    mm5, [esi+32]
            movq    mm6, [esi+40]
            movq    mm7, [esi+48]
            movq    mm0, [esi+56]

            movq    [eax], mm1
            movq    [eax+8], mm2
            movq    [eax+16], mm3
            movq    [eax+24], mm4
            movq    [eax+32], mm5
            movq    [eax+40], mm6
            movq    [eax+48], mm7
            movq    [eax+56], mm0

            add     esi, 64
            add     eax, 64
            dec     ecx
            jnz     flmemcpy_nt_fill

            mov     eax, edx            // rewind to the start of the buffer
            mov     ecx, 32

        flmemcpy_nt_drain:              // bounce buffer -> destination
            movq    mm1, [eax]
            movq    mm2, [eax+8]
            movq    mm3, [eax+16]
            movq    mm4, [eax+24]
            movq    mm5, [eax+32]
            movq    mm6, [eax+40]
            movq    mm7, [eax+48]
            movq    mm0, [eax+56]

            movntq  [edi], mm1          // non-temporal stores
            movntq  [edi+8], mm2
            movntq  [edi+16], mm3
            movntq  [edi+24], mm4
            movntq  [edi+32], mm5
            movntq  [edi+40], mm6
            movntq  [edi+48], mm7
            movntq  [edi+56], mm0

            add     eax, 64
            add     edi, 64
            dec     ecx
            jnz     flmemcpy_nt_drain

            dec     ebx                 // next 2 KB block
            jnz     flmemcpy_nt_2k

        flmemcpy_nt_tail:
            // esi/edi now sit just past the last whole block; finish the
            // remaining (n % 2048) bytes with a byte copy.
            mov     ecx, n
            and     ecx, 2047
            rep     movsb

            sfence                      // make the movntq stores globally visible
            emms                        // hand the x87/MMX registers back to FP code
        }
    }
#endif

#else
    /* Portable fallback for targets without 32-bit MSVC inline assembly. */
    {
        unsigned char *out = (unsigned char *)dest;
        const unsigned char *in = (const unsigned char *)src;

        while (n-- > 0)
            *out++ = *in++;
    }
#endif
}

#endif /* FLMEMCPY_H */