home *** CD-ROM | disk | FTP | other *** search
- /* strcmp (s1, s2) -- Compare S1 and S2, returning less than, equal to or
- greater than zero if S1 is lexicographically less than,
- equal to or greater than S2.
- For Intel 80x86, x>=3.
- Copyright (C) 1994 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@ira.uka.de>
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If
- not, write to the Free Software Foundation, Inc., 675 Mass Ave,
- Cambridge, MA 02139, USA. */
-
- #include <string.h>
-
- #include "asm-ops.h"
-
- int
- strcmp(char * s1, const char * s2)
- {
- register int __res;
- __asm__(
- "cmpl %1,%2\n\t"
- "jae L1ls2\n\t"
-
- "cmpl ____brk_addr,%1\n\t"
- "jae L1gt2h\n\t"
-
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
-
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
-
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "jmp LFast\n"
-
- "L1gt2h:\n\t"
- "cmpl %%esp,%2\n\t"
- "jb LNorm\n\t"
- "jmp LFast\n"
-
- "L1ls2: \n\t"
- "cmpl ____brk_addr,%2\n\t"
- "jae L1ls2h\n\t"
-
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
-
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
-
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "jmp LFast\n"
-
- "L1ls2h:\n\t"
- "cmpl %%esp,%1\n\t"
- "jae LFast\n"
-
- "LNorm:\n\t"
- "subl %1,%2\n\t" /* only one loop pointer by index adressing */
- "subl $4,%1\n\t" /* prepare initial incrementation */
-
- ALIGN "\n" /* align for better loop performance */
-
- LL(6) "\taddl $4,%1\n\t" /* increment loop pointer */
-
- "movb (%1),%%al\n\t" /* load actual character from S1 in %al */
- "movb (%2,%1),%%ah\n\t" /* load actual character from S2 in %ah */
- "subb %%ah,%%al\n\t" /* different ? */
- "jnz " LF(5) "\n\t" /* yes, branch */
- "testb %%ah,%%ah\n\t" /* NULL characters? */
- "jz " LF(2) "\n\t" /* yes, branch */
-
- "movb 1(%1),%%al\n\t"
- "movb 1(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "movb 2(%1),%%al\n\t"
- "movb 2(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "movb 3(%1),%%al\n\t"
- "movb 3(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jnz " LB(6) "\n"
-
- "xorl %0,%0\n\t"
- "jmp " LF(4) "\n\t"
-
-
- /* Some optimizations:
- * 0. use as less jumps as possible (this is what has to be done
- * ALWAYS)
- * 1. four times unfolded loop
- * 2. loop pointer increment only at the end of the loop
- * 3. only one loop pointer by using index adressing
- * 4. don't use register used for register variables (i.e. which
- * which must be saved)
- */
-
- "LFast:\n\t"
- "subl %1,%2\n" /* only one loop pointer by index adressing */
- "subl $16,%1\n\t" /* prepare initial incrementation */
-
- ".align 4,0x90\n" /* align for better loop performance */
-
- /* Each loop round we compare 4 x 4 bytes. If the longwords
- are equal we have to test for the end of the string (NULL
- char). This is done by adding MAGIC_BITS to the LONGWORD.
- If this fails to change any of the hole bits of LONGWORD
- we have two equal strings.
-
- 1) Is this safe? Will it catch all the zero bytes?
- Suppose there is a byte with all zeros. Any carry bits
- propagating from its left will fall into the hole at its
- least significant bit and stop. Since there will be no
- carry from its most significant bit, the LSB of the
- byte to the left will be unchanged, and the zero will be
- detected.
-
- 2) Is this worthwhile? Will it ignore everything except
- zero bytes? Suppose every byte of LONGWORD has a bit set
- somewhere. There will be a carry into bit 8. If bit 8
- is set, this will carry into bit 16. If bit 8 is clear,
- one of bits 9-15 must be set, so there will be a carry
- into bit 16. Similarly, there will be a carry into bit
- 24. If one of bits 24-31 is set, there will be a carry
- into bit 32 (=carry flag), so all of the hole bits will
- be changed. */
-
- LL(1) "\taddl $16,%1\n\t" /* increment loop pointer */
-
- "movl (%1),%0\n\t" /* load actual character from S1 in %al */
- "movl %0,%%ecx\n\t" /* save it for later */
- "subl (%2,%1),%%ecx\n\t"/* is it different from second string ? */
- "jnz " LF(3) "\n\t" /* yes, branch */
- "movl %0,%%ecx\n\t" /* test for NULL char in string (see above) */
- "addl $0xfefefeff,%0\n\t" /* this means the two strings are equal */
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t" /* strings are equal, return 0 */
-
- "movl 4(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 4(%2,%1),%%ecx\n\t"
- "jnz " LF(31) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t"
-
- "movl 8(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 8(%2,%1),%%ecx\n\t"
- "jnz " LF(32) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t"
-
- "movl 12(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 12(%2,%1),%%ecx\n\t"
- "jnz " LF(33) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jz " LB(1) "\n"
-
- LL(2) "\txorl %0,%0\n\t"
- "jmp " LF(4) "\n"
-
- LL(33) "\taddl $4,%1\n" /* correct counter */
- LL(32) "\taddl $4,%1\n"
- LL(31) "\taddl $4,%1\n"
-
- /* We now have to test what is the first differing byte. This
- * determines the return value. If two matching NULL chars are
- * found before the differing bytes we also have two equal
- * strings.
- *
- * Here %EAX contains the dword in question of s1. */
-
- LL(3) "\tmovb %%al,%%cl\n\t" /* save for later */
- "subb (%2,%1),%%al\n\t" /* bytes different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%cl,%%cl\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "movb %%ah,%%al\n\t" /* second byte of s1 is still there */
- "subb 1(%2,%1),%%al\n\t"/* bytes different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%ah,%%ah\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "movw 2(%1),%%ax\n\t" /* load next two bytes */
- "movb %%al,%%cl\n\t" /* save for later */
- "subb 2(%2,%1),%%al\n\t"/* is third byte different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%cl,%%cl\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "subb 3(%2,%1),%%ah\n" /* make difference and expand */
-
- LL(5) "\tmovl $0,%0\n\t"
- "sbbl %0,%0\n\t"
- "orb $1,%%al\n"
-
- LL(4)
- :"=a" (__res):"S" (s1),"d" (s2):"cx","dx");
- return __res;
- }
-