home *** CD-ROM | disk | FTP | other *** search
- /* strncmp (s1, s2, n) -- Compare no more than N characters of S1 and S2,
- returning less than, equal to or greater than zero
- if S1 is lexicographically less than, equal to or
- greater than S2.
- For Intel 80x86, x>=3.
- Copyright (C) 1994 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@ira.uka.de>
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If
- not, write to the Free Software Foundation, Inc., 675 Mass Ave,
- Cambridge, MA 02139, USA. */
-
- #include <string.h>
-
- #include "asm-ops.h"
-
- int
- strncmp(const char * s1, const char * s2, size_t n)
- {
- register int __res;
- __asm__(
- "cmpl %1,%2\n\t"
- "jae L1ls2\n\t"
-
- "cmpl ____brk_addr,%1\n\t"
- "jae L1gt2h\n\t"
-
- "orl %3,%3\n\t"
- "jz " LF(2) "\n\t"
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
-
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
-
- "testl $3,%1\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jnz LFast\n\t"
-
- "xorl %0,%0\n\t"
- "jmp " LF(4) "\n"
-
- "L1gt2h:\n\t"
- "cmpl %%esp,%2\n\t"
- "jb LNorm\n\t"
- "jmp LFast\n"
-
- "L1ls2:\n\t"
- "cmpl ____brk_addr,%2\n\t"
- "jae L1ls2h\n\t"
-
- "orl %3,%3\n\t"
- "jz " LF(2) "\n\t"
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
-
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
-
- "testb $3,%2\n\t"
- "jz LFast\n\t"
- "movb (%1),%%al\n\t"
- "movb %%al,%%ah\n\t"
- "subb (%2),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
- "incl %2\n\t"
- "incl %1\n\t"
- "decl %3\n\t"
- "jnz LFast\n\t"
-
- "xorl %0,%0\n\t"
- "jmp " LF(4) "\n"
-
- "L1ls2h:\n\t"
- "cmpl %%esp,%1\n\t"
- "jae LFast\n"
-
- "LNorm:\n\t"
- "subl %1,%2\n\t" /* only one loop pointer by index adressing */
-
- ".align 4,0x90\n" /* align for better loop performance */
-
- LL(1) "\tcmpl $4,%3\n\t" /* run loop for more than 3 chars left */
- "jb " LF(3) "\n\t" /* no, than process rest (<=3) */
-
- "movb (%1),%%al\n\t" /* load actual character from S1 in %al */
- "movb (%2,%1),%%ah\n\t" /* load actual character from S2 in %ah */
- "subb %%ah,%%al\n\t" /* different ? */
- "jnz " LF(5) "\n\t" /* yes, branch */
- "testb %%ah,%%ah\n\t" /* NULL characters? */
- "jz " LF(2) "\n\t" /* yes, branch */
-
- "movb 1(%1),%%al\n\t"
- "movb 1(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "movb 2(%1),%%al\n\t"
- "movb 2(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "movb 3(%1),%%al\n\t"
- "movb 3(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "subl $4,%3\n\t" /* decrement string length counter */
- "addl $4,%1\n\t" /* increment loop pointer */
- "testb %%ah,%%ah\n\t"
- "jnz " LB(1) "\n\t"
-
- "xorl %0,%0\n\t"
- "jmp " LF(4) "\n" /* second string is empty -> return 0 */
-
- LL(3) "\torl %3,%3\n\t"
- "jz " LF(2) "\n\t" /* no more character left -> return 0 */
- "movb (%1),%%al\n\t" /* load actual character from S1 in %al */
- "movb (%2,%1),%%ah\n\t" /* load actual character from S2 in %ah */
- "subb %%ah,%%al\n\t" /* different ? */
- "jnz " LF(5) "\n\t" /* yes, branch */
- "testb %%ah,%%ah\n\t" /* NULL characters? */
- "jz " LF(2) "\n\t" /* yes, branch */
-
- "decl %3\n\t" /* decrement string length counter */
- "jz " LF(2) "\n\t" /* if zero, than return 0 (because %al==0) */
- "movb 1(%1),%%al\n\t"
- "movb 1(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
- "movb 2(%1),%%al\n\t"
- "movb 2(%2,%1),%%ah\n\t"
- "subb %%ah,%%al\n\t"
- "jnz " LF(5) "\n\t"
- "testb %%ah,%%ah\n\t"
- "jz " LF(2) "\n\t"
-
- "decl %3\n\t"
- "jz " LF(2) "\n\t"
- "movb 3(%1),%%al\n\t"
- "subb 3(%2,%1),%%al\n\t"
- "jz " LF(2) "\n"
-
- "\tjmp " LF(5) "\n"
-
-
- /* Some optimizations:
- * 0. use as less jumps as possible (this is what has to be done
- * ALWAYS)
- * 1. four times unfolded loop
- * 2. loop pointer increment only at the end of the loop
- * 3. only one loop pointer by using index adressing
- * 4. use fast character match technique (see below)
- */
-
- "LFast:\tsubl %1,%2\n\t" /* only one loop pointer by index adressing */
-
- ALIGN "\n" /* align for better loop performance */
-
- /* Each loop round we compare 4 x 4 bytes. If the longwords
- are equal we have to test for the end of the string (NULL
- char). This is done by adding MAGIC_BITS to the LONGWORD.
- If this fails to change any of the hole bits of LONGWORD
- we have two equal strings.
-
- 1) Is this safe? Will it catch all the zero bytes?
- Suppose there is a byte with all zeros. Any carry bits
- propagating from its left will fall into the hole at its
- least significant bit and stop. Since there will be no
- carry from its most significant bit, the LSB of the
- byte to the left will be unchanged, and the zero will be
- detected.
-
- 2) Is this worthwhile? Will it ignore everything except
- zero bytes? Suppose every byte of LONGWORD has a bit set
- somewhere. There will be a carry into bit 8. If bit 8
- is set, this will carry into bit 16. If bit 8 is clear,
- one of bits 9-15 must be set, so there will be a carry
- into bit 16. Similarly, there will be a carry into bit
- 24. If one of bits 24-31 is set, there will be a carry
- into bit 32 (=carry flag), so all of the hole bits will
- be changed. */
-
- LL(1) "\tsubl $16,%3\n\t"
- "jc " LF(6) "\n\t"
-
- "movl (%1),%0\n\t" /* load actual character from S1 in %al */
- "movl %0,%%ecx\n\t" /* save it for later */
- "subl (%2,%1),%%ecx\n\t"/* is it different from second string ? */
- "jnz " LF(3) "\n\t" /* yes, branch */
- "movl %0,%%ecx\n\t" /* test for NULL char in string (see above) */
- "addl $0xfefefeff,%0\n\t" /* this means the two strings are equal */
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t" /* strings are equal, return 0 */
-
- "movl 4(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 4(%2,%1),%%ecx\n\t"
- "jnz " LF(31) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t"
-
- "movl 8(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 8(%2,%1),%%ecx\n\t"
- "jnz " LF(32) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LF(2) "\n\t"
-
- "movl 12(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 12(%2,%1),%%ecx\n\t"
- "jnz " LF(33) "\n\t"
-
- "addl $16,%1\n\t" /* now we can increment to counter */
-
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LF(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jz " LB(1) "\n"
-
- LL(2) "\txorl %0,%0\n\t"
- "jmp " LF(4) "\n"
-
- /* Here comes again the code of the loop but there are no more
- * than 15 characters to matched. We must be prepared to compare
- * any number from 0 to 15.
- *
- * On entry %3 contains N-16, where N is the remaining number of
- * characters. */
-
- LL(6) "\taddl $12,%3\n\t" /* more than 3 characters left ? */
- "jnc " LF(7) "\n\t" /* no, branch */
- "movl (%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl (%2,%1),%%ecx\n\t"
- "jnz " LF(3) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LB(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LB(2) "\n\t"
-
- "subl $4,%3\n\t" /* more than 7 characters left ? */
- "jc " LF(71) "\n\t" /* no, branch */
- "movl 4(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 4(%2,%1),%%ecx\n\t"
- "jnz " LF(31) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LB(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LB(2) "\n\t"
-
- "subl $4,%3\n\t" /* more then 11 characters left ? */
- "jc " LF(72) "\n\t"
- "movl 8(%1),%0\n\t"
- "movl %0,%%ecx\n\t"
- "subl 8(%2,%1),%%ecx\n\t"
- "jnz " LF(32) "\n\t"
- "movl %0,%%ecx\n\t"
- "addl $0xfefefeff,%0\n\t"
- "jnc " LB(2) "\n\t"
- "xorl %%ecx,%0\n\t"
- "notl %0\n\t"
- "andl $0x01010100,%0\n\t"
- "jnz " LB(2) "\n\t"
-
- "subl $4,%3\n\t"
- "addl $4,%1\n" /* correct counters */
- LL(72) "\taddl $4,%1\n"
- LL(71) "\taddl $4,%1\n"
-
- LL(7) "\taddl $3,%3\n\t" /* now %3 contains left number of chars - 1 */
- "jnc " LB(2) "\n\t" /* all processed, than strings equal */
- "movb (%1),%%al\n\t"
- "movb %%al,%%cl\n\t"
- "subb (%2,%1),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%cl,%%cl\n\t"
- "jz " LB(2) "\n\t"
-
- "orl %3,%3\n\t" /* only one char left ? */
- "jz " LB(2) "\n\t" /* yes, branch */
- "movb 1(%1),%%al\n\t"
- "movb %%al,%%cl\n\t"
- "subb 1(%2,%1),%%al\n\t"
- "jnz " LF(5) "\n\t"
- "orb %%cl,%%cl\n\t"
- "jz " LB(2) "\n\t"
-
- "decl %3\n\t" /* only two left ? */
- "jz " LB(2) "\n\t" /* yes, branch */
- "movb 2(%1),%%al\n\t"
- "subb 2(%2,%1),%%al\n\t"/* the last difference is the result */
- "jz " LB(2) "\n\t" /* equal than return 0 */
- "jmp " LF(5) "\n"
-
- LL(33) "\taddl $4,%1\n" /* correct counter */
- LL(32) "\taddl $4,%1\n"
- LL(31) "\taddl $4,%1\n"
-
- /* We now have to test what is the first differing byte. This
- * determines the return value. If two matching NULL chars are
- * found before the differing bytes we also have two equal
- * strings.
- *
- * Here %EAX contains the dword in question of s1. */
-
- LL(3) "\tmovb %%al,%%cl\n\t" /* save for later */
- "subb (%2,%1),%%al\n\t" /* bytes different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%cl,%%cl\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "movb %%ah,%%al\n\t" /* second byte of s1 is still there */
- "subb 1(%2,%1),%%al\n\t"/* bytes different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%ah,%%ah\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "movw 2(%1),%%ax\n\t" /* load next two bytes */
- "movb %%al,%%cl\n\t" /* save for later */
- "subb 2(%2,%1),%%al\n\t"/* is third byte different ? */
- "jnz " LF(5) "\n\t" /* yes, expand difference value to 32 bits */
- "orb %%cl,%%cl\n\t" /* was end of strings ? */
- "jz " LB(2) "\n\t" /* yes, return 0 */
-
- "movb %%ah,%%al\n\t" /* the last two bytes must be different */
- "subb 3(%2,%1),%%al\n" /* make difference and expand */
-
- LL(5) "\tmovl $0,%0\n\t"
- "sbbl %0,%0\n\t"
- "orb $1,%%al\n"
-
- LL(4)
- :"=a" (__res):"S" (s1),"d" (s2),"D" (n):"cx","dx","si","di");
- return __res;
- }
-