home *** CD-ROM | disk | FTP | other *** search
-
- /********************************************
- split.c
- copyright 1991, Michael D. Brennan
-
- This is a source file for mawk, an implementation of
- the AWK programming language.
-
- Mawk is distributed without warranty under the terms of
- the GNU General Public License, version 2, 1991.
- ********************************************/
-
- /* $Log: split.c,v $
- * Revision 1.3 1996/02/01 04:39:42 mike
- * dynamic array scheme
- *
- * Revision 1.2 1993/07/15 01:55:03 mike
- * rm SIZE_T & indent
- *
- * Revision 1.1.1.1 1993/07/03 18:58:21 mike
- * move source to cvs
- *
- * Revision 5.4 1993/05/08 18:06:00 mike
- * null_split
- *
- * Revision 5.3 1993/01/01 21:30:48 mike
- * split new_STRING() into new_STRING and new_STRING0
- *
- * Revision 5.2 1992/07/08 21:19:09 brennan
- * patch2
- * change in split() requires that
- * bi_split() call load_array() even
- * when cnt is 0.
- *
- * Revision 5.1 1991/12/05 07:56:31 brennan
- * 1.1 pre-release
- *
- */
-
- /* split.c */
-
-
/* Every split stores up to MAX_SPLIT fields in split_buff[];
   any remaining fields go onto split_ov_list (the split
   overflow list).

   We can split one of three ways:
     (1) By space:
	 space_split() and space_ov_split()
     (2) By regular expression:
	 re_split() and re_ov_split()
     (3) By "" (null -- split into characters)
	 null_split() and null_ov_split()
*/
-
- #define TEMPBUFF_GOES_HERE
-
- #include "mawk.h"
- #include "symtype.h"
- #include "bi_vars.h"
- #include "bi_funct.h"
- #include "memory.h"
- #include "scan.h"
- #include "regexp.h"
- #include "field.h"
-
- SPLIT_OV *split_ov_list ;
-
- static int PROTO(re_ov_split, (char *, PTR)) ;
- static int PROTO(space_ov_split, (char *, char *)) ;
- static int PROTO(null_ov_split, (char *)) ;
-
- /* split string s of length slen on SPACE without changing s.
- load the pieces into STRINGS and ptrs into
- split_buff[]
- return the number of pieces */
-
- int
- space_split(s, slen)
- register char *s ;
- unsigned slen ;
- {
- char *back = s + slen ;
- int i = 0 ;
- int len ;
- char *q ;
- STRING *sval ;
- int lcnt = MAX_SPLIT / 3 ;
-
- #define EAT_SPACE() while ( scan_code[*(unsigned char*)s] ==\
- SC_SPACE ) s++
- #define EAT_NON_SPACE() \
- *back = ' ' ; /* sentinel */\
- while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\
- *back = 0
-
-
- while (lcnt--)
- {
- EAT_SPACE() ;
- if (*s == 0) goto done ;
- /* mark the front with q */
- q = s++ ;
- EAT_NON_SPACE() ;
- sval = split_buff[i++] = new_STRING0(len = s - q) ;
- memcpy(sval->str, q, len) ;
-
- EAT_SPACE() ;
- if (*s == 0) goto done ;
- q = s++ ;
- EAT_NON_SPACE() ;
- sval = split_buff[i++] = new_STRING0(len = s - q) ;
- memcpy(sval->str, q, len) ;
-
- EAT_SPACE() ;
- if (*s == 0) goto done ;
- q = s++ ;
- EAT_NON_SPACE() ;
- sval = split_buff[i++] = new_STRING0(len = s - q) ;
- memcpy(sval->str, q, len) ;
-
- }
- /* we've overflowed */
- return i + space_ov_split(s, back) ;
-
- done:
- return i ;
- }
-
- static int
- space_ov_split(s, back)
- register char *s ;
- char *back ;
-
- {
- SPLIT_OV dummy ;
- register SPLIT_OV *tail = &dummy ;
- char *q ;
- int cnt = 0 ;
- unsigned len ;
-
- while (1)
- {
- EAT_SPACE() ;
- if (*s == 0) break ; /* done */
- q = s++ ;
- EAT_NON_SPACE() ;
-
- tail = tail->link = ZMALLOC(SPLIT_OV) ;
- tail->sval = new_STRING0(len = s - q) ;
- memcpy(tail->sval->str, q, len) ;
- cnt++ ;
- }
-
- tail->link = (SPLIT_OV *) 0 ;
- split_ov_list = dummy.link ;
- return cnt ;
- }
-
- /* match a string with a regular expression, but
- only matches of positive length count */
- char *
- re_pos_match(s, re, lenp)
- register char *s ;
- PTR re ; unsigned *lenp ;
- {
- while (s = REmatch(s, re, lenp))
- if (*lenp) return s ;
- else if (*s == 0) break ;
- else s++ ;
-
- return (char *) 0 ;
- }
-
- int
- re_split(s, re)
- char *s ;
- PTR re ;
- {
- register char *t ;
- int i = 0 ;
- unsigned mlen, len ;
- STRING *sval ;
- int lcnt = MAX_SPLIT / 3 ;
-
- while (lcnt--)
- {
- if (!(t = re_pos_match(s, re, &mlen))) goto done ;
- sval = split_buff[i++] = new_STRING0(len = t - s) ;
- memcpy(sval->str, s, len) ;
- s = t + mlen ;
-
- if (!(t = re_pos_match(s, re, &mlen))) goto done ;
- sval = split_buff[i++] = new_STRING0(len = t - s) ;
- memcpy(sval->str, s, len) ;
- s = t + mlen ;
-
- if (!(t = re_pos_match(s, re, &mlen))) goto done ;
- sval = split_buff[i++] = new_STRING0(len = t - s) ;
- memcpy(sval->str, s, len) ;
- s = t + mlen ;
- }
- /* we've overflowed */
- return i + re_ov_split(s, re) ;
-
- done:
- split_buff[i++] = new_STRING(s) ;
- return i ;
- }
-
- /*
- we've overflowed split_buff[] , put
- the rest on the split_ov_list
- return number of pieces
- */
-
- static int
- re_ov_split(s, re)
- char *s ;
- PTR re ;
- {
- SPLIT_OV dummy ;
- register SPLIT_OV *tail = &dummy ;
- int cnt = 1 ;
- char *t ;
- unsigned len, mlen ;
-
- while (t = re_pos_match(s, re, &mlen))
- {
- tail = tail->link = ZMALLOC(SPLIT_OV) ;
- tail->sval = new_STRING0(len = t - s) ;
- memcpy(tail->sval->str, s, len) ;
- s = t + mlen ;
- cnt++ ;
- }
- /* and one more */
- tail = tail->link = ZMALLOC(SPLIT_OV) ;
- tail->sval = new_STRING(s) ;
- tail->link = (SPLIT_OV *) 0 ;
- split_ov_list = dummy.link ;
-
- return cnt ;
- }
-
-
- int
- null_split(s)
- char *s ;
- {
- int cnt = 0 ; /* number of fields split */
- STRING *sval ;
- int i = 0 ; /* indexes split_buff[] */
-
- while (*s)
- {
- if (cnt == MAX_SPLIT) return cnt + null_ov_split(s) ;
-
- sval = new_STRING0(1) ;
- sval->str[0] = *s++ ;
- split_buff[i++] = sval ;
- cnt++ ;
- }
- return cnt ;
- }
-
- static int
- null_ov_split(s)
- char *s ;
- {
- SPLIT_OV dummy ;
- SPLIT_OV *ovp = &dummy ;
- int cnt = 0 ;
-
- while (*s)
- {
- ovp = ovp->link = ZMALLOC(SPLIT_OV) ;
- ovp->sval = new_STRING0(1) ;
- ovp->sval->str[0] = *s++ ;
- cnt++ ;
- }
- ovp->link = (SPLIT_OV *) 0 ;
- split_ov_list = dummy.link ;
- return cnt ;
- }
-
-
- /* split(s, X, r)
- split s into array X on r
-
- entry: sp[0] holds r
- sp[-1] pts at X
- sp[-2] holds s
- */
- CELL *
- bi_split(sp)
- register CELL *sp ;
- {
- int cnt ; /* the number of pieces */
-
-
- if (sp->type < C_RE) cast_for_split(sp) ;
- /* can be C_RE, C_SPACE or C_SNULL */
- sp -= 2 ;
- if (sp->type < C_STRING) cast1_to_s(sp) ;
-
- if (string(sp)->len == 0) /* nothing to split */
- cnt = 0 ;
- else
- switch ((sp + 2)->type)
- {
- case C_RE:
- cnt = re_split(string(sp)->str, (sp + 2)->ptr) ;
- break ;
-
- case C_SPACE:
- cnt = space_split(string(sp)->str, string(sp)->len) ;
- break ;
-
- case C_SNULL: /* split on empty string */
- cnt = null_split(string(sp)->str) ;
- break ;
-
- default:
- bozo("bad splitting cell in bi_split") ;
- }
-
-
- free_STRING(string(sp)) ;
- sp->type = C_DOUBLE ;
- sp->dval = (double) cnt ;
-
- array_load((ARRAY) (sp + 1)->ptr, cnt) ;
-
- return sp ;
- }
-