home *** CD-ROM | disk | FTP | other *** search
- /*
- * datetok - date tokenisation
- */
-
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #include <sys/types.h> /* for dateconv.h */
- #include "dateconv.h"
- #include "datetok.h"
-
- /* imports */
- int dtok_numparsed;
-
- /*
- * to keep this table reasonably small, we compact the lexval for TZ and DTZ
- * entries and truncate the text field at MAXTOKLEN characters.
- * the text field is not guaranteed to be NUL-terminated.
- * ST = Standard Time; DT = Daylight Time.
- */
- static datetkn datetktbl[] = {
- /* text token lexval */
- "acsst", DTZ, PACK(630), /* Cent. Australia */
- "acst", TZ, PACK(570), /* Cent. Australia */
- "adt", DTZ, PACK(-180), /* Atlantic DT */
- "aesst", DTZ, PACK(660), /* E. Australia */
- "aest", TZ, PACK(600), /* Australia Eastern ST */
- "akdt", DTZ, PACK(-480), /* Alaska DT */
- "akst", TZ, PACK(-540), /* Alaska ST */
- "am", AMPM, AM,
- "apr", MONTH, 4,
- "april", MONTH, 4,
- "ast", TZ, PACK(-240), /* Atlantic ST (Canada) */
- "at", IGNORE, 0, /* "at" (throwaway) */
- "aug", MONTH, 8,
- "august", MONTH, 8,
- "awst", TZ, PACK(480), /* W. Australia */
- "bst", DTZ, PACK(60), /* British Summer Time */
- "cadt", DTZ, PACK(630), /* Central Australian DT */
- "cast", TZ, PACK(570), /* Central Australian ST */
- "cat", TZ, PACK(-600), /* Central Alaska Time */
- "cct", TZ, PACK(480), /* China Coast */
- "cdt", DTZ, PACK(-300), /* Central DT */
- "cest", DTZ, PACK(120), /* Central Europe Summer Time */
- "cet", TZ, PACK(60), /* Central European Time */
- "cetdst", DTZ, PACK(120), /* Central European DT */
- "cst", TZ, PACK(-360), /* Central ST */
- "dec", MONTH, 12,
- "decemb", MONTH, 12,
- "dnt", TZ, PACK(60), /* Dansk Normal Tid */
- /*XX*/ "dst", IGNORE, 0,
- "eadt", DTZ, PACK(660), /* East Australian DT */
- "east", TZ, PACK(600), /* East Australian ST */
- "edt", DTZ, PACK(-240), /* Eastern DT */
- "eest", DTZ, PACK(180), /* Eastern Europe Summer */
- "eet", TZ, PACK(120), /* Eastern Europe */
- "eetdst", DTZ, PACK(180), /* Eastern Europe */
- "est", TZ, PACK(-300), /* Eastern ST */
- "feb", MONTH, 2,
- "februa", MONTH, 2,
- "fri", IGNORE, 5,
- "friday", IGNORE, 5,
- "fst", DTZ, PACK(120), /* French Summer Time */
- "fwt", TZ, PACK(60), /* French Winter Time */
- "gmt", TZ, PACK(0), /* Greenwich Mean Time */
- "gst", TZ, PACK(600), /* Guam ST */
- "hadt", DTZ, PACK(-540), /* Hawaii-Aleutian DT */
- "hast", TZ, PACK(-600), /* Hawaii-Aleutian ST */
- "hkt", TZ, PACK(480), /* Hong Kong Time */
- "hst", TZ, PACK(-600), /* Hawaii ST */
- "idle", TZ, PACK(720), /* Intl. Date Line, East */
- "idlw", TZ, PACK(-720), /* Intl. Date Line, West */
- "idt", DTZ, PACK(180), /* Israel DT */
- "ist", TZ, PACK(120), /* Israel */
- "jan", MONTH, 1,
- "januar", MONTH, 1,
- "jst", TZ, PACK(540), /* Japan ST */
- "jul", MONTH, 7,
- "july", MONTH, 7,
- "jun", MONTH, 6,
- "june", MONTH, 6,
- "kdt", DTZ, PACK(600), /* Korea DT */
- "kst", TZ, PACK(540), /* Korea ST */
- /*XX*/ "ligt", TZ, PACK(600), /* From Melbourne, Australia */
- "mar", MONTH, 3,
- "march", MONTH, 3,
- "may", MONTH, 5,
- "mdt", DTZ, PACK(-360), /* Mountain DT */
- "mest", DTZ, PACK(120), /* Middle Europe Summer Time */
- "mesz", DTZ, PACK(120), /* Mittel-Europaeische Sommerzeit */
- "met", TZ, PACK(60), /* Middle Europe Time */
- "metdst", DTZ, PACK(120), /* Middle Europe DT */
- "mewt", TZ, PACK(60), /* Middle Europe Winter Time */
- "mez", TZ, PACK(60), /* Mittel-Europaeische Zeit */
- "mon", IGNORE, 1,
- "monday", IGNORE, 1,
- "mst", TZ, PACK(-420), /* Mountain ST */
- "ndt", DTZ, PACK(-150), /* Nfld. DT */
- /*XXN*/ "nft", TZ, PACK(-210), /* Newfoundland ST */
- /*XX*/ "nor", TZ, PACK(60), /* Norway ST */
- "nov", MONTH, 11,
- "novemb", MONTH, 11,
- "nst", TZ, PACK(-210), /* Nfld. ST */
- "nzdt", DTZ, PACK(780), /* New Zealand DT */
- "nzst", TZ, PACK(720), /* New Zealand ST */
- "nzt", TZ, PACK(720), /* New Zealand Time */
- "oct", MONTH, 10,
- "octobe", MONTH, 10,
- "on", IGNORE, 0, /* "on" (throwaway) */
- "pdt", DTZ, PACK(-420), /* Pacific DT */
- "pm", AMPM, PM,
- "pst", TZ, PACK(-480), /* Pacific ST */
- "sadt", DTZ, PACK(630), /* S. Australian DT */
- "sast", TZ, PACK(570), /* South Australian ST */
- "sat", IGNORE, 6,
- "saturd", IGNORE, 6,
- "sep", MONTH, 9,
- "sept", MONTH, 9,
- "septem", MONTH, 9,
- "sst", DTZ, PACK(120), /* Swedish Summer Time */
- "sun", IGNORE, 0,
- "sunday", IGNORE, 0,
- "swt", TZ, PACK(60), /* Swedish Winter Time */
- "thu", IGNORE, 4,
- "thur", IGNORE, 4,
- "thurs", IGNORE, 4,
- "thursd", IGNORE, 4,
- "tue", IGNORE, 2,
- "tues", IGNORE, 2,
- "tuesda", IGNORE, 2,
- "ut", TZ, PACK(0),
- "utc", TZ, PACK(0),
- "wast", TZ, PACK(480), /* West Australian ST */
- "wat", TZ, PACK(-60), /* West Africa Time */
- "wed", IGNORE, 3,
- "wednes", IGNORE, 3,
- "weds", IGNORE, 3,
- "west", DTZ, PACK(60), /* Western Europe Summer */
- "wet", TZ, PACK(0), /* Western Europe */
- "wetdst", DTZ, PACK(60), /* Western Europe */
- "wst", TZ, PACK(480), /* West Australian ST */
- "ydt", DTZ, PACK(-480), /* Yukon DT */
- "yst", TZ, PACK(-540), /* Yukon ST */
- "zp4", TZ, PACK(-240), /* GMT +4 hours. */
- "zp5", TZ, PACK(-300), /* GMT +5 hours. */
- "zp6", TZ, PACK(-360), /* GMT +6 hours. */
- };
-
- #if 0
- /*
- * these time zones are orphans, i.e. the name is also used by a more
- * likely-to-appear time zone
- */
- "adt", DTZ, PACK(0), /* Azores DT */
- "adt", DTZ, PACK(-240), /* Acre DT */
- "ast", TZ, PACK(-60), /* Azores ST */
- "ast", TZ, PACK(-300), /* Acre ST */
- "bst", TZ, PACK(-180), /* Brazil ST */
- "cdt", DTZ, PACK(-180), /* Chile DT */
- "cdt", DTZ, PACK(-240), /* Cuba DT */
- "cdt", DTZ, PACK(540), /* China DT */
- "cst", TZ, PACK(-240), /* Chile ST */
- "cst", TZ, PACK(-300), /* Cuba ST */
- "cst", TZ, PACK(480), /* China ST */
- "edt", DTZ, PACK(-300), /* Easter Island DT */
- "edt", DTZ, PACK(-120), /* East Brazil DT */
- "edt", DTZ, PACK(660), /* Australian Eastern DT */
- "est", TZ, PACK(-360), /* Easter Island ST */
- "est", TZ, PACK(-180), /* East Brazil ST */
- "est", TZ, PACK(600), /* Australian Eastern ST */
- "fdt", DTZ, PACK(-60), /* Fernando de Noronha DT */
- "fst", TZ, PACK(-120), /* Fernando de Noronha ST */
- "ist", TZ, PACK(330), /* Indian ST */
- "sst", TZ, PACK(-660), /* Samoa ST */
- "sst", TZ, PACK(480), /* Singapore ST */
- "wdt", DTZ, PACK(-180), /* Western Brazil DT */
- "wet", TZ, PACK(60), /* Western European Time */
- "wst", TZ, PACK(-240), /* Western Brazil ST */
- /* military timezones are deprecated by RFC 1123 section 5.2.14 */
- "a", TZ, PACK(60), /* UTC+1h */
- "b", TZ, PACK(120), /* UTC+2h */
- "c", TZ, PACK(180), /* UTC+3h */
- "d", TZ, PACK(240), /* UTC+4h */
- "e", TZ, PACK(300), /* UTC+5h */
- "f", TZ, PACK(360), /* UTC+6h */
- "g", TZ, PACK(420), /* UTC+7h */
- "h", TZ, PACK(480), /* UTC+8h */
- "i", TZ, PACK(540), /* UTC+9h */
- "k", TZ, PACK(600), /* UTC+10h */
- "l", TZ, PACK(660), /* UTC+11h */
- "m", TZ, PACK(720), /* UTC+12h */
- "n", TZ, PACK(-60), /* UTC-1h */
- "o", TZ, PACK(-120), /* UTC-2h */
- "p", TZ, PACK(-180), /* UTC-3h */
- "q", TZ, PACK(-240), /* UTC-4h */
- "r", TZ, PACK(-300), /* UTC-5h */
- "s", TZ, PACK(-360), /* UTC-6h */
- "t", TZ, PACK(-420), /* UTC-7h */
- "u", TZ, PACK(-480), /* UTC-8h */
- "v", TZ, PACK(-540), /* UTC-9h */
- "w", TZ, PACK(-600), /* UTC-10h */
- "x", TZ, PACK(-660), /* UTC-11h */
- "y", TZ, PACK(-720), /* UTC-12h */
- "z", TZ, PACK(0), /* UTC */
- #endif
-
- static unsigned int szdatetktbl = sizeof datetktbl / sizeof datetktbl[0];
-
- datetkn *
- datetoktype(s, bigvalp)
- char *s;
- int *bigvalp;
- {
- register char *cp = s;
- register char c = *cp;
- static datetkn t;
- register datetkn *tp = &t;
-
- if (isascii(c) && isdigit(c)) {
- register int len = strlen(cp);
-
- if (len > 3 && (cp[1] == ':' || cp[2] == ':'))
- tp->type = TIME;
- else {
- if (bigvalp != NULL)
- /* won't fit in tp->value */
- *bigvalp = atoi(cp);
- if (len == 4)
- tp->type = YEAR;
- else if (++dtok_numparsed == 1)
- tp->type = DAY;
- else
- tp->type = YEAR;
- }
- } else if (c == '-' || c == '+') {
- register int val = atoi(cp + 1);
- register int hr = val / 100;
- register int min = val % 100;
-
- val = hr*60 + min;
- if (c == '-')
- val = -val;
- tp->type = TZ;
- TOVAL(tp, val);
- } else {
- char lowtoken[TOKMAXLEN+1];
- register char *ltp = lowtoken, *endltp = lowtoken+TOKMAXLEN;
-
- /* copy to lowtoken to avoid modifying s */
- while ((c = *cp++) != '\0' && ltp < endltp)
- *ltp++ = (isascii(c) && isupper(c)? tolower(c): c);
- *ltp = '\0';
- tp = datebsearch(lowtoken, datetktbl, szdatetktbl);
- if (tp == NULL) {
- tp = &t;
- tp->type = IGNORE;
- }
- }
- return tp;
- }
-
- /*
- * Binary search -- from Knuth (6.2.1) Algorithm B. Special case like this
- * is WAY faster than the generic bsearch().
- */
- datetkn *
- datebsearch(key, base, nel)
- register char *key;
- register datetkn *base;
- unsigned int nel;
- {
- register datetkn *last = base + nel - 1, *position;
- register int result;
-
- while (last >= base) {
- position = base + ((last - base) >> 1);
- result = key[0] - position->token[0];
- if (result == 0) {
- result = strncmp(key, position->token, TOKMAXLEN);
- if (result == 0)
- return position;
- }
- if (result < 0)
- last = position - 1;
- else
- base = position + 1;
- }
- return 0;
- }
-