Team Palmtops 7

home *** CD-ROM | disk | FTP | other *** search

/ Team Palmtops 7 / Palmtops_numero07.iso / Epoc / Palmtime / files / FrotzS5_src.ZIP / TEXT.CPP < prev next >

Wrap

C/C++ Source or Header | 1997-10-15 | 21.9 KB | 1,081 lines

/*
 * text.c
 *
 * Text manipulation functions
 *
 */

#include "frotz.h"
#include "frotzs5.h"
#include "s5frotz.h"
#include "s5api.h"

/* The kinds of encoded string that decode_text can be asked to read */

enum string_type {
    LOW_STRING, ABBREVIATION, HIGH_STRING, EMBEDDED_STRING, VOCABULARY
};

extern zword object_name (struct sg *g, zword);

/*
 * ISO Latin-1 equivalents of the ZSCII codes 0x9b to 0xdf, indexed by
 * (code - 0x9b). The two 0x00 entries (codes 0xdc and 0xdd) mark
 * characters that have no Latin-1 equivalent.
 *
 */

const zchar zscii_to_latin1[] = {
    0xe4, 0xf6, 0xfc, 0xc4, 0xd6, 0xdc, 0xdf, 0xab,
    0xbb, 0xeb, 0xef, 0xff, 0xcb, 0xcf, 0xe1, 0xe9,
    0xed, 0xf3, 0xfa, 0xfd, 0xc1, 0xc9, 0xcd, 0xd3,
    0xda, 0xdd, 0xe0, 0xe8, 0xec, 0xf2, 0xf9, 0xc0,
    0xc8, 0xcc, 0xd2, 0xd9, 0xe2, 0xea, 0xee, 0xf4,
    0xfb, 0xc2, 0xca, 0xce, 0xd4, 0xdb, 0xe5, 0xc5,
    0xf8, 0xd8, 0xe3, 0xf1, 0xf5, 0xc3, 0xd1, 0xd5,
    0xe6, 0xc6, 0xe7, 0xc7, 0xfe, 0xf0, 0xde, 0xd0,
    0xa3, 0x00, 0x00, 0xa1, 0xbf
};

/*
 * translate_from_zscii
 *
 * Map a ZSCII character onto the ISO Latin-1 alphabet.
 *
 * The codes 0xfc, 0xfd and 0xfe become ZC_MENU_CLICK, ZC_DOUBLE_CLICK
 * and ZC_SINGLE_CLICK. Unless the story is Beyond Zork, codes from
 * 0x9b upwards are looked up in the game's own Unicode table when
 * g->hx_unicode_table is non-zero, otherwise in zscii_to_latin1;
 * anything without a Latin-1 equivalent becomes '?'. Every other
 * code is returned unchanged.
 *
 */

short translate_from_zscii (struct sg *g, short c)
{

    if (c == 0xfc)
        return ZC_MENU_CLICK;
    if (c == 0xfd)
        return ZC_DOUBLE_CLICK;
    if (c == 0xfe)
        return ZC_SINGLE_CLICK;

    if (c >= 0x9b && g->story_id != BEYOND_ZORK) {

        if (g->hx_unicode_table != 0) {  /* game has its own Unicode table */

            zbyte N;

            /* The first byte of the table holds the number of entries */

            LOW_BYTE (g->hx_unicode_table, N)

            if (c - 0x9b < N) {

                zword addr = g->hx_unicode_table + 1 + 2 * (c - 0x9b);
                zword unicode;

                LOW_WORD (addr, unicode)

                return (unicode < 0x100) ? (zchar) unicode : '?';

            } else return '?';

        } else {                         /* game uses standard set */

            if (c <= 0xdf) {

                if (c == 0xdc || c == 0xdd)  /* Oe and oe ligatures */
                    return '?';              /* are not ISO-Latin 1 */

                return zscii_to_latin1[c - 0x9b];

            } else return '?';

        }

    }

    return c;

}/* translate_from_zscii */

/*
 * translate_to_zscii
 *
 * Map an ISO Latin-1 character onto the ZSCII alphabet.
* */ short translate_to_zscii (struct sg *g, short c) { short i; if (c == ZC_SINGLE_CLICK) return 0xfe; if (c == ZC_DOUBLE_CLICK) return 0xfd; if (c == ZC_MENU_CLICK) return 0xfc; if (c >= ZC_LATIN1_MIN) if (g->hx_unicode_table != 0) { /* game has its own Unicode table */ zbyte N; short i; LOW_BYTE (g->hx_unicode_table, N) for (i = 0x9b; i < 0x9b + N; i++) { zword addr = g->hx_unicode_table + 1 + 2 * (i - 0x9b); zword unicode; LOW_WORD (addr, unicode) if (c == unicode) return (zbyte) i; } return '?'; } else { /* game uses standard set */ for (i = 0x9b; i <= 0xdf; i++) if (c == zscii_to_latin1[i - 0x9b]) return (zbyte) i; return '?'; } return c; }/* translate_to_zscii */ /* * alphabet * * Return a character from one of the three character sets. * */ zchar alphabet (struct sg *g, short set, short index) { if (g->h_alphabet != 0) { /* game uses its own alphabet */ zbyte c; zword addr = g->h_alphabet + 26 * set + index; LOW_BYTE (addr, c) return (unsigned char)translate_from_zscii (g, c); } else /* game uses default alphabet */ if (set == 0) return 'a' + index; else if (set == 1) return 'A' + index; else if (g->h_version == V1) return " 0123456789.,!?_#'\"/\\<-:()"[index]; else return " ^0123456789.,!?_#'\"/\\-:()"[index]; }/* alphabet */ /* * load_string * * Copy a ZSCII string from the memory to the global "decoded" string. * */ void load_string (struct sg *g, zword addr, zword length) { short resolution = (g->h_version <= V3) ? 2 : 3; short i = 0; while (i < 3 * resolution) if (i < length) { zbyte c; LOW_BYTE (addr, c) addr++; g->decoded[i++] = (unsigned char)translate_from_zscii (g, c); } else g->decoded[i++] = 0; }/* load_string */ /* * encode_text * * Encode the Unicode text in the global "decoded" string then write * the result to the global "encoded" array. (This is used to look up * words in the dictionary.) Up to V3 the vocabulary resolution is * two, since V4 it is three words. 
Because each word contains three * Z-characters, that makes six or nine Z-characters respectively. * Longer words are chopped to the proper size, shorter words are are * padded out with 5's. For word completion we pad with 0s and 31s, * the minimum and maximum Z-characters. * */ void encode_text (struct sg *g, short padding) { const zchar again[] = { 'a', 'g', 'a', 'i', 'n', 0 }; const zchar examine[] = { 'e', 'x', 'a', 'm', 'i', 'n', 'e', 0 }; const zchar wait[] = { 'w', 'a', 'i', 't', 0 }; zbyte zchars[12]; const zchar *ptr = g->decoded; zchar c; short resolution = (g->h_version <= V3) ? 2 : 3; short i = 0; /* Expand abbreviations that some old Infocom games lack */ if (g->option_expand_abbreviations) if (padding == 0x05 && g->decoded[1] == 0) switch (g->decoded[0]) { case 'g': ptr = again; break; case 'x': ptr = examine; break; case 'z': ptr = wait; break; } /* Translate string to a sequence of Z-characters */ while (i < 3 * resolution) if ((c = *ptr++) != 0) { short index, set; zbyte c2; /* Search character in the alphabet */ for (set = 0; set < 3; set++) for (index = 0; index < 26; index++) if (c == alphabet (g, set, index)) goto letter_found; /* Character not found, store its ZSCII value */ c2 = (unsigned char)translate_to_zscii (g, c); zchars[i++] = 5; zchars[i++] = 6; zchars[i++] = c2 >> 5; zchars[i++] = c2 & 0x1f; continue; letter_found: /* Character found, store its index */ if (set != 0) zchars[i++] = ((g->h_version <= V2) ? 1 : 3) + set; zchars[i++] = index + 6; } else zchars[i++] = (unsigned char)padding; /* Three Z-characters make a 16bit word */ for (i = 0; i < resolution; i++) g->encoded[i] = (zchars[3 * i + 0] << 10) | (zchars[3 * i + 1] << 5) | (zchars[3 * i + 2]); g->encoded[resolution - 1] |= 0x8000; }/* encode_text */ /* * z_check_unicode, test if a unicode character can be read and printed. 
* * zargs[0] = Unicode * */ void z_check_unicode (struct sg *g) { zword c = g->zargs[0]; if (c >= 0x20 && c <= 0x7e) store (g, 3); else if (c == 0xa0) store (g, 1); else if (c >= 0xa1 && c <= 0xff) store (g, 3); else store (g, 0); }/* z_check_unicode */ /* * z_encode_text, encode a ZSCII string for use in a dictionary. * * zargs[0] = address of text buffer * zargs[1] = length of ASCII string * zargs[2] = offset of ASCII string within the text buffer * zargs[3] = address to store encoded text in * * This is a V5+ opcode and therefore the dictionary resolution must be * three 16bit words. * */ void z_encode_text (struct sg *g) { short i; load_string (g, (zword) (g->zargs[0] + g->zargs[2]), g->zargs[1]); encode_text (g, 0x05); for (i = 0; i < 3; i++) storew (g, (zword) (g->zargs[3] + 2 * i), g->encoded[i]); }/* z_encode_text */ /* * decode_text * * Convert encoded text to Unicode. The encoded text consists of 16bit * words. Every word holds 3 Z-characters (5 bits each) plus a spare * bit to mark the last word. The Z-characters translate to ZSCII by * looking at the current current character set. Some select another * character set, others refer to abbreviations. * * There are several different string types: * * LOW_STRING - from the lower 64KB (byte address) * ABBREVIATION - from the abbreviations table (word address) * HIGH_STRING - from the end of the memory map (packed address) * EMBEDDED_STRING - from the instruction stream (at PC) * VOCABULARY - from the dictionary (byte address) * * The last type is only used for word completion. 
* */ #define outchar(c) if (st==VOCABULARY) *ptr++=c; else print_char(g, c) void decode_text (struct sg *g, enum string_type st, zword addr) { zchar *ptr; long byte_addr; zchar c2; zword code; zbyte c, prev_c = 0; short shift_state = 0; short shift_lock = 0; short status = 0; /* Calculate the byte address if necessary */ if (st == ABBREVIATION) byte_addr = (long) addr << 1; else if (st == HIGH_STRING) { if (g->h_version <= V3) byte_addr = (long) addr << 1; else if (g->h_version <= V5) byte_addr = (long) addr << 2; else if (g->h_version <= V7) byte_addr = ((long) addr << 2) + ((long) g->h_strings_offset << 3); else /* h_version == V8 */ byte_addr = (long) addr << 3; if (byte_addr >= g->story_size) runtime_error (g, "Print at illegal address"); } /* Loop until a 16bit word has the highest bit set */ if (st == VOCABULARY) ptr = g->decoded; do { short i; /* Fetch the next 16bit word */ if (st == LOW_STRING || st == VOCABULARY) { LOW_WORD (addr, code) addr += 2; } else if (st == HIGH_STRING || st == ABBREVIATION) { HIGH_WORD (byte_addr, code) byte_addr += 2; } else CODE_WORD (code) /* Read its three Z-characters */ for (i = 10; i >= 0; i -= 5) { zword abbr_addr; zword ptr_addr; c = (code >> i) & 0x1f; switch (status) { case 0: /* normal operation */ if (shift_state == 2 && c == 6) status = 2; else if (g->h_version == V1 && c == 1) new_line (g); else if (g->h_version >= V2 && shift_state == 2 && c == 7) new_line (g); else if (c >= 6) outchar (alphabet (g, shift_state, c - 6)); else if (c == 0) outchar (' '); else if (g->h_version >= V2 && c == 1) status = 1; else if (g->h_version >= V3 && c <= 3) status = 1; else { shift_state = (shift_lock + (c & 1) + 1) % 3; if (g->h_version <= V2 && c >= 4) shift_lock = shift_state; break; } shift_state = shift_lock; break; case 1: /* abbreviation */ ptr_addr = g->h_abbreviations + 64 * (prev_c - 1) + 2 * c; LOW_WORD (ptr_addr, abbr_addr) decode_text (g, ABBREVIATION, abbr_addr); status = 0; break; case 2: /* ZSCII character - first 
part */ status = 3; break; case 3: /* ZSCII character - second part */ c2 = (unsigned char)translate_from_zscii (g, (prev_c << 5) | c); outchar (c2); status = 0; break; } prev_c = c; } } while (!(code & 0x8000)); if (st == VOCABULARY) *ptr = 0; }/* decode_text */ #undef outchar /* * z_new_line, print a new line. * * no zargs used * */ void z_new_line (struct sg *g) { new_line (g); }/* z_new_line */ /* * z_print, print a string embedded in the instruction stream. * * no zargs used * */ void z_print (struct sg *g) { decode_text (g, EMBEDDED_STRING, 0); }/* z_print */ /* * z_print_addr, print a string from the lower 64KB. * * zargs[0] = address of string to print * */ void z_print_addr (struct sg *g) { decode_text (g, LOW_STRING, g->zargs[0]); }/* z_print_addr */ /* * z_print_char print a single ZSCII character. * * zargs[0] = ZSCII character to be printed * */ void z_print_char (struct sg *g) { print_char (g, (unsigned char)translate_from_zscii (g, (unsigned char)g->zargs[0])); }/* z_print_char */ /* * z_print_form, print a formatted table. * * zargs[0] = address of formatted table to be printed * */ void z_print_form (struct sg *g) { zword count; zword addr = g->zargs[0]; short first = TRUE; for (;;) { LOW_WORD (addr, count) addr += 2; if (count == 0) break; if (!first) new_line (g); while (count--) { zbyte c; LOW_BYTE (addr, c) addr++; print_char (g, (unsigned char)translate_from_zscii (g, c)); } first = FALSE; } }/* z_print_form */ /* * print_num * * Print a signed 16bit number. * */ void print_num (struct sg *g, zword value) { short i; /* Print sign */ if ((short) value < 0) { print_char (g, '-'); value = - (short) value; } /* Print absolute value */ for (i = 10000; i != 0; i /= 10) if (value >= i || i == 1) print_char (g, '0' + (value / i) % 10); }/* print_num */ /* * z_print_num, print a signed number. 
* * zargs[0] = number to print * */ void z_print_num (struct sg *g) { print_num (g, g->zargs[0]); }/* z_print_num */ /* * print_object * * Print an object description. * */ void print_object (struct sg *g, zword object) { zword addr = object_name (g, object); zword code = 0x94a5; zbyte length; LOW_BYTE (addr, length) addr++; if (length != 0) LOW_WORD (addr, code) if (code == 0x94a5) { /* encoded text 0x94a5 == empty string */ print_string (g, "object#"); /* supply a generic name */ print_num (g, object); /* for anonymous objects */ } else decode_text (g, LOW_STRING, addr); }/* print_object */ /* * z_print_obj, print an object description. * * zargs[0] = number of object to be printed * */ void z_print_obj (struct sg *g) { print_object (g, g->zargs[0]); }/* z_print_obj */ /* * z_print_paddr, print the string at the given packed address. * * zargs[0] = packed address of string to be printed * */ void z_print_paddr (struct sg *g) { decode_text (g, HIGH_STRING, g->zargs[0]); }/* z_print_paddr */ /* * z_print_ret, print the string at PC, print newline then return true. * * no zargs used * */ void z_print_ret (struct sg *g) { decode_text (g, EMBEDDED_STRING, 0); new_line (g); ret (g, 1); }/* z_print_ret */ /* * print_string * * Print a string of ASCII characters. * */ void print_string (struct sg *g, const char *s) { char c; while ((c = *s++) != 0) if (c == '\n') new_line (g); else print_char (g, c); }/* print_string */ /* * z_print_unicode * * zargs[0] = Unicode * */ void z_print_unicode (struct sg *g) { print_char (g, (unsigned char)((g->zargs[0] <= 0xff) ? g->zargs[0] : '?')); }/* z_print_unicode */ /* * lookup_text * * Scan a dictionary searching for the given word. The first argument * can be * * 0x00 - find the first word which is >= the given one * 0x05 - find the word which exactly matches the given one * 0x1f - find the last word which is <= the given one * * The return value is 0 if the search fails. 
* */ zword lookup_text (struct sg *g, short padding, zword dct) { zword entry_addr; zword entry_count; zword entry; zword addr; zbyte entry_len; zbyte sep_count; short resolution = (g->h_version <= V3) ? 2 : 3; short entry_number; short lower, upper; short i; short sorted; encode_text (g, padding); LOW_BYTE (dct, sep_count) /* skip word separators */ dct += 1 + sep_count; LOW_BYTE (dct, entry_len) /* get length of entries */ dct += 1; LOW_WORD (dct, entry_count) /* get number of entries */ dct += 2; if ((short) entry_count < 0) { /* bad luck, entries aren't sorted */ entry_count = - (short) entry_count; sorted = FALSE; } else sorted = TRUE; /* entries are sorted */ lower = 0; upper = entry_count - 1; while (lower <= upper) { if (sorted) /* binary search */ entry_number = (lower + upper) / 2; else /* linear search */ entry_number = lower; entry_addr = dct + entry_number * entry_len; /* Compare word to dictionary entry */ addr = entry_addr; for (i = 0; i < resolution; i++) { LOW_WORD (addr, entry) if (g->encoded[i] != entry) goto continuing; addr += 2; } return entry_addr; /* exact match found, return now */ continuing: if (sorted) /* binary search */ if (g->encoded[i] > entry) lower = entry_number + 1; else upper = entry_number - 1; else lower++; /* linear search */ } /* No exact match has been found */ if (padding == 0x05) return 0; entry_number = (padding == 0x00) ? lower : upper; if (entry_number == -1 || entry_number == entry_count) return 0; return dct + entry_number * entry_len; }/* lookup_text */ /* * tokenise_text * * Translate a single word to a token and append it to the token * buffer. Every token consists of the address of the dictionary * entry, the length of the word and the offset of the word from * the start of the text buffer. Unknown words cause empty slots * if the flag is set (such that the text can be scanned several * times with different dictionaries); otherwise they are zero. 
* */ void tokenise_text (struct sg *g, zword text, zword length, zword from, zword parse, zword dct, short flag) { zword addr; zbyte token_max, token_count; LOW_BYTE (parse, token_max) parse++; LOW_BYTE (parse, token_count) if (token_count < token_max) { /* sufficient space left for token? */ storeb (g, parse++, token_count + 1); load_string (g, (zword) (text + from), length); addr = lookup_text (g, 0x05, dct); if (addr != 0 || !flag) { parse += 4 * token_count; storew (g, (zword) (parse + 0), addr); storeb (g, (zword) (parse + 2), (unsigned char)length); storeb (g, (zword) (parse + 3), (unsigned char)from); } } }/* tokenise_text */ /* * tokenise_line * * Split an input line into words and translate the words to tokens. * */ void tokenise_line (struct sg *g, zword text, zword token, zword dct, short flag) { zword addr1; zword addr2; zbyte length; zbyte c; /* Use standard dictionary if the given dictionary is zero */ if (dct == 0) dct = g->h_dictionary; /* Remove all tokens before inserting new ones */ storeb (g, (zword) (token + 1), 0); /* Move the first pointer across the text buffer searching for the beginning of a word. If this succeeds, store the position in a second pointer. Move the first pointer searching for the end of the word. When it is found, "tokenise" the word. Continue until the end of the buffer is reached. 
*/ addr1 = text; addr2 = 0; if (g->h_version >= V5) { addr1++; LOW_BYTE (addr1, length) } do { zword sep_addr; zbyte sep_count; zbyte separator; /* Fetch next ZSCII character */ addr1++; if (g->h_version >= V5 && addr1 == text + 2 + length) c = 0; else LOW_BYTE (addr1, c) /* Check for separator */ sep_addr = dct; LOW_BYTE (sep_addr, sep_count) sep_addr++; do { LOW_BYTE (sep_addr, separator) sep_addr++; } while (c != separator && --sep_count != 0); /* This could be the start or the end of a word */ if (sep_count == 0 && c != ' ' && c != 0) { if (addr2 == 0) addr2 = addr1; } else if (addr2 != 0) { tokenise_text (g, text, (zword) (addr1 - addr2), (zword) (addr2 - text), token, dct, flag ); addr2 = 0; } /* Translate separator (which is a word in its own right) */ if (sep_count != 0) tokenise_text (g, text, (zword) (1), (zword) (addr1 - text), token, dct, flag ); } while (c != 0); }/* tokenise_line */ /* * z_tokenise, make a lexical analysis of a ZSCII string. * * zargs[0] = address of string to analyze * zargs[1] = address of token buffer * zargs[2] = address of dictionary (optional) * zargs[3] = set when unknown words cause empty slots (optional) * */ void z_tokenise (struct sg *g) { /* Supply default arguments */ if (g->zargc < 3) g->zargs[2] = 0; if (g->zargc < 4) g->zargs[3] = 0; /* Call tokenise_line to do the real work */ tokenise_line (g, g->zargs[0], g->zargs[1], g->zargs[2], g->zargs[3] != 0); }/* z_tokenise */ /* * completion * * Scan the vocabulary to complete the last word on the input line * (similar to "tcsh" under Unix). The return value is * * 2 ==> completion is impossible * 1 ==> completion is ambiguous * 0 ==> completion is successful * * The function also returns a string in its second argument. 
In case
 * of 2, the string is empty; in case of 1, the string is the longest
 * extension of the last word on the input line that is common to all
 * possible completions (for instance, if the last word on the input
 * is "fo" and its only possible completions are "follow" and "folly"
 * then the string is "ll"); in case of 0, the string is an extension
 * to the last word that results in the only possible completion.
 *
 * At most the first nine characters of the last word are considered.
 *
 */

short completion (struct sg *g, const zchar *buffer, zchar *result)
{
    zword first_entry;
    zword last_entry;
    zchar *out;
    zchar ch;
    short len;
    short pos;

    *result = 0;

    /* Copy last word to "decoded" string; every space starts afresh */

    len = 0;

    for (; (ch = *buffer) != 0; buffer++) {

        if (ch == ' ')
            len = 0;
        else if (len < 9)
            g->decoded[len++] = ch;

    }

    g->decoded[len] = 0;

    /* Search the dictionary for first and last possible extensions */

    first_entry = lookup_text (g, 0x00, g->h_dictionary);
    last_entry = lookup_text (g, 0x1f, g->h_dictionary);

    if (first_entry == 0 || last_entry == 0 || first_entry > last_entry)
        return 2;

    /* Copy first extension to "result" string */

    decode_text (g, VOCABULARY, first_entry);

    out = result;
    pos = len;

    while (g->decoded[pos] != 0)
        *out++ = g->decoded[pos++];

    *out = 0;

    /* Merge second extension with "result" string: cut "result" at */
    /* the first position where the two extensions disagree         */

    decode_text (g, VOCABULARY, last_entry);

    out = result;

    for (pos = len; g->decoded[pos] != 0 && *out == g->decoded[pos]; pos++)
        out++;

    *out = 0;

    /* Search was ambiguous or successful */

    return (first_entry == last_entry) ? 0 : 1;

}/* completion */