home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: Product
/
Product.zip
/
ISPSRC.ZIP
/
fixdict.X
< prev
next >
Wrap
Text File
|
1992-08-14
|
4KB
|
125 lines
: Use /bin/sh
#
# $Id: fixdict.X,v 1.10 91/07/03 18:20:35 geoff Exp $
#
# Copyright 1987, 1988, 1989, by Geoff Kuenning, Manhattan Beach, CA
# Permission for non-profit use is hereby granted.
# All other rights reserved.
# See "version.h" for a more complete copyright notice.
#
# $Log: fixdict.X,v $
# Revision 1.10 91/07/03 18:20:35 geoff
# Don't use the ":-" notation in defining TMPDIR, since some
# braindamaged Bourne shells don't handle it.
#
# Revision 1.9 89/04/28 01:07:58 geoff
# Change Header to Id; nobody cares about my pathnames.
#
# Revision 1.8 88/12/26 02:24:36 geoff
# Update the copyright notice.
#
# Revision 1.7 88/02/20 23:10:48 geoff
# Fix the usage of the -e switch to specify standard input properly.
# Remove an unneeded sort.
#
# Revision 1.6 87/09/24 23:24:03 geoff
# Get rid of colons in the optional-variable setting (Israel Pinkas).
#
# Revision 1.5 87/09/14 22:38:28 geoff
# Add copyright comments
#
# Revision 1.4 87/07/20 23:21:16 geoff
# Get rid of the EXPAND stuff; it's obsolete. Add DEFHASH and SORTTMP
# support. Look in the current directory for DEFHASH first.
#
# Revision 1.3 87/06/07 14:47:22 geoff
# Make LIBDIR auto-configurable
#
# Revision 1.2 87/05/27 23:16:08 geoff
# Update expand script usage
#
# Revision 1.1 87/04/19 22:25:04 geoff
# Initial revision
#
#
# Add capitalization information to an ispell dictionary
#
# Usage:
#
# fixdict dict-file
#
# Requires availability of UNIX spell. The new dictionary is
# rewritten in place. A list of words that couldn't be
# resolved (because spell doesn't know them) is written to
# standard output. This list appears in lowercase in the
# dictionary, and if there are any errors they must be edited
# by hand.
#
# The final dictionary appears in expanded form and must be
# passed through munchlist to regenerate suffixes.
#
#
# Configuration.  NOTE(review): the values written between doubled
# exclamation marks look like placeholders that are substituted when the
# installed script is generated from this template -- confirm against the
# build rules before changing these lines.
#
# LIBDIR:  directory tried for the hash file when it is not readable in
#          the current directory.
# DEFHASH: name of the hash file handed to ispell with -d.
LIBDIR=!!LIBDIR!!
DEFHASH=!!DEFHASH!!
# Extra options inserted into every sort command below: -T followed by
# $TMPDIR, or by /usr/tmp when TMPDIR is unset.
SORTTMP="-T ${TMPDIR-/usr/tmp}" # !!SORTTMP!!
# Directory for the scratch files: $TMPDIR, or /tmp when TMPDIR is unset.
# The plain "-" form (rather than ":-") is deliberate; see the log entry
# for revision 1.10.
TDIR=${TMPDIR-/tmp}
# Common prefix of the scratch files.  $$ expands to the process id of
# this shell; one letter (a through e) is appended per file.
TMP=${TDIR}/fix$$
#
# Figure out where to get the hash file. The preference is
# for one in the current directory, if it exists, since this script
# is intended primarily to be used during installation.
#
DICT="$DEFHASH"
[ -r "$DICT" ] || DICT="$LIBDIR/$DEFHASH"
#
# Check the inputs before any work is done.  The last step of this script
# overwrites the dictionary with whatever the ispell pipeline produced, so
# a missing argument or an unreadable hash file must stop the run here
# rather than after the dictionary has been rewritten.
#
if [ $# -lt 1 ]
then
    echo "Usage: fixdict dict-file" 1>&2
    exit 2
fi
if [ -f "$1" ] && [ -r "$1" ] && [ -w "$1" ]
then
    :
else
    echo "fixdict: $1: not a readable and writable file" 1>&2
    exit 1
fi
if [ -r "$DICT" ]
then
    :
else
    echo "fixdict: cannot read hash file $DICT" 1>&2
    exit 1
fi
#
# Remove the scratch files if the run is interrupted (signals 1, 2, 15).
# Single quotes defer the expansion of TMP until the trap fires, so the
# quoted prefix stays one word even when TMPDIR contains whitespace.
#
trap '/bin/rm -f "${TMP}"*; exit 1' 1 2 15
# Run the dictionary through "ispell -e", fold the result to lowercase and
# keep in ${TMP}a whatever spell complains about.  Every expansion is
# quoted so that a dictionary, hash file or TMPDIR path containing
# whitespace is passed through as a single word.
ispell -e -d "$DICT" -p /dev/null < "$1" \
    | tr '[A-Z]' '[a-z]' \
    | spell > "${TMP}a"
#
# ${TMP}a contains all the words that spell doesn't like.
# Now figure out which of those are because spell doesn't know them at
# all, and leave those in ${TMP}b.
#
# Offer the rejected words to spell again in all capitals; what it still
# rejects is folded back to lowercase and stored in ${TMP}b.  The scratch
# file names are quoted so a TMPDIR containing whitespace works.
tr '[a-z]' '[A-Z]' < "${TMP}a" \
    | spell \
    | tr '[A-Z]' '[a-z]' > "${TMP}b"
#
# The wrongly-capitalized words are those that spell didn't object to
# in the last step. Produce a list of them, and capitalize the
# first letter of each. Save this list in ${TMP}c.
#
# Lines present in ${TMP}a but not in ${TMP}b, with a leading a-z turned
# into the matching capital.  The sed program works on lines that start
# with a lowercase letter only: it saves the line, cuts the copy down to
# its first character, transliterates that character, appends the saved
# line and finally drops the newline together with the old first letter.
# LC_ALL=C makes sed work byte by byte, exactly like the literal
# per-letter substitutions this replaces.  Scratch file names are quoted
# so a TMPDIR containing whitespace works.
comm -23 "${TMP}a" "${TMP}b" \
    | LC_ALL=C sed '/^[a-z]/{
h
s/^\(.\).*/\1/
y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/
G
s/\n.//
}' > "${TMP}c"
#
# Find out which of those spell objects to, saving the failures in ${TMP}d.
#
# spell reads the capitalized list from ${TMP}c; its complaints go to
# ${TMP}d.  Both names are quoted so a TMPDIR containing whitespace works.
spell "${TMP}c" > "${TMP}d"
#
# Extract the words which were correctly capitalized at the first letter,
# combine them with an all-capitals version of the ones that weren't, and
# put the result into ${TMP}e.
#
# The two lists are produced one after the other on the same stream and
# sorted together into ${TMP}e.  SORTTMP stays unquoted on purpose: it
# holds an option plus its argument and has to split into separate words.
# The scratch file names are quoted so a TMPDIR containing whitespace works.
{
    comm -23 "${TMP}c" "${TMP}d"
    tr '[a-z]' '[A-Z]' < "${TMP}d"
} | sort $SORTTMP -o "${TMP}e"
#
# At this point, ${TMP}b contains the words that spell just plain doesn't
# like, and ${TMP}e contains the words that are now capitalized correctly.
#
# The c and d scratch files are no longer needed.  Only the prefix is
# quoted; the bracket pattern must stay outside the quotes to remain a glob.
/bin/rm "${TMP}"[cd]
#
# Put it all together, rewriting the dictionary in place.
#
# Expand and lowercase the dictionary again, drop every word listed in
# ${TMP}a, and merge what is left with ${TMP}b and ${TMP}e.  The result is
# written back over "$1" by the final sort; this relies on sort consuming
# all of its input before it writes the -o file.  SORTTMP stays unquoted
# on purpose (option plus argument); all path expansions are quoted so
# names containing whitespace are passed as single words.
ispell -e -d "$DICT" -p /dev/null < "$1" \
    | tr '[A-Z]' '[a-z]' \
    | sort $SORTTMP \
    | comm -23 - "${TMP}a" \
    | sort $SORTTMP -f -o "$1" - "${TMP}b" "${TMP}e"
#
# Finally, write the list of words that have questionable capitalization
# to the standard output.
#
# Print the unresolved words, then delete every scratch file that shares
# the prefix.  The prefix is quoted so a TMPDIR containing whitespace
# works; the * stays outside the quotes to remain a glob.
cat "${TMP}b"
/bin/rm "${TMP}"*