home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
OL.LZH
/
PROCS.LZH
/
NGRAMS.ICN
< prev
next >
Wrap
Text File
|
1991-07-13
|
2KB
|
51 lines
############################################################################
#
# Name: ngrams.icn
#
# Title: Generate n-grams
#
# Author: Ralph E. Griswold
#
# Date: June 10, 1988
#
############################################################################
#
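# The procedure ngrams(f,i,c,t) generates a tabulation of the n-grams of
# length i found in the open file f. Each result is a string consisting
# of the n-gram followed by its count right-justified in 8 columns.
# If c is non-null, it is used as the set of characters from which
# n-grams are taken (other characters break n-grams).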
# The default for c is the upper- and lowercase letters. If t is non-null,
# the tabulation is given in order of frequency; otherwise in alphabetical
# order of n-grams.
#
# Note:
#
# The n-grams are kept in a table within the procedure and all n-grams
# are processed before the tabulation is generated. Consequently, this
# procedure is unsuitable if there are very many different n-grams.
#
############################################################################
#
#  ngrams(f,i,c,t) -- generate the tabulation of i-grams read from file f.
#
#  f   open file to read (anything else stops with an error)
#  i   n-gram length; must convert to an integer greater than zero
#  c   characters that may appear in an n-gram; any other character
#      ends the current run.  Defaults to the upper- and lowercase letters.
#  t   if non-null, results are produced in decreasing order of frequency;
#      if null, in alphabetical order of n-gram.
#
#  Each result is the n-gram followed by its count right-justified in
#  8 columns.  The procedure fails once every n-gram has been produced.
#
procedure ngrams(f,i,c,t)
   local chunk, carry, run, grams, a, count, gram

   if not (integer(i) > 0) then stop("invalid ngrams specification")
   if type(f) ~== "file" then stop("invalid file specification")
   /c := &lcase ++ &ucase
   if not (c := cset(c)) then stop("invalid cset specification")

   grams := table(0)

   # Read the file in 1000-character chunks.  Each chunk is scanned exactly
   # once; the only text kept between reads is a run of accepted characters
   # that reaches the end of the chunk, because that run may continue in the
   # next chunk and must be tallied as a whole.
   carry := ""
   while chunk := reads(f,1000) do
      (carry || chunk) ? {
         carry := ""
         while tab(upto(c)) do {
            run := tab(many(c))
            if pos(0) then
               carry := run                     # may be incomplete; defer
            else
               # slide a window of width i over the run, one step at a time
               run ? while grams[move(i)] +:= 1 do
                  move(-i + 1)
            }
         }

   # End of file: whatever run was deferred is now known to be complete.
   carry ? while grams[move(i)] +:= 1 do
      move(-i + 1)

   if \t then {
      # sort(T,4) orders key/value pairs by increasing value, so pulling
      # from the end yields count then n-gram, most frequent first.
      a := sort(grams,4)
      while count := pull(a) do
         suspend pull(a) || right(count,8)
      }
   else {
      # sort(T,3) orders key/value pairs by key: n-gram, count, n-gram, ...
      a := sort(grams,3)
      while gram := get(a) do
         suspend gram || right(get(a),8)
      }
end