home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.cs.arizona.edu
/
ftp.cs.arizona.edu.tar
/
ftp.cs.arizona.edu
/
icon
/
historic
/
v941.tgz
/
icon.v941src.tar
/
icon.v941src
/
ipl
/
procs
/
gedcom.icn
< prev
next >
Wrap
Text File
|
2002-03-25
|
11KB
|
418 lines
############################################################################
#
# File: gedcom.icn
#
# Subject: Procedures for reading GEDCOM files
#
# Author: Gregg M. Townsend
#
# Date: March 25, 2002
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# These procedures read and interpret GEDCOM files, a standard
# format for genealogy databases.
#
############################################################################
#
# gedload(f) loads GEDCOM data from file f and returns a gedcom
# record containing the following fields:
# tree root of tree of gednode records
# id table of labeled nodes, indexed by @ID@
# fam list of FAM nodes (marriages)
# ind list of INDI nodes (individuals)
#
# The tree is composed of gednode records R containing these fields:
# level level
# id ID (label), including @...@ delimiters
# tag tag
# data data
# lnum line number
# parent parent node in tree
# ref referenced node, if any
# sub sub-entry list
# hcode unique hashcode, if INDI node
#
# gedwalk(tree) generates the nodes of the tree in preorder.
#
# Three procedures find descendants of a node based on a sequence
# of identifying tag strings:
# gedsub(R, tag...) generates subnodes specified by tag sequence
# gedval(R, tag...) generates data values of those subnodes
# gedref(R, tag...) generates nodes referenced by those subnodes
#
# Three procedures extract a person's name from an INDI record:
# gedfnf(R) produces "John Quincy Adams" form
# gedlnf(R) produces "Adams, John Quincy" form
# gednmf(R,f) produces an arbitrary format, substituting
# prefix, firstname, lastname, suffix for
# "P", "F", "L", "S" (respectively) in f
#
# geddate(R) finds the DATE subnode of a node and returns a string
# of at least 12 characters in a standard form such as "11 Jul 1767"
# or "abt 1810". It is assumed that the input is in English.
#
# gedyear(R) returns the year from the DATE subnode of a node.
#
# gedfind(g,s) generates the individuals under gedcom record g
# that are named by s, a string of whitespace-separated words.
# gedfind() generates each INDI node for which every word of s
# is matched by either a word of the individual's name or by
# the birth year. Matching is case-insensitive.
#
############################################################################
record gedcom(
tree, # tree of data records
id, # table of labeled nodes, indexed by @ID@
fam, # list of FAM nodes
ind # list of INDI nodes
)
record gednode(
level, # level
id, # ID (label), including @...@ delimiters
tag, # tag
data, # data
lnum, # line number
parent, # parent node in tree
ref, # referenced node, if any
sub, # sub-entry list
hcode # hashcode, if INDI node
)
$define WHITESPACE ' \t\n\r'
# gedload(f) -- load GEDCOM data from file f, returning gedcom record.
procedure gedload(f) #: load GEDCOM data from file f
local line, lnum, r, curr
local root, id, fam, ind
local hset, h1, h2, c
lnum := 0
root := curr := gednode(-1, , "ROOT", "", lnum, , , [])
id := table()
fam := []
ind := []
while line := read(f) do {
lnum +:= 1
if *line = 0 then
next
if not (r := gedscan(line)) then {
write(&errout, "ERR, line ", lnum, ": ", line)
next
}
r.lnum := lnum
r.sub := []
if r.tag == "CONC" then { # continuation line (no \n)
curr.data ||:= r.data
next
}
if r.tag == "CONT" then { # continuation line (with \n)
curr.data ||:= "\n" || r.data
next
}
while curr.level >= r.level do
curr := curr.parent
put(curr.sub, r)
r.parent := curr
curr := r
id[\r.id] := r
case r.tag of {
"FAM": put(fam, r)
"INDI": put(ind, r)
}
}
every r := gedwalk(root) do
r.ref := id[r.data]
hset := set()
every r := !ind do {
h1 := h2 := gedhi(r)
every c := !"123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" do
if member(hset, h2) then
h2 := h1 || c # add disambiguating suffix if needed
else
break
insert(hset, r.hcode := h2)
}
return gedcom(root, id, fam, ind)
end
# gedscan(f) -- scan one line of a GEDCOM record, returning gednode record
procedure gedscan(s) # (internal procedure)
local level, id, tag, data
static alnum
initial alnum := &letters ++ &digits ++ '_'
s ? {
tab(many(WHITESPACE))
level := tab(many(&digits)) | fail
tab(many(WHITESPACE))
if id := (="@" || tab(upto('@') + 1)) then
tab(many(WHITESPACE))
tag := tab(many(alnum)) | fail
tab(many(WHITESPACE))
data := tab(0)
return gednode(level, id, tag, data)
}
end
# gedwalk(r) -- walk GEDCOM tree, generating nodes in preorder
procedure gedwalk(r) #: generate GEDCOM tree nodes in preorder
suspend r | gedwalk(!r.sub)
fail
end
# gedsub(r, field...) -- generate subrecords with given tags
# gedval(r, field...) -- generate values of subrecords with given tags
# gedref(r, field...) -- generate nodes referenced by given tags
procedure gedsub(r, f[]) #: find subrecords
local tag, x
tag := get(f) | fail
every x := !r.sub do {
if x.tag == tag then
if *f > 0 then
suspend gedsub ! push(f, x)
else
suspend x
}
end
procedure gedval(a[]) #: find subrecord values
suspend (gedsub ! a).data
end
procedure gedref(a[]) #: find referenced nodes
suspend \(gedsub ! a).ref
end
# gedfnf(r) -- get name from individual record, first name first
procedure gedfnf(r) #: get first name first
return gednmf(r, "P F L S")
end
# gedlnf(r) -- get name from individual record, last name first
procedure gedlnf(r) #: get last name first
local s
s := gednmf(r, "L, P F S")
s ? {
=", "
return tab(0)
}
end
# gednmf(r, f) -- general name formatter
#
# substitutes the first name, last name, prefix, and suffix
# for the letters F, L, P, S respectively in string f.
# multiple spaces are suppressed.
procedure gednmf(r, f) #: format name
local c, s, prefix, first, last, suffix
prefix := gedval(r, "TITL" | "NPFX") | gedval(r, "NAME", "NPFX")
s := gedval(r, "NAME") | fail
s ? {
first := trim(tab(upto('/') | 0))
="/"
last := trim(tab(upto('/') | 0))
="/"
suffix := gedval(r, "NSFX") | ("" ~== tab(0))
}
s := ""
f ? {
while s ||:= tab(upto('PFLS ')) do {
while c := tab(any('PFLS ')) do {
s ||:= case c of {
"P": \prefix
"F": \first
"L": \last
"S": \suffix
" ": s[-1] ~== " "
}
}
}
s ||:= tab(0)
}
return trim(s)
end
# geddate(r) -- get date from record in standard form
procedure geddate(r) #: get canonical date
local s, t, w
static ftab
initial {
ftab := table()
ftab["JAN"] := "Jan"; ftab["FEB"] := "Feb"; ftab["MAR"] := "Mar"
ftab["APR"] := "Apr"; ftab["MAY"] := "May"; ftab["JUN"] := "Jun"
ftab["JUL"] := "Jul"; ftab["AUG"] := "Aug"; ftab["SEP"] := "Sep"
ftab["OCT"] := "Oct"; ftab["NOV"] := "Nov"; ftab["DEC"] := "Dec"
ftab["ABT"] := "abt"; ftab["BEF"] := "bef"; ftab["AFT"] := "aft"
ftab["CAL"] := "cal"; ftab["EST"] := "est"
}
s := trim(gedval(r, "DATE"), WHITESPACE) | fail
t := ""
s ? while not pos(0) do {
tab(many(WHITESPACE))
w := tab(upto(WHITESPACE) | 0)
t ||:= " " || (\ftab[w] | w)
}
if *t > 13 then
return t[2:0]
else
return right(t, 12)
end
# gedyear(r) -- get year from event record
procedure gedyear(r) #: get year
local d, y
d := gedval(r, "DATE") | fail
d ? while tab(upto(&digits)) do
if (y := tab(many(&digits)) \ 1) >= 1000 then
return y
end
# gedhi -- generate hashcode for individual record
#
# The hashcode uses two initials, final digits of birth year,
# and a 3-letter hashing of the full name and birthdate fields.
procedure gedhi(r) # (internal procedure)
local s, name, bdate, bd
static lc, uc
initial {
uc := string(&ucase)
lc := string(&lcase)
}
s := ""
name := gedval(r, "NAME") | ""
name ? {
# prefer initial of nickname; else skip unused firstname in parens
tab(upto('"') + 1) | (="(" & tab(upto(')') + 1))
tab(any(' \t'))
s ||:= tab(any(&letters)) | "X" # first initial
tab(upto('/') + 1)
tab(any(' \t'))
s ||:= tab(any(&letters)) | "X" # second initial
}
bdate := geddate(gedsub(r, "BIRT")) | ""
bd := bdate[-2:0] | "00"
if not (bd ? (tab(many(&digits)) & pos(0))) then
bd := "99"
s ||:= bd || gedh3a(name || bdate)
return map(s, lc, uc)
end
# gedh3a(s) -- hash arbitrary string into three alphabetic characters
procedure gedh3a(s) # (internal procedure)
local n, d1, d2, d3, c
n := 0
every c := !map(s) do
if not upto(' \t\f\r\n', c) then
n := 37 * n + ord(c) - 32
d1 := 97 + (n / 676) % 26
d2 := 97 + (n / 26) % 26
d3 := 97 + n % 26
return char(d1) || char(d2) || char(d3)
end
# gedfind(g, s) -- find records by name from gedcom record
#
# g is a gedcom record; s is a string of whitespace-separated words.
# gedfind() generates each INDI node for which every word of s
# is matched by either a word of the individual's name or by
# the birth year. Matching is case-insensitive.
procedure gedfind(g, s) #: find individual by name
local r
every r := !g.ind do
if gedmatch(r, s) then
suspend r
end
# gedmatch(r, s) -- match record against name
#
# s is a string of words to match name field and/or birth year.
# Matching is case sensitive.
procedure gedmatch(r, s) # (internal procedure)
local w
every w := gedlcw(s) do
(w == (gedlcw(gedval(r, "NAME")) | gedyear(gedsub(r, "BIRT")))) | fail
return r
end
# gedlcw(s, c) -- generate words from string s separated by chars from c
#
# words are mapped to lower-case to allow case-insensitive comparisons
procedure gedlcw(s, c) # (internal procedure)
/c := '/ \t\r\n\v\f'
map(s) ? {
tab(many(c))
while not pos(0) do {
suspend tab(upto(c) | 0) \ 1
tab(many(c))
}
}
fail
end