home *** CD-ROM | disk | FTP | other *** search
- #############################################################################
- #
- # File: mtf3.icn
- #
- # Subject: Program to map tar file
- #
- # Author: Richard Goerwitz
- #
- # Date: June 3, 1991
- #
- ############################################################################
- #
- # Version: 3.4
- #
- ###########################################################################
- #
- # PURPOSE: Maps 15+ char. filenames in a tar archive to 14 chars.
- # Handles both header blocks and the archive itself. Mtf is intended
- # to facilitate installation of tar'd archives on systems subject to
- # the System V 14-character filename limit.
- #
- # USAGE: mtf inputfile [-r reportfile] [-e .extensions] [-x exceptions]
- #
- # "Inputfile" is a tar archive. "Reportfile" is file containing a
- # list of files already mapped by mtf in a previous run (used to
- # avoid clashes with filenames in use outside the current archive).
- # The -e switch precedes a list of filename .extensions which mtf is
- # supposed to leave unscathed by the mapping process
- # (single-character extensions such as .c and .o are automatically
- # preserved; -e allows the user to specify additional extensions,
- # such as .pxl, .cpi, and .icn). The final switch, -x, precedes a
- # list of strings which should not be mapped at all. Use this switch
- # if, say, you have a C file with a structure.field combination such
- # as "thisisveryverybig.hashptr" in an archive that contains a file
- # called "thisisveryverybig.h," and you want to avoid mapping that
- # portion of the struct name which matches the name of the overlong
- # file (to wit, "mtf inputfile -x thisisveryverybig.hashptr"). To
- # prevent mapping of any string (including overlong filenames) begin-
- # ning, say, with "thisisvery," use "mtf inputfile -x thisisvery."
- # Be careful with this option, or you might end up defeating the
- # whole point of using mtf in the first place.
- #
- # OUTPUT FORMAT: Mtf writes a mapped tar archive to the stdout.
- # When finished, it leaves a file called "map.report" in the current
- # directory which records what filenames were mapped and how. Rename
- # and save this file, and use it as the "reportfile" argument to any
- # subsequent runs of mtf in this same directory. Even if you don't
- # plan to run mtf again, this file should still be examined, just to
- # be sure that the new filenames are acceptable, and to see if
- # perhaps additional .extensions and/or exceptions should be
- # specified.
- #
- # BUGS: Mtf only maps filenames found in the main tar headers.
- # Because of this, mtf cannot accept nested tar archives. If you try
- # to map a tar archive within a tar file, mtf will abort with a nasty
- # message about screwing up your files. Please note that, unless you
- # give mtf a "reportfile" to consider, it knows nothing about files
- # existing outside the archive. Hence, if an input archive refers to
- # an overlong filename in another archive, mtf naturally will not
- # know to shorten it. Mtf will, in fact, have no way of knowing that
- # it is a filename, and not, say, an identifier in a C program.
- # Final word of caution: Try not to use mtf on binaries. It cannot
- # possibly preserve the correct format and alignment of strings in an
- # executable. Same goes for compressed files. Mtf can't map
- # filenames that it can't read!
- #
- ####################################################################
-
-
- global filenametbl, chunkset, short_chunkset # see procedure mappiece(s)
- global extensions, no_nos # ditto
-
- record hblock(name,junk,size,mtime,chksum, # tar header struct;
- linkflag,linkname,therest) # see readtarhdr(s)
-
-
- procedure main(a)
- local usage, intext, i, current_list
-
- usage := "usage: mtf inputfile [-r reportfile] " ||
- "[-e .extensions] [-x exceptions]"
-
- *a = 0 & stop(usage)
-
- intext := open_input_file(a[1]) & pop(a)
-
- i := 0
- extensions := []; no_nos := []
- while (i +:= 1) <= *a do {
- case a[i] of {
- "-r" : readin_old_map_report(a[i+:=1])
- "-e" : current_list := extensions
- "-x" : current_list := no_nos
- default : put(current_list,a[i])
- }
- }
-
- every !extensions ?:= (=".", tab(0))
-
- # Run through all the headers in the input file, filling
- # (global) filenametbl with the names of overlong files;
- # make_table_of_filenames fails if there are no such files.
- make_table_of_filenames(intext) | {
- write(&errout,"mtf: no overlong path names to map")
- a[1] ? (tab(find(".tar")+4), pos(0)) |
- write(&errout,"(Is ",a[1]," even a tar archive?)")
- exit(1)
- }
-
- # Now that a table of overlong filenames exists, go back
- # through the text, remapping all occurrences of these names
- # to new, 14-char values; also, reset header checksums, and
- # reformat text into correctly padded 512-byte blocks. Ter-
- # minate output with 512 nulls.
- seek(intext,1)
- every writes(output_mapped_headers_and_texts(intext))
-
- close(intext)
- write_report() # Record mapped file and dir names for future ref.
- exit(0)
-
- end
-
-
-
- procedure open_input_file(s)
- local intext
-
- intext := open("" ~== s,"r") |
- stop("mtf: can't open ",s)
- find("UNIX",&features) |
- stop("mtf: I'm not tested on non-UNIX systems.")
- s[-2:0] == ".Z" &
- stop("mtf: sorry, can't accept compressed files")
- return intext
- end
-
-
-
- procedure readin_old_map_report(s)
- local mapfile, line, chunk, lchunk
-
- initial {
- filenametbl := table()
- chunkset := set()
- short_chunkset := set()
- }
-
- mapfile := open_input_file(s)
- while line := read(mapfile) do {
- line ? {
- if chunk := tab(many(~' \t')) & tab(upto(~' \t')) &
- lchunk := move(14) & pos(0) then {
- filenametbl[chunk] := lchunk
- insert(chunkset,chunk)
- insert(short_chunkset,chunk[1:16])
- }
- if /chunk | /lchunk
- then stop("mtf: report file, ",s," seems mangled.")
- }
- }
-
- end
-
-
-
- procedure make_table_of_filenames(intext)
-
- local header # chunkset is global
-
- # search headers for overlong filenames; for now
- # ignore everything else
- while header := readtarhdr(reads(intext,512)) do {
- # tab upto the next header block
- tab_nxt_hdr(intext,trim_str(header.size),1)
- # record overlong filenames in several global tables, sets
- fixpath(trim_str(header.name))
- }
- *\chunkset ~= 0 | fail
- return &null
-
- end
-
-
-
- procedure output_mapped_headers_and_texts(intext)
-
- # Remember that filenametbl, chunkset, and short_chunkset
- # (which are used by various procedures below) are global.
- local header, newtext, full_block, block, lastblock
-
- # Read in headers, one at a time.
- while header := readtarhdr(reads(intext,512)) do {
-
- # Replace overlong filenames with shorter ones, according to
- # the conversions specified in the global hash table filenametbl
- # (which were generated by fixpath() on the first pass).
- header.name := left(map_filenams(header.name),100,"\x00")
- header.linkname := left(map_filenams(header.linkname),100,"\x00")
-
- # Use header.size field to determine the size of the subsequent text.
- # Read in the text as one string. Map overlong filenames found in it
- # to shorter names as specified in the global hash table filenamtbl.
- newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size)))
-
- # Now, find the length of newtext, and insert it into the size field.
- header.size := right(exbase10(*newtext,8) || " ",12," ")
-
- # Calculate the checksum of the newly retouched header.
- header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")
-
- # Finally, join all the header fields into a new block and write it out
- full_block := ""; every full_block ||:= !header
- suspend left(full_block,512,"\x00")
-
- # Now we're ready to write out the text, padding the final block
- # out to an even 512 bytes if necessary; the next header must start
- # right at the beginning of a 512-byte block.
- newtext ? {
- while block := move(512)
- do suspend block
- pos(0) & next
- lastblock := left(tab(0),512,"\x00")
- suspend lastblock
- }
- }
- # Write out a final null-filled block. Some tar programs will write
- # out 1024 nulls at the end. Dunno why.
- return repl("\x00",512)
-
- end
-
-
-
- procedure trim_str(s)
-
- # Knock out spaces, nulls from those crazy tar header
- # block fields (some of which end in a space and a null,
- # some just a space, and some just a null [anyone know
- # why?]).
- return s ? {
- (tab(many(' ')) | &null) &
- trim(tab(find("\x00")|0))
- }
-
- end
-
-
-
- procedure tab_nxt_hdr(f,size_str,firstpass)
-
- # Tab upto the next header block. Return the bypassed text
- # as a string if not the first pass.
-
- local hs, next_header_offset
-
- hs := integer("8r" || size_str)
- next_header_offset := (hs / 512) * 512
- hs % 512 ~= 0 & next_header_offset +:= 512
- if 0 = next_header_offset then return ""
- else {
- # if this is pass no. 1 don't bother returning a value; we're
- # just collecting long filenames;
- if \firstpass then {
- seek(f,where(f)+next_header_offset)
- return
- }
- else {
- return reads(f,next_header_offset)[1:hs+1] |
- stop("mtf: error reading in ",
- string(next_header_offset)," bytes.")
- }
- }
-
- end
-
-
-
- procedure fixpath(s)
- local s2, piece
-
- # Fixpath is a misnomer of sorts, since it is used on
- # the first pass only, and merely examines each filename
- # in a path, using the procedure mappiece to record any
- # overlong ones in the global table filenametbl and in
- # the global sets chunkset and short_chunkset; no fixing
- # is actually done here.
-
- s2 := ""
- s ? {
- while piece := tab(find("/")+1)
- do s2 ||:= mappiece(piece)
- s2 ||:= mappiece(tab(0))
- }
- return s2
-
- end
-
-
-
- procedure mappiece(s)
- local chunk, i, lchunk
-
- # Check s (the name of a file or dir as recorded in the tar header
- # being examined) to see if it is over 14 chars long. If so,
- # generate a unique 14-char version of the name, and store
- # both values in the global hashtable filenametbl. Also store
- # the original (overlong) file name in chunkset. Store the
- # first fifteen chars of the original file name in short_chunkset.
- # Sorry about all of the tables and sets. It actually makes for
- # a reasonably efficient program. Doing away with both sets,
- # while possible, causes a tenfold drop in execution speed!
-
- # global filenametbl, chunkset, short_chunkset, extensions
- local j, ending
-
- initial {
- /filenametbl := table()
- /chunkset := set()
- /short_chunkset := set()
- }
-
- chunk := trim(s,'/')
- if chunk ? (tab(find(".tar")+4), pos(0)) then {
- write(&errout, "mtf: Sorry, I can't let you do this.\n",
- " You've nested a tar archive within\n",
- " another tar archive, which makes it\n",
- " likely I'll f your filenames ubar.")
- exit(2)
- }
- if *chunk > 14 then {
- i := 0
-
- if /filenametbl[chunk] then {
- # if we have not seen this file, then...
- repeat {
- # ...find a new unique 14-character name for it;
- # preserve important suffixes like ".Z," ".c," etc.
- # First, check to see if the original filename (chunk)
- # ends in an important extension...
- if chunk ?
- (tab(find(".")),
- ending := move(1) || tab(match(!extensions)|any(&ascii)),
- pos(0)
- )
- # ...If so, then leave the extension alone; mess with the
- # middle part of the filename (e.g. file.with.extension.c ->
- # file.with001.c).
- then {
- j := (15 - *ending - 3)
- lchunk:= chunk[1:j] || right(string(i+:=1),3,"0") || ending
- }
- # If no important extension is present, then reformat the
- # end of the file (e.g. too.long.file.name -> too.long.fi01).
- else lchunk := chunk[1:13] || right(string(i+:=1),2,"0")
-
- # If the resulting shorter file name has already been used...
- if lchunk == !filenametbl
- # ...then go back and find another (i.e. increment i & try
- # again; else break from the repeat loop, and...
- then next else break
- }
- # ...record both the old filename (chunk) and its new,
- # mapped name (lchunk) in filenametbl. Also record the
- # mapped names in chunkset and short_chunkset.
- filenametbl[chunk] := lchunk
- insert(chunkset,chunk)
- insert(short_chunkset,chunk[1:16])
- }
- }
-
- # If the filename is overlong, return lchunk (the shortened
- # name), else return the original name (chunk). If the name,
- # as passed to the current function, contained a trailing /
- # (i.e. if s[-1]=="/"), then put the / back. This could be
- # done more elegantly.
- return (\lchunk | chunk) || ((s[-1] == "/") | "")
-
- end
-
-
-
- procedure readtarhdr(s)
- local this_block
-
- # Read the silly tar header into a record. Note that, as was
- # complained about above, some of the fields end in a null, some
- # in a space, and some in a space and a null. The procedure
- # trim_str() may (and in fact often _is_) used to remove this
- # extra garbage.
-
- this_block := hblock()
- s ? {
- this_block.name := move(100) # <- to be looked at later
- this_block.junk := move(8+8+8) # skip the permissions, uid, etc.
- this_block.size := move(12) # <- to be looked at later
- this_block.mtime := move(12)
- this_block.chksum := move(8) # <- to be looked at later
- this_block.linkflag := move(1)
- this_block.linkname := move(100) # <- to be looked at later
- this_block.therest := tab(0)
- }
- integer(this_block.size) | fail # If it's not an integer, we've hit
- # the final (null-filled) block.
- return this_block
-
- end
-
-
-
- procedure map_filenams(s)
- local el, ch
-
- # Chunkset is global, and contains all the overlong filenames
- # found in the first pass through the input file; here the aim
- # is to map these filenames to the shortened variants as stored
- # in filenametbl (GLOBAL).
-
- local s2, tmp_chunk_tbl, tmp_lst
- static new_chunklist
- initial {
-
- # Make sure filenames are sorted, longest first. Say we
- # have a file called long_file_name_here.1 and one called
- # long_file_name_here.1a. We want to check for the longer
- # one first. Otherwise the portion of the second file which
- # matches the first file will get remapped.
- tmp_chunk_tbl := table()
- every el := !chunkset
- do insert(tmp_chunk_tbl,el,*el)
- tmp_lst := sort(tmp_chunk_tbl,4)
- new_chunklist := list()
- every put(new_chunklist,tmp_lst[*tmp_lst-1 to 1 by -2])
-
- }
-
- s2 := ""
- s ? {
- until pos(0) do {
- # first narrow the possibilities, using short_chunkset
- if member(short_chunkset,&subject[&pos:&pos+15])
- # then try to map from a long to a shorter 14-char filename
- then {
- if match(ch := !new_chunklist) & not match(!no_nos)
- then s2 ||:= filenametbl[=ch]
- else s2 ||:= move(1)
- }
- else s2 ||:= move(1)
- }
- }
- return s2
-
- end
-
-
- # From the IPL. Thanks, Ralph -
- # Author: Ralph E. Griswold
- # Date: June 10, 1988
- # exbase10(i,j) convert base-10 integer i to base j
- # The maximum base allowed is 36.
-
- procedure exbase10(i,j)
-
- static digits
- local s, d, sign
- initial digits := &digits || &lcase
- if i = 0 then return 0
- if i < 0 then {
- sign := "-"
- i := -i
- }
- else sign := ""
- s := ""
- while i > 0 do {
- d := i % j
- if d > 9 then d := digits[d + 1]
- s := d || s
- i /:= j
- }
- return sign || s
-
- end
-
- # end IPL material
-
-
- procedure get_checksum(r)
- local sum, field
-
- # Calculates the new value of the checksum field for the
- # current header block. Note that the specification say
- # that, when calculating this value, the chksum field must
- # be blank-filled.
-
- sum := 0
- r.chksum := " "
- every field := !r
- do every sum +:= ord(!field)
- return sum
-
- end
-
-
-
- procedure write_report()
-
- # This procedure writes out a list of filenames which were
- # remapped (because they exceeded the SysV 14-char limit),
- # and then notifies the user of the existence of this file.
-
- local outtext, stbl, i, j, mapfile_name
-
- # Get a unique name for the map.report (thereby preventing
- # us from overwriting an older one).
- mapfile_name := "map.report"; j := 1
- until not close(open(mapfile_name,"r"))
- do mapfile_name := (mapfile_name[1:11] || string(j+:=1))
-
- (outtext := open(mapfile_name,"w")) |
- open(mapfile_name := "/tmp/map.report","w") |
- stop("mtf: Can't find a place to put map.report!")
- stbl := sort(filenametbl,3)
- every i := 1 to *stbl -1 by 2 do {
- match(!no_nos,stbl[i]) |
- write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
- }
- write(&errout,"\nmtf: ",mapfile_name," contains the list of changes.")
- write(&errout," Please save this list!")
- close(outtext)
- return &null
-
- end
-