home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.sources.misc
- X-UNIX-From: news@usc.edu
- organization: RAND Corp., Santa Monica, Ca.
- subject: v15i083: Awk program for putting a set of files on minimum floppies.
- from: thomas%randvax.UUCP@usc.edu (Susan Thomas)
- Sender: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)
-
- Posting-number: Volume 15, Issue 83
- Submitted-by: thomas%randvax.UUCP@usc.edu (Susan Thomas)
- Archive-name: greedy/part01
-
- [Assuming I've finally figured out the latest rearrangement at uunet, I'm going
- to post some 30 submissions now, then notify the new moderator. When I get
- confirmation form him/her, I will post anything remaining in the queue and send
- a message notifying the net, then redirect the aliases. ++bsa]
-
- (I'm posting this for a friend)
-
- This is a awk implementation of the "greedy" bin-packing algorithm
- as applied to the problem of spreading a set of files over a set
- of floppies in such a way as to require the least number of
- floppies. It takes a list of files and generates copying scripts
- which embody the optimal allocation of files.
-
- Greedy is not the optimal algorithm. It's just a easily implemented
- heuristic.
-
- The archive contains a read.me with more information.
-
- ---------------------------------------------------------------------------
- Submitted-by: ajayshah@usc.edu
- Archive-name: greedy/part01
-
- ---- Cut Here and unpack ----
- #!/bin/sh
- # This is greedy, a shell archive (shar 3.32)
- # made 10/22/1990 07:17 UTC by ajayshah@usc.edu
- # Source directory /tmp/PACKING
- #
- # existing files WILL be overwritten
- #
- # This shar contains:
- # length mode name
- # ------ ---------- ------------------------------------------
- # 1691 -rw-r--r-- copying.bat.good
- # 1983 -rw-r--r-- lookup.table.good
- # 3968 -rw-r--r-- pack.awk
- # 1864 -rw-r--r-- read.me
- # 1281 -rw-r--r-- testdata
- #
- if touch 2>&1 | fgrep 'amc' > /dev/null
- then TOUCH=touch
- else TOUCH=true
- fi
- # ============= copying.bat.good ==============
- echo "x - extracting copying.bat.good (Text)"
- sed 's/^X//' << 'SHAR_EOF' > copying.bat.good &&
- X@echo off
- Xecho This set of files will need 3 floppies.
- Xecho Make sure you have so many HD formatted floppies handy.
- Xecho and then hit ENTER.
- Xpause
- Xecho Insert Floppy Number 1 and hit ENTER:
- Xpause
- Xcopy ./PC/C/backlog.c b:
- Xcopy ./PC/C/boss01.zip b:
- Xcopy ./PC/C/boss02a.zip b:
- Xcopy ./PC/C/boss02b.zip b:
- Xcopy ./PC/C/cc03.arc b:
- Xcopy ./PC/C/ccc1053b.arc b:
- Xcopy ./PC/C/ccc1053c.arc b:
- Xcopy ./PC/C/cnews005.arc b:
- Xcopy ./PC/C/cnews009.arc b:
- Xecho Insert Floppy Number 2 and hit ENTER:
- Xpause
- Xcopy ./PC/BORLAND/bgidriv.arc b:
- Xcopy ./PC/BORLAND/bgifont.arc b:
- Xcopy ./PC/BORLAND/bgifonts.arc b:
- Xcopy ./PC/BORLAND/bgiherc.arc b:
- Xcopy ./PC/BORLAND/bgvga256.arc b:
- Xcopy ./PC/BORLAND/tcppt1.zip b:
- Xcopy ./PC/C/abmake14.arc b:
- Xcopy ./PC/C/ansi-c.arc b:
- Xcopy ./PC/C/apm.arc b:
- Xcopy ./PC/C/boss03.zip b:
- Xcopy ./PC/C/bplus11.arc b:
- Xcopy ./PC/C/c-flow.arc b:
- Xcopy ./PC/C/c4window.arc b:
- Xcopy ./PC/C/cc01.arc b:
- Xcopy ./PC/C/cc02.arc b:
- Xcopy ./PC/C/cc04.arc b:
- Xcopy ./PC/C/ccc1053a.arc b:
- Xcopy ./PC/C/ccompile.arc b:
- Xcopy ./PC/C/cephes.arc b:
- Xcopy ./PC/C/clp_v11.arc b:
- Xcopy ./PC/C/cnews003.arc b:
- Xcopy ./PC/C/cnews004.arc b:
- Xcopy ./PC/C/cnews006.arc b:
- Xcopy ./PC/C/cnews008.arc b:
- Xcopy ./PC/C/cnews010.arc b:
- Xecho Insert Floppy Number 3 and hit ENTER:
- Xpause
- Xcopy ./PC/BORLAND/00-index.txt b:
- Xcopy ./PC/BORLAND/td1pat.arc b:
- Xcopy ./PC/C/00-index.txt b:
- Xcopy ./PC/C/64colors.zip b:
- Xcopy ./PC/C/advc11.arc b:
- Xcopy ./PC/C/argtest.c b:
- Xcopy ./PC/C/asyncpec.arc b:
- Xcopy ./PC/C/bituudec.c b:
- Xcopy ./PC/C/break.arc b:
- Xcopy ./PC/C/btoa.arc b:
- Xcopy ./PC/C/c_check.arc b:
- Xcopy ./PC/C/cbooks.arc b:
- Xcopy ./PC/C/cdecl.arc b:
- Xcopy ./PC/C/cnews001.arc b:
- Xcopy ./PC/C/cnews002.arc b:
- Xcopy ./PC/C/cnews007.arc b:
- Xecho Toodles.
- SHAR_EOF
- $TOUCH -am 1022000890 copying.bat.good &&
- chmod 0644 copying.bat.good ||
- echo "restore of copying.bat.good failed"
- set `wc -c copying.bat.good`;Wc_c=$1
- if test "$Wc_c" != "1691"; then
- echo original size 1691, current size $Wc_c
- fi
- # ============= lookup.table.good ==============
- echo "x - extracting lookup.table.good (Text)"
- sed 's/^X//' << 'SHAR_EOF' > lookup.table.good &&
- X./PC/BORLAND/00-index.txt --> Floppy Number 3
- X./PC/BORLAND/bgidriv.arc --> Floppy Number 2
- X./PC/BORLAND/bgifont.arc --> Floppy Number 2
- X./PC/BORLAND/bgifonts.arc --> Floppy Number 2
- X./PC/BORLAND/bgiherc.arc --> Floppy Number 2
- X./PC/BORLAND/bgvga256.arc --> Floppy Number 2
- X./PC/BORLAND/tcppt1.zip --> Floppy Number 2
- X./PC/BORLAND/td1pat.arc --> Floppy Number 3
- X./PC/C/00-index.txt --> Floppy Number 3
- X./PC/C/64colors.zip --> Floppy Number 3
- X./PC/C/abmake14.arc --> Floppy Number 2
- X./PC/C/advc11.arc --> Floppy Number 3
- X./PC/C/ansi-c.arc --> Floppy Number 2
- X./PC/C/apm.arc --> Floppy Number 2
- X./PC/C/argtest.c --> Floppy Number 3
- X./PC/C/asyncpec.arc --> Floppy Number 3
- X./PC/C/backlog.c --> Floppy Number 1
- X./PC/C/bituudec.c --> Floppy Number 3
- X./PC/C/boss01.zip --> Floppy Number 1
- X./PC/C/boss02a.zip --> Floppy Number 1
- X./PC/C/boss02b.zip --> Floppy Number 1
- X./PC/C/boss03.zip --> Floppy Number 2
- X./PC/C/bplus11.arc --> Floppy Number 2
- X./PC/C/break.arc --> Floppy Number 3
- X./PC/C/btoa.arc --> Floppy Number 3
- X./PC/C/c-flow.arc --> Floppy Number 2
- X./PC/C/c4window.arc --> Floppy Number 2
- X./PC/C/c_check.arc --> Floppy Number 3
- X./PC/C/cbooks.arc --> Floppy Number 3
- X./PC/C/cc01.arc --> Floppy Number 2
- X./PC/C/cc02.arc --> Floppy Number 2
- X./PC/C/cc03.arc --> Floppy Number 1
- X./PC/C/cc04.arc --> Floppy Number 2
- X./PC/C/ccc1053a.arc --> Floppy Number 2
- X./PC/C/ccc1053b.arc --> Floppy Number 1
- X./PC/C/ccc1053c.arc --> Floppy Number 1
- X./PC/C/ccompile.arc --> Floppy Number 2
- X./PC/C/cdecl.arc --> Floppy Number 3
- X./PC/C/cephes.arc --> Floppy Number 2
- X./PC/C/clp_v11.arc --> Floppy Number 2
- X./PC/C/cnews001.arc --> Floppy Number 3
- X./PC/C/cnews002.arc --> Floppy Number 3
- X./PC/C/cnews003.arc --> Floppy Number 2
- X./PC/C/cnews004.arc --> Floppy Number 2
- X./PC/C/cnews005.arc --> Floppy Number 1
- X./PC/C/cnews006.arc --> Floppy Number 2
- X./PC/C/cnews007.arc --> Floppy Number 3
- X./PC/C/cnews008.arc --> Floppy Number 2
- X./PC/C/cnews009.arc --> Floppy Number 1
- X./PC/C/cnews010.arc --> Floppy Number 2
- SHAR_EOF
- $TOUCH -am 1022000890 lookup.table.good &&
- chmod 0644 lookup.table.good ||
- echo "restore of lookup.table.good failed"
- set `wc -c lookup.table.good`;Wc_c=$1
- if test "$Wc_c" != "1983"; then
- echo original size 1983, current size $Wc_c
- fi
- # ============= pack.awk ==============
- echo "x - extracting pack.awk (Text)"
- sed 's/^X//' << 'SHAR_EOF' > pack.awk &&
- X
- X# This awk program packs a set of files over a set of floppies
- X# using the "greedy bin-packing algorithm".
- X
- X# It is hardcoded for 3.5" HD floppies being written to b:
- X# To change the "b:" assumption goto line 106.
- X# To change the 3.5" HD assumption goto lines 16-17.
- X
- XBEGIN {
- X name = 1; #
- X size = 2; #
- X taken = 3; #
- X FID = 4; #
- X # Ignore these four defns.
- X
- X FCAPACITY = 1457664; # change this for floppies other than DSHD 3.5"
- X CLUSTERSIZE = 512; # ditto.
- X
- X stderr = "/dev/tty";
- X }
- X
- X{table[NR, name] = $1;
- X table[NR, size] = $2;
- X table[NR, taken] = 0; # 0 for not yet taken, 1 for taken.
- X table[NR, FID] = 0; #allocating the FID column now tends to make it faster.
- X
- X if (table[NR, size] > FCAPACITY) {
- X print "File " table[NR, name] " on line " NR " is too large for one floppy.";
- X goch = 1;
- X exit 1
- X }
- X}
- X
- XEND {
- X if (goch == 1) exit 1;
- X #So far, all that has been done is setup table.
- X print "Finished reading in all data." > stderr;
- X
- X floppy = 0;
- X todo = NR;
- X done = 0;
- X while (done < todo) {
- X # If there are more files to be done, then open a
- X # new floppy.
- X print "done = " done " todo = " todo > stderr;
- X
- X floppy++; spaceleft = FCAPACITY;
- X trymore = 0;
- X while (trymore == 0) {
- X # look for a file to put into this floppy.
- X bestfile = 0; bestsize = 0;
- X for (i = 1; i <= todo; i++) {
- X # Linear scan over all files looking for best fit.
- X if (table[i, taken] == 0) {
- X #this file has not yet been taken
- X if ((table[i, size]+CLUSTERSIZE) < spaceleft) {
- X #this file can (in principle) fit in the slot
- X if (table[i, size] > bestsize) {
- X #this file does it better than the best
- X #file we've found so far!
- X bestfile = i;
- X bestsize = table[i, size];
- X }
- X }
- X }
- X }
- X #End of linear scan.
- X if (bestfile == 0) {
- X #we didn't find a file to fit this space
- X trymore = 1 #quit trying further with this floppy
- X print "Wasted space on floppy " floppy " = " spaceleft;
- X }
- X else {
- X print "Putting file " table[bestfile, name] " of size " table[bestfile, size] " into floppy " floppy "." > stderr;
- X table[bestfile, FID] = floppy;
- X table[bestfile, taken] = 1;
- X spaceleft = spaceleft - table[bestfile, size] - CLUSTERSIZE;
- X # CLUSTERSIZE bytes is worstcase wastage of space
- X # with clusters of CLUSTERSIZE bytes.
- X done++
- X }
- X }
- X }
- X # End of greedy algorithm.
- X
- X # Once you land here, you've finished with all files.
- X # We generate two things now:
- X # First, generate the MS-DOS batch file which'll do the copying.
- X # Next, generate the lookup table which maps a file into it's
- X # FID (floppy ID)
- X
- X # Printing batch file to StdOut:
- X msdosbat = "copying.bat";
- X print "@echo off" > msdosbat;
- X print "echo This set of files will need " floppy " floppies." > msdosbat;
- X print "echo Make sure you have so many HD formatted floppies handy." > msdosbat;
- X print "echo and then hit ENTER." > msdosbat;
- X print "pause" > msdosbat;
- X for (f = 1; f <= floppy; f++) {
- X # running over every floppy
- X print "echo Insert Floppy Number " f " and hit ENTER:" > msdosbat;
- X print "pause" > msdosbat;
- X # Now generate code for the copying:
- X for (i = 1; i <= todo; i++) {
- X if (table[i, FID] == f) {
- X print "copy " table[i, name] " b:" > msdosbat;
- X } #\
- X } # \
- X } # \__ Change this for other drives.
- X print "echo Toodles." > msdosbat;
- X
- X # Now to generate the lookup table to file "lookup.table"
- X for (i = 1; i <= todo; i++) {
- X print table[i, name] " --> Floppy Number " table[i, FID] > "lookup.table"
- X }
- X
- X for (i=1; i<=5; i++) print "" > stderr;
- X print "I have created two files for you." > stderr;
- X print "" > stderr;
- X print "The file copying.bat is a MessDos batch file which does the copying." > stderr;
- X print "The file lookup.table is a lookup table mapping a file to it's floppy." > stderr;
- X print "" > stderr;
- X print "Toodles!" > stderr;
- X}
- X
- SHAR_EOF
- $TOUCH -am 1022001690 pack.awk &&
- chmod 0644 pack.awk ||
- echo "restore of pack.awk failed"
- set `wc -c pack.awk`;Wc_c=$1
- if test "$Wc_c" != "3968"; then
- echo original size 3968, current size $Wc_c
- fi
- # ============= read.me ==============
- echo "x - extracting read.me (Text)"
- sed 's/^X//' << 'SHAR_EOF' > read.me &&
- X
- XWHAT?
- X
- XThis is a awk implementation of the "greedy" bin-packing algorithm
- Xas applied to the problem of spreading a set of files over a set
- Xof floppies in such a way as to require the least number of
- Xfloppies. It takes a list of files and generates copying scripts
- Xwhich embody the optimal allocation of files.
- X
- XGreedy is not the optimal algorithm. It's just a easily implemented
- Xheuristic.
- X
- X---------------------------------------------------------------------------
- X
- X
- X
- XHOW?
- X
- XIt needs an input file of the form
- X
- X./PC/BORLAND/00-index.txt 866
- X./PC/BORLAND/bgidriv.arc 101151
- X
- Xetc., where the first field is the filename and the 2nd field is
- Xthe filesize.
- X
- XUsage:
- X
- X nawk -f pack.awk filelist
- X or
- X gawk -f pack.awk filelist
- X
- XThe program talks a lot on /dev/tty about it's activities. Finally,
- Xit generates two files: a MS-DOS batch file which does the copying
- Xand a lookup table mapping filenames to floppy numbers. Modifications
- Xon what is generated, etc., are not difficult.
- X
- XThe file 'testdata' is for demo purposes. Try saying
- X
- X nawk -f pack.awk testdata
- X
- XIf you want a doublecheck, then the "correct" results are files
- X'copying.bat.good' and 'lookup.table.good'.
- X
- XIt does NOT work with awk; you must have either nawk or gawk.
- X
- X---------------------------------------------------------------------------
- X
- X
- X
- XIt's horrendously slow. I only plan to be using it once in a while
- Xso it's fine by me. If someone ports it to C or so, I'd like to get
- Xa copy!
- X
- XIf someone implements a smarter heuristic, then I'd be even more
- Xinterested!
- X
- X_______________________________________________________________________________
- XAjay Shah, ajayshah@usc.edu ajayshah%rcc%rand.org
- XApt: (213) 734-3930 RAND Corporation: (213) 393-0411 x7281
- X_______________________________________________________________________________
- X
- SHAR_EOF
- $TOUCH -am 1022001290 read.me &&
- chmod 0644 read.me ||
- echo "restore of read.me failed"
- set `wc -c read.me`;Wc_c=$1
- if test "$Wc_c" != "1864"; then
- echo original size 1864, current size $Wc_c
- fi
- # ============= testdata ==============
- echo "x - extracting testdata (Text)"
- sed 's/^X//' << 'SHAR_EOF' > testdata &&
- X./PC/BORLAND/00-index.txt 866
- X./PC/BORLAND/bgidriv.arc 101151
- X./PC/BORLAND/bgifont.arc 81908
- X./PC/BORLAND/bgifonts.arc 87716
- X./PC/BORLAND/bgiherc.arc 58540
- X./PC/BORLAND/bgvga256.arc 28922
- X./PC/BORLAND/tcppt1.zip 40323
- X./PC/BORLAND/td1pat.arc 11443
- X./PC/C/00-index.txt 11904
- X./PC/C/64colors.zip 12382
- X./PC/C/abmake14.arc 62897
- X./PC/C/advc11.arc 14336
- X./PC/C/ansi-c.arc 15213
- X./PC/C/apm.arc 86050
- X./PC/C/argtest.c 1442
- X./PC/C/asyncpec.arc 6186
- X./PC/C/backlog.c 3797
- X./PC/C/bituudec.c 7191
- X./PC/C/boss01.zip 139935
- X./PC/C/boss02a.zip 241954
- X./PC/C/boss02b.zip 200308
- X./PC/C/boss03.zip 81988
- X./PC/C/bplus11.arc 22618
- X./PC/C/break.arc 8204
- X./PC/C/btoa.arc 6182
- X./PC/C/c-flow.arc 62706
- X./PC/C/c4window.arc 53248
- X./PC/C/c_check.arc 21504
- X./PC/C/cbooks.arc 12874
- X./PC/C/cc01.arc 4073
- X./PC/C/cc02.arc 93940
- X./PC/C/cc03.arc 117176
- X./PC/C/cc04.arc 113452
- X./PC/C/ccc1053a.arc 36508
- X./PC/C/ccc1053b.arc 227761
- X./PC/C/ccc1053c.arc 309945
- X./PC/C/ccompile.arc 49024
- X./PC/C/cdecl.arc 15088
- X./PC/C/cephes.arc 58368
- X./PC/C/clp_v11.arc 26056
- X./PC/C/cnews001.arc 7995
- X./PC/C/cnews002.arc 7393
- X./PC/C/cnews003.arc 25600
- X./PC/C/cnews004.arc 67584
- X./PC/C/cnews005.arc 115712
- X./PC/C/cnews006.arc 40020
- X./PC/C/cnews007.arc 13312
- X./PC/C/cnews008.arc 73748
- X./PC/C/cnews009.arc 96256
- X./PC/C/cnews010.arc 72437
- SHAR_EOF
- $TOUCH -am 1021235690 testdata &&
- chmod 0644 testdata ||
- echo "restore of testdata failed"
- set `wc -c testdata`;Wc_c=$1
- if test "$Wc_c" != "1281"; then
- echo original size 1281, current size $Wc_c
- fi
- exit 0
-
-