- Path: sparky!uunet!gatech!usenet.INS.CWRU.Edu!agate!darkstar.UCSC.EDU!osr
- From: dfk@wildcat.dartmouth.edu (David Kotz)
- Newsgroups: comp.os.research
- Subject: Parallel I/O bibliography
- Date: 1 Sep 1992 18:57:33 GMT
- Organization: Dartmouth College, Hanover, NH
- Lines: 2534
- Approved: comp-os-research@ftp.cse.ucsc.edu
- Message-ID: <180eetINNjp7@darkstar.UCSC.EDU>
- NNTP-Posting-Host: ftp.cse.ucsc.edu
- Originator: osr@ftp
-
- BibTeX bibliography file: Parallel I/O
-
- Third Edition
- August 27, 1992
-
- This bibliography covers parallel I/O, with a significant emphasis on
- file systems rather than, say, network or graphics I/O. This includes
- architecture, operating systems, some algorithms, and some workload
- characterization. You can probably also see a bias toward prefetching
- and caching. This supersedes my older bibliographies.
-
- The entries are alphabetized by cite key. The emphasis is on including
- everything relevant, rather than selecting a few key articles of
- interest. Thus, you probably don't want (or need) to read everything
- here. There are many repeated entries, in the sense that a paper is
- often published first as a TR, then in a conference, then in a
- journal.
-
- NOTE: all comments are mine, and any opinions expressed there are mine
- only. In some cases I am simply restating the opinion or result
- obtained by the paper's authors, and thus even I might disagree with
- the statement. I keep most editorial comments to a minimum.
-
- Please let me know if you have any additions or corrections. You may
- use the bibliography (and copy it around) as you please except for
- publishing it as a whole, since the compilation is mine.
-
- Please leave this header on the collection; BibTeX won't mind.
-
- This bibliography (and many others) is archived in ftp.cse.ucsc.edu:pub/bib.
-
- David Kotz
- Assistant Professor
- Mathematics and Computer Science
- Dartmouth College
- 6188 Bradley Hall
- Hanover NH 03755-3551
- @string {email = "David.Kotz@Dartmouth.edu"} % have to hide this from bibtex
- -----------------------------------------------------------------------------
-
- @InProceedings{abali:ibm370,
- author = {B\"{u}lent Abali and Bruce D. Gavril and Richard W. Hadsell and
- Linh Lam and Brion Shimamoto},
- title = {{Many/370: A} Parallel Computer Prototype for {I/O} Intensive
- Applications},
- booktitle = {Sixth Annual Distributed-Memory Computer Conference},
- year = {1991},
- pages = {728--730},
- keyword = {parallel I/O, multiprocessor file system},
- comment = {Describes a parallel IBM/370, where they attach several small 370s
- to a switch, and several disks to each 370. But they don't seem to have much
- in the way of striping. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{abu-safah:speedup,
- author = {Walid Abu-Safah and Harlan Husmann and David Kuck},
- title = {On {Input/Output} Speed-up in Tightly-coupled Multiprocessors},
- journal = {IEEE Transactions on Computers},
- year = {1986},
- pages = {520--530},
- keyword = {parallel I/O, I/O},
- comment = {Also TR UIUCDCS-R-84-1182 from CS at UIUC. Derives formulas for
- the speedup with and without I/O considered and with parallel software and
- hardware format conversion. Considering I/O gives a more optimistic view of
- the speedup of a program {\em assuming} that the parallel version can use its
- I/O bandwidth as effectively as the serial processor. Concludes that, for a
- given number of processors, increasing the I/O bandwidth is the most
- effective way to speed up the program (over the format conversion
- improvements). [David.Kotz@Dartmouth.edu]}
- }
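-
- For orientation only (a generic Amdahl-style model, not the formulas derived
- in the paper): with compute time $T_c$, I/O time $T_{io}$, $p$ processors, and
- an effective I/O speedup $\beta$ from parallel I/O and format conversion,
- $S = (T_c + T_{io}) / (T_c/p + T_{io}/\beta)$. Once $T_c/p$ is small,
- $S \rightarrow \beta (T_c + T_{io}) / T_{io}$, so further gains come almost
- entirely from raising the I/O bandwidth, matching the conclusion above.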
-
- @InProceedings{alverson:tera,
- author = {Robert Alverson and David Callahan and Daniel Cummings and Brian
- Koblenz and Allan Porterfield and Burton Smith},
- title = {The {Tera} Computer System},
- booktitle = {1990 International Conference on Supercomputing},
- year = {1990},
- pages = {1--6},
- keyword = {parallel architecture, MIMD, NUMA},
- comment = {Interesting architecture. 3-d mesh of pipelined packet-switch
- nodes, e.g., 16x16x16 is 4096 nodes, with 256 procs, 512 memory units, 256 I/O
- cache units, and 256 I/O processors attached. 2816 remaining nodes are just
- switching nodes. Each processor is 64-bit custom chip with up to 128
- simultaneous threads in execution. It alternates between ready threads, with
- a deep pipeline. Inter-instruction dependencies explicitly encoded by the
- compiler, stalling those threads until the appropriate time. Each thread has
- a complete set of registers! Memory units have 4-bit tags on each word, for
- full/empty and trap bits. Shared memory across the network: NUMA.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{arendt:genome,
- author = {James W. Arendt},
- title = {Parallel Genome Sequence Comparison Using a Concurrent File System},
- year = {1991},
- number = {UIUCDCS-R-91-1674},
- institution = {University of Illinois at Urbana-Champaign},
- keyword = {parallel file system, parallel I/O, Intel iPSC/2},
- comment = {Studies the performance of Intel CFS. Uses an application that
- reads in a huge file of records, each a genome sequence, and compares each
- sequence against a given sequence. Looks at cache performance, message
- latency, cost of prefetches and directory reads, and throughput. He tries
- one-disk, one-proc transfer rates. Because of contention with the directory
- server on one of the two I/O nodes, it was faster to put all of the file on
- the other I/O node. Striping is good for multiple readers though. Best access
- pattern was interleaved, not segmented or separate files, because it avoided
- disk seeks. Apparently the files are stored contiguously. Can get good
- speedup by reading the sequences in big integral record sizes, from CFS,
- using load balancing scheduled by the host. Contention for directory blocks
- -- through single-node directory server. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{asbury:fortranio,
- author = {Raymond K. Asbury and David S. Scott},
- title = {{FORTRAN} {I/O} on the {iPSC/2}: Is there read after write?},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {129--132},
- keyword = {parallel I/O, hypercube, Intel iPSC/2, file access pattern}
- }
-
- @InProceedings{baldwin:hyperfs,
- author = {C. H. Baldwin and W. C. Nestlerode},
- title = {A Large Scale File Processing Application on a Hypercube},
- booktitle = {Fifth Annual Distributed-Memory Computer Conference},
- year = {1990},
- pages = {1400--1404},
- keyword = {multiprocessor file system, file access pattern, parallel I/O,
- hypercube},
- comment = {Census-data processing on an nCUBE/10 at USC. Their program uses
- an interleaved pattern, which is like lps or gw with multi-record records
- (i.e., the application does its own blocking). Shifted to asynchronous I/O to
- do OBL manually. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{barak:hfs,
- author = {Amnon Barak and Bernard A. Galler and Yaron Farber},
- title = {A Holographic File System for a Multicomputer with Many Disk Nodes},
- year = {1988},
- month = {May},
- number = {88-6},
- institution = {Dept. of Computer Science, Hebrew University of Jerusalem},
- keyword = {parallel I/O, hashing, reliability, disk shadowing},
- comment = {Describes a file system for a distributed system that scatters
- records of each file over many disks using hash functions. The hash function
- is known by all processors, so no one processor must be up to access the
- file. Any portion of the file whose disknode is available may be accessed.
- Shadow nodes are used to take over for nodes that go down, saving the info
- for later use by the proper node. Intended to easily parallelize read/write
- accesses and global file operations, and to increase file availability.
- [David.Kotz@Dartmouth.edu]}
- }
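-
- A minimal sketch of the hash-based placement idea described above (the
- function names, the shadow-selection rule, and the Python rendering are
- illustrative assumptions, not taken from the report):
-
-     def home_node(file_id, record_no, n_nodes):
-         # The hash function is known by every processor, so any node can
-         # locate a record without asking a central server.
-         return hash((file_id, record_no)) % n_nodes
-
-     def node_to_access(file_id, record_no, n_nodes, up_nodes):
-         # If the home disk node is down, a shadow node (here simply the next
-         # node, an assumed rule) accepts the access and saves the data for
-         # the home node to pick up later.
-         home = home_node(file_id, record_no, n_nodes)
-         return home if home in up_nodes else (home + 1) % n_nodes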
-
- @InProceedings{bell:physics,
- author = {Jean L. Bell},
- title = {A Specialized Data Management System for Parallel Execution of
- Particle Physics Codes},
- booktitle = {ACM SIGMOD Conference},
- year = {1988},
- pages = {277--285},
- keyword = {file access pattern, disk prefetch, file system},
- comment = {A specialized database system for particle physics codes. Valuable
- for its description of access patterns and subsequent file access
- requirements. Particle-in-cell codes iterate over timesteps, updating the
- position of each particle, and then the characteristics of each cell in the
- grid. Particles may move from cell to cell. Particle update needs itself and
- nearby gridcell data. The whole dataset is too big for memory, and each
- timestep must be stored on disk for later analysis anyway. Regular file
- systems are inadequate: specialized DBMS is more appropriate. Characteristics
- needed by their application class: multidimensional access (by particle type
- or by location, i.e., multiple views of the data), coordination between grid
- and particle data, coordination between processors, coordinated access to
- meta-data, inverted files, horizontal clustering, large blocking of data,
- asynchronous I/O, array data, complicated joins, and prefetching according to
- user-prespecified order. Note that many of these things can be provided by a
- file system, but that most are hard to come by in typical file systems, if
- not impossible. Many of these features are generalizable to other
- applications. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{benner:pargraphics,
- author = {Robert E. Benner},
- title = {Parallel Graphics Algorithms on a 1024-Processor Hypercube},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {133--140},
- keyword = {hypercube, graphics, parallel algorithms, parallel I/O},
- comment = {About using the nCUBE/10's RT Graphics System. They were
- frustrated by an unusual mapping from the graphics memory to the display, a
- shortage of memory on the graphics nodes, and small message buffers on the
- graphics nodes. They wrote some algorithms for collecting the columns of
- pixels from the hypercube nodes, and routing them to the appropriate graphics
- node. They also would have liked a better interconnection network between the
- graphics nodes, at least for synchronization. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{bestavros:raid,
- author = {Azer Bestavros},
- title = {{IDA}-Based Redundant Arrays of Inexpensive Disks},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- pages = {2--9},
- keyword = {RAID, disk array, reliability, parallel I/O},
- comment = {[Not with the RAID project.] Uses the Information Dispersal
- Algorithm (IDA) to generate $n+m$ blocks from $n$ blocks, to tolerate $m$
- disk failures; all of the data from the $n$ blocks is hidden in the $n+m$
- blocks. [David.Kotz@Dartmouth.edu]}
- }
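-
- For reference, the standard IDA construction (Rabin's scheme in general form,
- not necessarily the exact variant used in this paper): choose an
- $(n+m) \times n$ matrix $A$ in which every $n \times n$ submatrix is
- invertible, and store the $n+m$ blocks $c = Ad$, where $d$ is the vector of
- $n$ data blocks. Any $n$ surviving blocks $c_S$ recover the data as
- $d = A_S^{-1} c_S$, so any $m$ failures are tolerated at a space overhead of
- $m/n$.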
-
- @InProceedings{bitton:schedule,
- author = {Dina Bitton},
- title = {Arm Scheduling in Shadowed Disks},
- booktitle = {Proceedings of IEEE Compcon},
- year = {1989},
- month = {Spring},
- pages = {132--136},
- keyword = {parallel I/O, disk shadowing, reliability, mirrored disk, disk
- seek time},
- comment = {Goes further than bitton:shadow. Uses simulation to verify results
- from that paper, which were expressions for the expected seek distance of
- shadowed disks, using shortest-seek-time arm scheduling. Problem is her
- assumption that arm positions stay independent, in the face of correlating
- effects like writes, which move all arms to the same place. Simulations match
- model only barely, and only in some cases. Anyway, shadowed disks can improve
- performance for workloads with more than 60 or 70\% reads. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{bitton:shadow,
- author = {D. Bitton and J. Gray},
- title = {Disk Shadowing},
- booktitle = {14th International Conference on Very Large Data Bases},
- year = {1988},
- pages = {331--338},
- keyword = {parallel I/O, disk shadowing, reliability, mirrored disk, disk
- seek time},
- comment = {Also TR UIC EECS 88-1 from Univ of Illinois at Chicago. Shadowed
- disks are mirroring with more than 2 disks. Writes to all disks, reads from
- one with shortest seek time. Acknowledges but ignores problem posed by
- lo:disks. Also considers that newer disk technology does not have linear seek
- time $(a+bx)$ but rather $(a+b\sqrt{x})$. Shows that with either seek
- distribution the average seek time for workloads with at least 60\% reads
- decreases in the number of disks. See also bitton:schedule.
- [David.Kotz@Dartmouth.edu]}
- }
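-
- Writing out the two seek-time models mentioned above, with $x$ the seek
- distance: $t(x) = a + bx$ for older drives and $t(x) = a + b\sqrt{x}$ for
- newer ones. In a shadow set of $n$ disks a read is served by the arm with the
- shortest seek, costing $\min_i t(x_i)$, while a write must update all $n$
- copies; that asymmetry is why the benefit appears only for read-heavy
- workloads (at least 60\% reads, per the comment above).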
-
- @Article{boral:bubba,
- author = {Haran Boral and William Alexander and Larry Clay and George
- Copeland and Scott Danforth and Michael Franklin and Brian Hart and Marc
- Smith and Patrick Valduriez},
- title = {Prototyping {Bubba}, a Highly Parallel Database System},
- journal = {IEEE Transactions on Knowledge and Data Engineering},
- year = {1990},
- month = {March},
- volume = {2},
- number = {1},
- keyword = {parallel I/O, database, disk caching},
- comment = {More recent than copeland:bubba, and a little more general. This
- gives few details, and doesn't spend much time on the parallel I/O. Bubba
- does use parallel independent disks, with a significant effort to place data
- on the disks, and do the work local to the disks, to balance the load and
- minimize interprocessor communication. Also they use a single-level store
- (i.e., memory-mapped files) to improve performance of their I/O system,
- including page locking that is assisted by the MMU. The OS has hooks for the
- database manager to give memory-management policy hints.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{boral:critique,
- author = {H. Boral and D. {DeWitt}},
- title = {Database machines: an idea whose time has passed?},
- booktitle = {Proceedings of the Fourth International Workshop on Database
- Machines},
- year = {1983},
- pages = {166--187},
- publisher = {Springer-Verlag},
- keyword = {file access pattern, parallel I/O, I/O, database machine},
- comment = {Improvements in I/O bandwidth crucial for supporting database
- machines, otherwise highly parallel DB machines are useless (I/O bound). Two
- ways to do it: 1) synchronized interleaving by using custom controller and
- regular disks to read/write same track on all disks, which speeds individual
- accesses. 2) use very large cache (100-200M) to keep blocks to re-use and to
- do prefetching. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{bradley:ipsc2io,
- author = {David K. Bradley and Daniel A. Reed},
- title = {Performance of the {Intel iPSC/2} Input/Output System},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {141--144},
- keyword = {hypercube, parallel I/O, Intel},
- comment = {Some measurements and simulations of early CFS performance. Looks
- terrible, but they note that it was a beta version of the first CFS. They
- determined that the disks are the bottleneck. But this may just imply that
- they need more disks. Their parallel synthetic applications had each process
- read a separate file. Files were too short (16K??). [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{brandwijn:dasd,
- author = {Alexandre Brandwajn},
- title = {Performance Benefits of Parallelism in Cached {DASD} Controllers},
- year = {1988},
- month = {November},
- number = {UCSC-CRL-88-30},
- institution = {Computer Research Laboratory, UC Santa Cruz},
- keyword = {parallel I/O, disk caching, disk hardware},
- comment = {Some new DASD products with caches overlap cache hits with
- prefetch of remainder of track into cache. They use analytical model to
- evaluate performance of these. They find performance improvements of 5-15
- percent under their assumptions. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{browne:io-arch,
- author = {J. C. Browne and A. G. Dale and C. Leung and R. Jenevein},
- title = {A Parallel Multi-Stage {I/O} Architecture with Self-managing Disk
- Cache for Database Management Applications},
- booktitle = {Proceedings of the Fourth International Workshop on Database
- Machines},
- year = {1985},
- month = {March},
- publisher = {Springer-Verlag},
- keyword = {parallel I/O, disk caching, database},
- comment = {A fancy interconnection from procs to I/O processors, intended
- mostly for DB applications, that uses cache at I/O end and a switch with
- smarts. Cache is associative. Switch helps out in sort and join operations.
- No page numbers. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{cabrera:pario,
- author = {Luis-Felipe Cabrera and Darrell D. E. Long},
- title = {Swift: {Using} Distributed Disk Striping to Provide High {I/O} Data
- Rates},
- year = {1991},
- number = {CRL-91-46},
- institution = {UC Santa Cruz},
- note = {To appear, {\em Computing Systems}},
- keyword = {parallel I/O, disk striping, distributed file system},
- comment = {See cabrera:swift, cabrera:swift2. Describes the performance of a
- Swift prototype and simulation results. They stripe data over multiple disk
- servers (here SPARC SLC with local disk), and access it from a SPARC2 client.
- Their prototype gets nearly linear speedup for reads and asynchronous writes;
- synchronous writes are slower. They hit the limit of the Ethernet and/or the
- client processor with three disk servers. Adding another Ethernet allowed
- them to go higher. Simulation shows good scaling. Seems like a smarter
- implementation would help, as would special-purpose parity-computation
- hardware. Good arguments for use of PID instead of RAID, to avoid a
- centralized controller that is both a bottleneck and a single point of
- failure. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{cabrera:swift,
- author = {Luis-Felipe Cabrera and Darrell D. E. Long},
- title = {Swift: A Storage Architecture for Large Objects},
- year = {1990},
- number = {UCSC-CRL-89-04},
- institution = {U.C. Santa Cruz},
- keyword = {parallel I/O, disk striping, distributed file system, multimedia},
- comment = {See cabrera:swift. A brief outline of a design for a
- high-performance storage system, designed for storing and retrieving large
- objects like color video or visualization data at very high speed. They
- distribute data over several ``storage agents'', which are some form of disk
- or RAID. They are all connected by a high-speed network. A ``storage
- manager'' decides where to spread each file, what kind of reliability
- mechanism is used. User provides preallocation info such as size, reliability
- level, data rate requirements, {\em etc.} [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{cabrera:swift2,
- author = {Luis-Felipe Cabrera and Darrell D. E. Long},
- title = {Exploiting Multiple {I/O} Streams to Provide High Data-Rates},
- booktitle = {Proceedings of the 1991 Summer Usenix Conference},
- year = {1991},
- pages = {31--48},
- keyword = {parallel I/O, disk striping, distributed file system, multimedia},
- comment = {See also cabrera:swift. More detail than the other paper.
- Experimental results from a prototype that stripes files across a distributed
- file system. Gets almost linear speedup in certain cases. Much better than
- NFS. Simulation to extend it to larger systems. Compare with DataMesh?
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{chen:eval,
- author = {Peter Chen and Garth Gibson and Randy Katz and David Patterson},
- title = {An Evaluation of Redundant Arrays of Disks using an {Amdahl 5890}},
- booktitle = {Proceedings of the 1990 ACM Sigmetrics Conference on Measurement
- and Modeling of Computer Systems},
- year = {1990},
- month = {May},
- pages = {74--85},
- keyword = {parallel I/O, RAID, disk array},
- comment = {An experimental validation of the performance predictions of
- patterson:raid, plus some extensions. Confirms that RAID level 5 (rotated
- parity) is best for large read/writes, and RAID level 1 (mirroring) is best
- for small reads/writes. [David.Kotz@Dartmouth.edu]}
- }
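-
- As a concrete illustration of ``rotated parity'' (one generic RAID level 5
- layout, not necessarily the exact mapping measured in this paper):
-
-     def raid5_stripe(stripe, n_disks):
-         # The parity block rotates across the disks from stripe to stripe,
-         # so no single disk becomes a parity hot spot on small writes.
-         parity_disk = (n_disks - 1 - stripe) % n_disks
-         data_disks = [d for d in range(n_disks) if d != parity_disk]
-         return parity_disk, data_disks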
-
- @InProceedings{chen:maxraid,
- author = {Peter M. Chen and David A. Patterson},
- title = {Maximizing Performance in a Striped Disk Array},
- booktitle = {Proceedings of the 17th Annual International Symposium on
- Computer Architecture},
- year = {1990},
- pages = {322--331},
- keyword = {parallel I/O, RAID, disk striping},
- comment = {Choosing the optimal striping unit, i.e., size of contiguous data
- on each disk (bit, byte, block, {\em etc.}). A small striping unit is good for
- low-concurrency workloads since it increases the parallelism applied to each
- request, but a large striping unit can support high-concurrency workloads
- where each independent request depends on fewer disks. They do simulations to
- find throughput, and thus to pick the striping unit. They find equations for
- the best compromise striping unit based on the concurrency and the disk
- parameters, or on the disk parameters alone. Some key assumptions may limit
- applicability, but this is not addressed. [David.Kotz@Dartmouth.edu]}
- }
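-
- The tradeoff in the comment can be made concrete with a small helper (simple
- geometry only, not the paper's throughput model): a request of $r$ bytes
- starting at byte offset $s$ with striping unit $u$ touches the following
- number of disks.
-
-     def disks_touched(s, r, u, n_disks):
-         # Number of distinct stripe units the request spans, capped at the
-         # array size. A small unit raises this count (more parallelism per
-         # request); a large unit keeps concurrent requests on fewer disks.
-         first = s // u
-         last = (s + r - 1) // u
-         return min(n_disks, last - first + 1)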
-
- @TechReport{chen:raid,
- author = {Peter Chen and Garth Gibson and Randy Katz and David Patterson and
- Martin Schulze},
- title = {Two papers on {RAIDs}},
- year = {1988},
- month = {December},
- number = {UCB/CSD 88/479},
- institution = {UC Berkeley},
- keyword = {parallel I/O, RAID, disk array},
- comment = {Basically an updated version of patterson:raid and the
- prepublished version of gibson:failcorrect. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{chervenak:raid,
- author = {Ann L. Chervenak and Randy H. Katz},
- title = {Performance of a Disk Array Prototype},
- booktitle = {Proceedings of the 1991 ACM Sigmetrics Conference on Measurement
- and Modeling of Computer Systems},
- year = {1991},
- pages = {188--197},
- keyword = {parallel I/O, disk array, performance evaluation, RAID},
- comment = {Measuring the performance of a RAID prototype with a Sun4/280, 28
- disks on 7 SCSI strings, using 4 HBA controllers on a VME bus from the Sun.
- They found lots of bottlenecks that really slowed them down. Under Sprite, the
- disks were the bottleneck for single disk I/O, single disk B/W, and string
- I/O. Sprite was a bottleneck for single disk I/O and string I/O. The host
- memory was a bottleneck for string B/W, HBA B/W, overall I/O, and overall
- B/W. With a simpler OS, that saved on data copying, they did better, but were
- still limited by the HBA, SCSI protocol, or the VME bus. Clearly they needed
- more parallelism in the busses and control system. [David.Kotz@Dartmouth.edu]}
- }
-
- @Manual{convex:stripe,
- title = {{CONVEX UNIX} Programmer's Manual, Part I},
- edition = {Eighth},
- year = {1988},
- month = {October},
- organization = {CONVEX Computer Corporation},
- address = {Richardson, Texas},
- keyword = {parallel I/O, parallel file system, striping},
- comment = {Implementation of striped disks on the CONVEX. Uses partitions of
- normal device drivers. Similar to SunOS, VMS, and BBN TC2000's nX striping.
- Kernel data structure knows about the interleaving granularity, the set of
- partitions, sizes, etc. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{copeland:bubba,
- author = {George Copeland and William Alexander and Ellen Boughter and Tom
- Keller},
- title = {Data Placement in {Bubba}},
- booktitle = {ACM SIGMOD Conference},
- year = {1988},
- month = {June},
- pages = {99--108},
- keyword = {parallel I/O, database, disk caching},
- comment = {A database machine. Experimental/analytical model of a placement
- algorithm that declusters relations across several parallel, independent
- disks. The declustering is done on a subset of the disks, and the choices
- involved are the number of disks to decluster onto, which relations to put
- where, and whether a relation should be cache-resident. Communications
- overhead limits the usefulness of declustering in some cases, depending on
- the workload. See boral:bubba [David.Kotz@Dartmouth.edu]}
- }
-
- @Misc{cray:pario,
- key = {Cray89},
- author = {Cray Research},
- title = {{Cray Research I/O} Solutions},
- year = {1989},
- note = {Sales literature},
- keyword = {parallel I/O, disk hardware},
- comment = {Glossies from Cray describing their I/O products.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Misc{cray:pario2,
- key = {Cray90},
- author = {Cray Research},
- title = {{DS-41} Disk Subsystem},
- year = {1990},
- note = {Sales literature number MCFS-4-0790},
- keyword = {parallel I/O, disk hardware},
- comment = {Glossy from Cray describing their new disk subsystem: up to four
- controllers and up to four ``drives'', each of which actually has four
- spindles. Thus, a full subsystem has 16 disks. Each drive or controller
- sustains 9.6 MBytes/sec, for a total of 38.4 MBytes/sec. Each drive
- has 4.8 GBytes, for a total of 19.2 Gbytes. Access time per drive is 2--46.6
- msec, average 24 msec. They don't say how the 4 spindles within a drive are
- controlled or arranged. [David.Kotz@Dartmouth.edu]}
- }
-
- @Unpublished{crockett:manual,
- author = {Thomas W. Crockett},
- title = {Specification of the Operating System Interface for Parallel File
- Organizations},
- year = {1988},
- note = {Publication status unknown (ICASE technical report)},
- keyword = {parallel I/O, parallel file system},
- comment = {Man pages for his Flex version of file interface.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{crockett:par-files,
- author = {Thomas W. Crockett},
- title = {File Concepts for Parallel {I/O}},
- booktitle = {Proceedings of Supercomputing '89},
- year = {1989},
- pages = {574--579},
- keyword = {parallel I/O, file access pattern, parallel file system},
- comment = {Two views of a file: global (for sequential programs) and internal
- (for parallel programs). Standardized forms for these views, for long-lived
- files. Temp files have specialized forms. The access types are sequential,
- partitioned, interleaved, and self-scheduled, plus global random and
- partitioned random. He relates these to their best storage patterns. Buffer
- cache only needed for direct (random) access. The application must specify
- the access pattern desired. [David.Kotz@Dartmouth.edu]}
- }
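-
- For the pattern names used above (and throughout this bibliography), a
- minimal sketch of which record indices process $p$ of $P$ touches in an
- $N$-record file, assuming the usual meaning of the terms; self-scheduled
- access would instead hand out records dynamically from a shared counter.
-
-     def partitioned(p, P, N):
-         # Each process reads one contiguous segment of the file.
-         return range(p * N // P, (p + 1) * N // P)
-
-     def interleaved(p, P, N):
-         # Records are dealt out round-robin: p, p+P, p+2P, ...
-         return range(p, N, P)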
-
- @Article{csa-io,
- author = {T. J. M.},
- title = {Now: Parallel storage to match parallel {CPU} power},
- journal = {Electronics},
- year = {1988},
- month = {December},
- volume = {61},
- number = {12},
- pages = {112},
- keyword = {parallel I/O, disk array}
- }
-
- @InProceedings{debenedictus:ncube,
- author = {Erik DeBenedictus and Juan Miguel del Rosario},
- title = {{nCUBE} Parallel {I/O} Software},
- booktitle = {Eleventh Annual IEEE International Phoenix Conference on
- Computers and Communications (IPCCC)},
- year = {1992},
- month = {April},
- pages = {0117--0124},
- keyword = {parallel file system, parallel I/O},
- comment = {Interesting paper. Describes their mechanism for mapping I/O so
- that the file system knows both the mapping of a data structure into memory
- and on the disks, so that it can do the permutation and send the right data
- to the right disk, and back again. Interesting Unix-compatible interface.
- Questionable whether it can handle complex formats. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{debenedictus:pario,
- author = {Erik DeBenedictus and Peter Madams},
- title = {{nCUBE's} Parallel {I/O} with {Unix} Capability},
- booktitle = {Sixth Annual Distributed-Memory Computer Conference},
- year = {1991},
- pages = {270--277},
- keyword = {parallel I/O, multiprocessor file system, file system interface},
- comment = {Looks like they give the byte-level mapping, then do normal reads
- and writes; mapping routes the data to and from the correct place. It does
- let you intermix computation with I/O. Elegant concept. Nice interface. Works
- best, I think, for cases where the data layout is known in advance, the data
- format is known, and the mapping is regular enough for easy specification.
- [David.Kotz@Dartmouth.edu]}
- }
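-
- A hypothetical example of the kind of byte-level mapping being described (the
- actual nCUBE interface is not reproduced here): with a block-cyclic layout,
- the file system can compute where each byte of a process's buffer belongs in
- the global file and route it there during an ordinary read or write.
-
-     def global_offset(proc, local_off, n_procs, block_size):
-         # Block-cyclic map: process `proc' owns blocks proc, proc + n_procs,
-         # proc + 2*n_procs, ... of the global file.
-         blk, within = divmod(local_off, block_size)
-         return (blk * n_procs + proc) * block_size + within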
-
- @Article{delrosario:nCUBE,
- author = {Juan Miguel del Rosario},
- title = {High Performance Parallel {I/O} on the {nCUBE} 2},
- journal = {Institute of Electronics, Information and Communications Engineers
- (Transactions)},
- year = {1992},
- month = {August},
- note = {To appear},
- keyword = {parallel I/O, parallel file system},
- comment = {Much is also covered in his other articles, but more detail here
- on the mapping functions, and more flexible mapping functions (can be user
- specified, or some from a library). Striped disks, parallel pipes, graphics,
- and HIPPI supported. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{dewitt:gamma,
- author = {David J. {DeWitt} and Robert H. Gerber and Goetz Graefe and Michael
- L. Heytens and Krishna B. Kumar and M. Muralikrishna},
- title = {{GAMMA}: A High Performance Dataflow Database Machine},
- year = {1986},
- month = {March},
- number = {TR-635},
- institution = {Dept. of Computer Science, Univ. of Wisconsin-Madison},
- keyword = {parallel I/O, database, GAMMA},
- comment = {Better to cite dewitt:gamma2. Multiprocessor (VAX) DBMS on a token
- ring with disk at each processor. They thought this was better than
- separating disks from processors by network since then the network must handle
- {\em all} I/O rather than just what needs to move. Conjecture that shared
- memory might be best interconnection network. Relations are horizontally
- partitioned in some way, and each processor reads its own set and operates on
- them there. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{dewitt:gamma-dbm,
- author = {David J. DeWitt and Shahram Ghandeharizadeh and Donovan Schneider},
- title = {A Performance Analysis of the {GAMMA} Database Machine},
- booktitle = {ACM SIGMOD Conference},
- year = {1988},
- month = {June},
- pages = {350--360},
- keyword = {parallel I/O, database, performance analysis, Teradata, GAMMA},
- comment = {Compared Gamma with Teradata and showed speedup for various
- operations on big relations. They see fairly good linear speedup in many cases.
- Note that they vary one variable at a time to examine different things. Their
- bottleneck was at the memory-network interface. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{dewitt:gamma2,
- author = {David J. DeWitt and Robert H. Gerber and Goetz Graefe and Michael
- L. Heytens and Krishna B. Kumar and M. Muralikrishna},
- title = {{GAMMA} --- {A} High Performance Dataflow Database Machine},
- booktitle = {12th International Conference on Very Large Data Bases},
- year = {1986},
- pages = {228--237},
- keyword = {parallel I/O, database, GAMMA},
- comment = {Almost identical to dewitt:gamma, with some updates. See that for
- comments, but cite this one. See also dewitt:gamma3 for a more recent paper.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{dewitt:gamma3,
- author = {David J. DeWitt and Shahram Ghandeharizadeh and Donovan A.
- Schneider and Allan Bricker and Hui-I Hsaio and Rick Rasmussen},
- title = {The {Gamma} Database Machine Project},
- journal = {IEEE Transactions on Knowledge and Data Engineering},
- year = {1990},
- month = {March},
- volume = {2},
- number = {1},
- pages = {44--62},
- keyword = {parallel I/O, database, GAMMA},
- comment = {An updated version of dewitt:gamma2, with elements of
- dewitt:gamma-dbm. Really only need to cite this one. This is the same basic
- idea as dewitt:gamma2, but after they ported the system from the VAXen to an
- iPSC/2. Speedup results good. How about comparing it to a single-processor,
- single-disk system with increasing disk bandwidth? That is, how much of their
- speedup comes from the increasing disk bandwidth, and how much from the
- actual use of parallelism? [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{dewitt:pardbs,
- author = {David DeWitt and Jim Gray},
- title = {Parallel Database Systems: The Future of High-Performance Database
- Systems},
- journal = {Communications of the ACM},
- year = {1992},
- month = {June},
- volume = {35},
- number = {6},
- pages = {85--98},
- keyword = {database, parallel computing, parallel I/O},
- comment = {They point out that the comments of boral:critique --- that
- database machines were doomed --- did not really come true. Their new thesis
- is that specialized hardware is not necessary and has not been successful,
- but that parallel database systems are clearly successful. In particular, they
- argue for shared-nothing layouts. They survey the state-of-the-art parallel
- DB systems. Earlier version in Computer Architecture News 12/90.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{dewitt:parsort,
- author = {David J. DeWitt and Jeffrey F. Naughton and Donovan A. Schneider},
- title = {Parallel Sorting on a Shared-Nothing Architecture using
- Probabilistic Splitting},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- pages = {280--291},
- keyword = {parallel I/O, parallel database, external sorting},
- comment = {Comparing exact and probabilistic splitting for external sorting
- on a database. Model and experimental results from Gamma machine. Basically,
- the idea is to decide on a splitting vector, which defines $N$ buckets for an
- $N$-process program, and have each program read its initial segment of the
- data and send each element to the appropriate bucket (other process). All
- elements received are written to disks as small sorted runs. Then each
- process mergesorts its runs. Probabilistic split uses only a sample of the
- elements to define the vector. [David.Kotz@Dartmouth.edu]}
- }
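-
- A compressed, single-node sketch of the splitting idea described above (the
- sampling rate, run handling, and in-memory stand-ins are assumptions):
-
-     import bisect, heapq
-
-     def choose_splitters(sample, n):
-         # Probabilistic splitting: sort only a sample of the input and pick
-         # n-1 splitters so the n buckets get roughly equal shares.
-         s = sorted(sample)
-         return [s[i * len(s) // n] for i in range(1, n)]
-
-     def destination(x, splitters):
-         # Index of the process (bucket) that should receive element x.
-         return bisect.bisect_right(splitters, x)
-
-     def merge_runs(runs):
-         # Each process writes what it receives as small sorted runs, then
-         # merge-sorts them; heapq.merge stands in for the on-disk merge.
-         return list(heapq.merge(*(sorted(r) for r in runs)))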
-
- @InProceedings{dibble:bridge,
- author = {Peter Dibble and Michael Scott and Carla Ellis},
- title = {Bridge: {A} High-Performance File System for Parallel Processors},
- booktitle = {Proceedings of the Eighth International Conference on
- Distributed Computer Systems},
- year = {1988},
- month = {June},
- pages = {154--161},
- keyword = {Carla, Bridge, parallel file system, Butterfly}
- }
-
- @Article{dibble:pifs,
- author = {P. C. Dibble and M. L. Scott},
- title = {The {Parallel Interleaved File System:} {A} Solution to the
- Multiprocessor {I/O} Bottleneck},
- journal = {IEEE Transactions on Parallel and Distributed Systems},
- year = {1992},
- note = {To appear},
- keyword = {Bridge, parallel file system},
- abstract = {Parallel computers with non-parallel file systems are limited by
- the performance of the processor running the file system. We have designed
- and implemented a parallel file system called Bridge that eliminates this
- problem by spreading both data and file system computation over a large
- number of processors and disks. To assess the effectiveness of Bridge we have
- used it to implement several data-intensive applications, including a
- parallel external merge sort. The merge sort is a particularly demanding
- application; it requires significant amounts of interprocessor communication
- and data movement. A detailed analysis of this application indicates that
- Bridge can profitably be used on configurations in which disks are attached
- to more than 150 processors. Empirical results on a 32-processor
- implementation agree with the analysis, providing us with a high degree of
- confidence in this prediction. Based on our experience, we argue that file
- systems such as Bridge will satisfy the I/O needs of a wide range of parallel
- architectures and applications. This paper has been through one round of
- reviewing for IEEE TPDS, and is currently undergoing revision. The postscript
- in this directory is the version submitted to the journal in May of 1990:
- {\small\tt cayuga.cs.rochester.edu:pub/systems_papers/90.TPDS.Bridge.ps.Z}}
- }
-
- @Article{dibble:sort,
- author = {Peter C. Dibble and Michael L. Scott},
- title = {External Sorting on a Parallel Interleaved File System},
- journal = {University of Rochester 1989--90 Computer Science and Engineering
- Research Review},
- year = {1989},
- keyword = {parallel I/O, sorting, merging, parallel file reference pattern},
- comment = {Cite dibble:sort2. Based on Bridge file system (see
- dibble:bridge). Parallel external merge-sort tool. Sort file on each disk,
- then do a parallel merge. The merge is serialized by the token-passing
- mechanism, but the I/O time dominates. The key is to keep disks busy
- constantly. Uses some read-ahead, write-behind to control fluctuations in
- disk request timing. An analytical model of the algorithm lends insight and
- matches well with the timings. Locality is a big win in Bridge tools.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{dibble:sort2,
- author = {Peter C. Dibble and Michael L. Scott},
- title = {Beyond Striping: The {Bridge} Multiprocessor File System},
- journal = {Computer Architecture News},
- year = {1989},
- month = {September},
- volume = {19},
- number = {5},
- keyword = {parallel I/O, external sorting, merging, parallel file reference
- pattern},
- comment = {Subset of dibble:sort. Extra comments to distinguish from striping
- and RAID work. Good point that those projects are addressing a different
- bottleneck, and that they can provide essentially unlimited bandwidth to a
- single processor. Bridge could use those as individual file systems,
- parallelizing the overall file system, avoiding the software bottleneck.
- Using a very-reliable RAID at each node in Bridge could safeguard Bridge
- against failure for reasonable periods, removing reliability from Bridge
- level. [David.Kotz@Dartmouth.edu]}
- }
-
- @PhdThesis{dibble:thesis,
- author = {Peter C. Dibble},
- title = {A Parallel Interleaved File System},
- year = {1990},
- month = {March},
- school = {University of Rochester},
- keyword = {parallel I/O, external sorting, merging, parallel file system},
- comment = {Also TR 334. Mostly covered by other papers, but includes good
- introduction, discussion of reliability and maintenance issues, and
- implementation. Short mention of prefetching implied that simple OBL was
- counter-productive, but later tool-specific buffering with read-ahead was
- often important. The three interfaces to the PIFS server are interesting. A
- fourth compromise might help make tools easier to write.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{edelson:pario,
- author = {Daniel Edelson and Darrell D. E. Long},
- title = {High Speed Disk {I/O} for Parallel Computers},
- year = {1990},
- month = {January},
- number = {UCSC-CRL-90-02},
- institution = {Baskin Center for Computer Engineering and Information
- Science},
- keyword = {parallel I/O, disk caching, parallel file system, log-structured
- file system, Intel iPSC/2},
- comment = {Essentially a small literature survey. Mentions caching, striping,
- disk layout optimization, log-structured file systems, and Bridge and Intel
- CFS. Plugs their ``Swift'' architecture. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{ellis:interleaved,
- author = {Carla Ellis and P. Dibble},
- title = {An Interleaved File System for the {Butterfly}},
- year = {1987},
- month = {January},
- number = {CS-1987-4},
- institution = {Dept. of Computer Science, Duke University},
- keyword = {Carla, parallel file system, Bridge, Butterfly}
- }
-
- @InProceedings{ellis:prefetch,
- author = {Carla Schlatter Ellis and David Kotz},
- title = {Prefetching in File Systems for {MIMD} Multiprocessors},
- booktitle = {Proceedings of the 1989 International Conference on Parallel
- Processing},
- year = {1989},
- month = {August},
- pages = {I:306--314},
- keyword = {dfk, parallel file system, prefetching, disk caching, MIMD,
- parallel I/O},
- abstract = {The problem of providing file I/O to parallel programs has been
- largely neglected in the development of multiprocessor systems. There are two
- essential elements of any file system design intended for a highly parallel
- environment: parallel I/O and effective caching schemes. This paper
- concentrates on the second aspect of file system design and specifically, on
- the question of whether prefetching blocks of the file into the block cache
- can effectively reduce overall execution time of a parallel computation, even
- under favorable assumptions. Experiments have been conducted with an
- interleaved file system testbed on the Butterfly Plus multiprocessor. Results
- of these experiments suggest that 1) the hit ratio, the accepted measure in
- traditional caching studies, may not be an adequate measure of performance
- when the workload consists of parallel computations and parallel file access
- patterns, 2) caching with prefetching can significantly improve the hit ratio
- and the average time to perform an I/O operation, and 3) an improvement in
- overall execution time has been observed in most cases. In spite of these
- gains, prefetching sometimes results in increased execution times (a negative
- result, given the optimistic nature of the study). We explore why it is not
- trivial to translate savings on individual I/O requests into consistently
- better overall performance and identify the key problems that need to be
- addressed in order to improve the potential of prefetching techniques in this
- environment.},
- comment = {Superseded by kotz:prefetch. [David.Kotz@Dartmouth.edu]}
- }
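-
- Since one-block lookahead (OBL) comes up repeatedly in these entries, a
- minimal sketch of the policy (synchronous and unbounded here for brevity; a
- real cache would prefetch asynchronously and evict):
-
-     class OBLCache:
-         def __init__(self, fetch_block):
-             self.fetch = fetch_block      # block number -> block data
-             self.cache = {}
-
-         def read(self, b):
-             # On every access, make sure block b+1 is also resident so a
-             # sequential reader's next request hits in the cache.
-             if b not in self.cache:
-                 self.cache[b] = self.fetch(b)
-             if b + 1 not in self.cache:
-                 self.cache[b + 1] = self.fetch(b + 1)
-             return self.cache[b]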
-
- @InProceedings{flynn:hyper-fs,
- author = {Robert J. Flynn and Haldun Hadimioglu},
- title = {A Distributed {Hypercube} File System},
- booktitle = {Third Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1988},
- pages = {1375--1381},
- keyword = {parallel I/O, hypercube, parallel file system},
- comment = {For hypercube-like architectures. Interleaved files, though
- flexible. Separate network for I/O, maybe not hypercube. I/O is blocked and
- buffered -- no coherency or prefetching issues discussed. Buffered close to
- point of use. Parallel access is ok. Broadcast supported? I/O nodes
- distinguished from comp nodes. I/O hooked to front-end too. See hadimioglu:fs
- and hadimioglu:hyperfs [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{french:balance,
- author = {James C. French},
- title = {Characterizing the Balance of Parallel {I/O} Systems},
- booktitle = {Sixth Annual Distributed-Memory Computer Conference},
- year = {1991},
- pages = {724--727},
- keyword = {parallel I/O, multiprocessor file system},
- comment = {Proposes the min\_SAR, max\_SAR, and ratio $\phi$ as measures of
- aggregate file system bandwidth. Has to do with load balance issues; how well
- the file system balances between competing nodes in a heavy-use period. Might
- be worth using. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{french:ipsc2io,
- author = {James C. French and Terrence W. Pratt and Mriganka Das},
- title = {Performance Measurement of a Parallel Input/Output System for the
- {Intel iPSC/2} Hypercube},
- journal = {Proceedings of the 1991 ACM Sigmetrics Conference on Measurement
- and Modeling of Computer Systems},
- year = {1991},
- pages = {178--187},
- keyword = {parallel I/O, Intel iPSC/2},
- comment = {See french:ipsc2io-tr. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{french:ipsc2io-tr,
- author = {James C. French and Terrence W. Pratt and Mriganka Das},
- title = {Performance Measurement of a Parallel Input/Output System for the
- {Intel iPSC/2} Hypercube},
- year = {1991},
- number = {IPC-TR-91-002},
- institution = {Institute for Parallel Computation, University of Virginia},
- note = {Appeared in, Proceedings of the 1991 ACM Sigmetrics Conference on
- Measurement and Modeling of Computer Systems},
- keyword = {parallel I/O, Intel iPSC/2, disk caching, prefetching},
- comment = {Cite french:ipsc2io. Really nice study of performance of existing
- CFS system on 32-node + 4 I/O-node iPSC/2. They show big improvements due to
- declustering, preallocation, caching, and prefetching. See also pratt:twofs.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{garcia:striping-reliability,
- author = {Hector Garcia-Molina and Kenneth Salem},
- title = {The Impact of Disk Striping on Reliability},
- journal = {{IEEE} Database Engineering Bulletin},
- year = {1988},
- month = {March},
- volume = {11},
- number = {1},
- pages = {26--39},
- keyword = {parallel I/O, disk striping, reliability, disk array},
- comment = {Reliability of striped filesystems may not be as bad as you think.
- Parity disks help. Performance improvements limited to small number of disks
- ($n<10$). Good point: the efficiency of striping will increase as the gap
- between CPU/memory performance and disk speed widens and as file sizes grow.
- Reliability may
- be better if measured in terms of performing a task in time T, since the
- striped version may take less time. This gives disks less opportunity to fail
- during that period. Also consider the CPU failure mode, and its use over less
- time. [David.Kotz@Dartmouth.edu]}
- }
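-
- For scale, the usual back-of-the-envelope figures from the disk-array
- literature (not this paper's own model): striping over $n$ disks with no
- redundancy gives $MTTF_{array} \approx MTTF_{disk}/n$, while adding a parity
- disk to an $n$-disk group and repairing failures within time $MTTR$ gives
- roughly $MTTF_{group} \approx MTTF_{disk}^2 / (n(n-1)\,MTTR)$, since data is
- lost only when a second disk in the group fails before the first is repaired.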
-
- @InProceedings{gibson:failcorrect,
- author = {Garth A. Gibson and Lisa Hellerstein and Richard M. Karp and Randy
- H. Katz and David A. Patterson},
- title = {Failure Correction Techniques for Large Disk Arrays},
- booktitle = {Third International Conference on Architectural Support for
- Programming Languages and Operating Systems},
- year = {1989},
- month = {April},
- pages = {123--132},
- keyword = {parallel I/O, disk array, RAID, reliability},
- comment = {gibson:raid is the same. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{gibson:raid,
- author = {Garth Gibson and Lisa Hellerstein and Richard Karp and Randy Katz
- and David Patterson},
- title = {Coding techniques for handling failures in large disk arrays},
- year = {1988},
- month = {December},
- number = {UCB/CSD 88/477},
- institution = {UC Berkeley},
- keyword = {parallel I/O, RAID, reliability, disk array},
- comment = {Published as gibson:failcorrect. Design of parity encodings to
- handle more than one bit failure in any group. Their 2-bit correcting codes
- are good enough for 1000-disk RAIDs that 3-bit correction is not needed.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{gray:stripe,
- author = {Jim Gray and Bob Horst and Mark Walker},
- title = {Parity Striping of Disk Arrays: Low-cost Reliable Storage with
- Acceptable Throughput},
- booktitle = {Proceedings of the 16th VLDB Conference},
- year = {1990},
- pages = {148--159},
- keyword = {disk striping, reliability},
- comment = {Parity striping, a variation of RAID 5, is just a different way of
- mapping blocks to disks. It groups parity blocks into extents, and does not
- stripe the data blocks. A logical disk is mostly contained in one physical
- disk, plus a parity region in another disk. Good for transaction processing
- workloads. Has the low cost/GByte of RAID, the reliability of RAID, without
- the high transfer rate of RAID, but with much better requests/second
- throughput than RAID 5. (But 40\% worse than mirrors.) So it is a compromise
- between RAID and mirrors. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{grimshaw:ELFSTR,
- author = {Andrew S. Grimshaw and Loyot, Jr., Edmond C.},
- title = {{ELFS:} Object-oriented Extensible File Systems},
- year = {1991},
- month = {July},
- number = {TR-91-14},
- institution = {Univ. of Virginia Computer Science Department},
- keyword = {parallel I/O, parallel file system, object-oriented, file system
- interface, Intel iPSC/2},
- comment = {From uvacs.cs.virginia.edu. See also grimshaw:elfs. Goals: provide
- high bandwidth and low latency, reduce the cognitive burden on the
- programmer, and manage the proliferation of data formats and architectural
- changes. Details of the plan to make an extensible OO interface to the file
- system. A few results. Objects each have a separate thread of control, so
- they can do asynchronous activity like prefetching and caching in the
- background, and support multiple outstanding requests. The Mentat object
- system makes it easy for them to support pipelining of I/O with I/O and
- computation in the user program. Let the user choose type of consistency
- needed. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{grimshaw:elfs,
- author = {Andrew S. Grimshaw and Loyot, Jr., Edmond C.},
- title = {{ELFS:} Object-oriented Extensible File Systems},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- pages = {177},
- keyword = {parallel I/O, parallel file system, object-oriented, file system
- interface},
- comment = {Full paper grimshaw:ELFSTR. Really neat idea. Uses OO interface to
- file system, which is mostly in user mode. The object classes represent
- particular access patterns (e.g., 2-D matrix) in the file, and hide the actual
- structure of the file. The object knows enough to tailor the cache and
- prefetch algorithms to the semantics. Class inheritance allows layering.
- [David.Kotz@Dartmouth.edu]}
- }
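-
- A hypothetical illustration of the style of interface described (not the ELFS
- classes themselves): the class encodes the access pattern and can therefore
- tailor its own prefetching.
-
-     class FileObject:
-         def __init__(self, read_block):
-             self.read_block = read_block   # block number -> bytes
-             self.cache = {}
-
-         def block(self, b):
-             if b not in self.cache:
-                 self.cache[b] = self.read_block(b)
-             return self.cache[b]
-
-     class Matrix2DFile(FileObject):
-         # Assumes one matrix row per block, stored row-major; because the
-         # class knows the pattern, fetching row i prefetches row i+1.
-         def row(self, i):
-             data = self.block(i)
-             self.block(i + 1)   # prefetch next row (end-of-file check omitted)
-             return data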
-
- @InProceedings{grimshaw:objects,
- author = {Andrew S. Grimshaw and Jeff Prem},
- title = {High Performance Parallel File Objects},
- booktitle = {Sixth Annual Distributed-Memory Computer Conference},
- year = {1991},
- pages = {720--723},
- keyword = {parallel I/O, multiprocessor file system, file system interface},
- comment = {Adds a little beyond grimshaw:ELFSTR. Gives some CFS performance
- results. Note on p.721 he says that CFS prefetches into ``local memory from
- which to satisfy future user requests {\em that never come.}'' This happens if
- the local access pattern isn't purely sequential, as in an interleaved
- pattern. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hadimioglu:fs,
- author = {Haldun Hadimioglu and Robert J. Flynn},
- title = {The Architectural Design of a Tightly-Coupled Distributed Hypercube
- File System},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {147--150},
- keyword = {hypercube, multiprocessor file system},
- comment = {An early paper describing a proposed file system for hypercubes.
- Poorly written. See hadimioglu:hyperfs and flynn:hyper-fs
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hadimioglu:hyperfs,
- author = {Haldun Hadimioglu and Robert J. Flynn},
- title = {The Design and Analysis of a Tightly Coupled Hypercube File System},
- booktitle = {Fifth Annual Distributed-Memory Computer Conference},
- year = {1990},
- pages = {1405--1410},
- keyword = {multiprocessor file system, parallel I/O, hypercube},
- comment = {Describes a hypercube file system based on I/O nodes and processor
- nodes. A few results from a hypercube simulator. See hadimioglu:fs and
- flynn:hyper-fs [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hartman:zebra,
- author = {John H. Hartman and John K. Ousterhout},
- title = {{Zebra: A} Striped Network File System},
- booktitle = {Proceedings of the Usenix File Systems Workshop},
- year = {1992},
- month = {May},
- pages = {71--78},
- keyword = {disk striping, distributed file system}
- }
-
- @InProceedings{hatcher:linda,
- author = {Philip J. Hatcher and Michael J. Quinn},
- title = {{C*-Linda:} {A} Programming Environment with Multiple Data-Parallel
- Modules and Parallel {I/O}},
- booktitle = {Proceedings of the Twenty-Fourth Annual Hawaii International
- Conference on System Sciences},
- year = {1991},
- pages = {382--389},
- keyword = {parallel I/O, Linda, data parallel, nCUBE, parallel graphics,
- heterogeneous computing},
- comment = {C*-Linda is basically a combination of C* and C-Linda. The model
- is that of several SIMD modules interacting in a MIMD fashion through a Linda
- tuple space. The modules are created using {\tt eval}, as in Linda. In this
- case, the compiler statically assigns each eval to a separate subcube on an
- nCUBE 3200, although they also talk about multiprogramming several modules on
- a subcube (not supported by VERTEX). They envision having separate modules
- running on the nCUBE's graphics processors, or having the file system
- directly talk to the tuple space, to support I/O. They also envision talking
- to modules elsewhere on a network, e.g., a workstation, through the tuple
- space. They reject the idea of sharing memory between modules due to the lack
- of synchrony between modules, and message passing because it is error-prone.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hayes:nCUBE,
- author = {John P. Hayes and Trevor N. Mudge and Quentin F. Stout and Stephen
- Colley and John Palmer},
- title = {Architecture of a Hypercube Supercomputer},
- booktitle = {Proceedings of the 1986 International Conference on Parallel
- Processing},
- year = {1986},
- pages = {653--660},
- keyword = {hypercube, parallel architecture, nCUBE},
- comment = {Description of the first nCUBE, the NCUBE/ten. Good historical
- background about hypercubes. Talks about their design choices. Says a little
- about the file system --- basically just a way of mounting disks on top of
- each other, within the nCUBE and to other nCUBEs. [David.Kotz@Dartmouth.edu]}
- }
-
- @Book{hennessy:arch,
- author = {John L. Hennessy and David A. Patterson},
- title = {Computer Architecture: A Quantitative Approach},
- year = {1990},
- publisher = {Morgan Kaufmann},
- keyword = {computer architecture, textbook},
- comment = {Looks like a great coverage of architecture. Of course a chapter
- on I/O! [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hou:disk,
- author = {Robert Y. Hou and Gregory R. Ganger and Yale N. Patt and Charles E.
- Gimarc},
- title = {Issues and Problems in the {I/O} Subsystem, Part {I} --- {The}
- Magnetic Disk},
- booktitle = {Proceedings of the Twenty-Fifth Annual Hawaii International
- Conference on System Sciences},
- year = {1992},
- pages = {48--57},
- keyword = {I/O},
- comment = {A short summary of disk I/O issues: disk technology, latency
- reduction, parallel I/O, {\em etc.}. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{hsiao:decluster,
- author = {Hui-I Hsiao and David DeWitt},
- title = {{Chained Declustering}: {A} New Availability Strategy for
- Multiprocessor Database Machines},
- booktitle = {Proceedings of 6th International Data Engineering Conference},
- year = {1990},
- pages = {456--465},
- keyword = {disk array, reliability, parallel I/O},
- comment = {Chained declustering has cost like mirroring, since it replicates
- each block, but has better load increase during failure than mirrors,
- interleaved declustering, or RAID. (Or parity striping (my guess)). Has
- reliability between that of mirrors and RAID, and much better than
- interleaved declustering. Would also be much easier in a distributed
- environment. See hsiao:diskrep. [David.Kotz@Dartmouth.edu]}
- }
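-
- The placement rule behind chained declustering, sketched (the standard
- formulation; in the paper the unit of placement is a relation fragment):
-
-     def chained_placement(fragment, n_nodes):
-         # The primary copy of fragment i lives on node i and its backup on
-         # the next node in the chain, so each node carries the primaries of
-         # one fragment and the backups of its left neighbor's fragment.
-         primary = fragment % n_nodes
-         backup = (primary + 1) % n_nodes
-         return primary, backup
-
-     def read_node(fragment, n_nodes, failed):
-         # After a failure, reads of the dead node's primaries go to the
-         # backup copy; the extra load can then be spread by shifting part of
-         # each node's primary work one step down the chain.
-         primary, backup = chained_placement(fragment, n_nodes)
-         return backup if primary in failed else primary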
-
- @InProceedings{hsiao:diskrep,
- author = {Hui-I Hsiao and David DeWitt},
- title = {A Performance Study of Three High Availability Data Replication
- Strategies},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- pages = {18--28},
- keyword = {disk array, reliability, disk mirror, parallel I/O},
- comment = {Compares mirrored disks (MD) with interleaved declustering (ID)
- with chained declustering (CD). ID and CD found to have much better
- performance in normal and failure modes. But, it seems that they compared a
- single-queue (synchronous) MD system with an asynchronous ID and CD system,
- so one wonders whether the difference actually came from the asynchrony
- instead of the inherent difference. See hsiao:decluster.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @MastersThesis{husmann:format,
- author = {Harlan Edward Husmann},
- title = {High-Speed Format Conversion and Parallel {I/O} in Numerical
- Programs},
- year = {1984},
- month = {January},
- school = {Department of Computer Science, Univ. of Illinois at
- Urbana-Champaign},
- note = {Available as TR number UIUCDCS-R-84-1152.},
- keyword = {parallel I/O, I/O},
- comment = {Does FORTRAN format conversion in software in parallel or in
- hardware, to obtain good speedups for lots of programs. However he found that
- increasing the I/O bandwidth was the most significant change that could be
- made in the parallel program. [David.Kotz@Dartmouth.edu]}
- }
-
- @Booklet{intel:examples,
- key = {Intel},
- title = {Concurrent {I/O} Application Examples},
- year = {1989},
- howpublished = {Intel Corporation Background Information},
- keyword = {file access pattern, parallel I/O, Intel iPSC/2, hypercube},
- comment = {Lists several examples and the amount and types of data they
- require, and how much bandwidth. Fluid flow modeling, Molecular modeling,
- Seismic processing, and Tactical and strategic systems.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Booklet{intel:ipsc2io,
- key = {Intel},
- title = {{iPSC/2} {I/O} Facilities},
- year = {1988},
- howpublished = {Intel Corporation},
- note = {Order number 280120-001},
- keyword = {parallel I/O, hypercube, Intel iPSC/2},
- comment = {Simple overview, not much detail. See intel:ipsc2, pierce:pario,
- asbury:fortranio. Separate I/O nodes from compute nodes. Each I/O node has a
- SCSI bus to the disks, and communicates with other nodes in the system via
- Direct-Connect hypercube routing. [David.Kotz@Dartmouth.edu]}
- }
-
- @Booklet{intel:paragon,
- key = {Intel},
- title = {Paragon {XP/S} Product Overview},
- year = {1991},
- howpublished = {Intel Corporation},
- keyword = {parallel architecture, parallel I/O, Intel},
- comment = {Not a bad glossy. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{intelio,
- key = {Intel},
- title = {Intel beefs up its {iPSC/2} supercomputer's {I/O} and memory
- capabilities},
- journal = {Electronics},
- year = {1988},
- month = {November},
- volume = {61},
- number = {11},
- pages = {24},
- keyword = {parallel I/O, hypercube, Intel iPSC/2}
- }
-
- @Article{katz:diskarch,
- author = {Randy H. Katz and Garth A. Gibson and David A. Patterson},
- title = {Disk System Architectures for High Performance Computing},
- journal = {Proceedings of the IEEE},
- year = {1989},
- month = {December},
- volume = {77},
- number = {12},
- pages = {1842--1858},
- keyword = {parallel I/O, RAID, disk striping},
- comment = {Good review of the background of disks and I/O architectures, but
- a shorter RAID presentation than patterson:raid. Also addresses controller
- structure. Good ref for the I/O crisis background, though they don't use that
- term here. Good taxonomy of previous array techniques.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{katz:io-subsys,
- author = {Randy H. Katz and John K. Ousterhout and David A. Patterson and
- Michael R. Stonebraker},
- title = {A Project on High Performance {I/O} Subsystems},
- journal = {{IEEE} Database Engineering Bulletin},
- year = {1988},
- month = {March},
- volume = {11},
- number = {1},
- pages = {40--47},
- keyword = {parallel I/O, RAID, Sprite, reliability, disk striping, disk
- array},
- comment = {Early RAID project paper. Describes the Berkeley team's plan to
- use an array of small (100M) hard disks as an I/O server for network file
- service, transaction processing, and supercomputer I/O. Considering
- performance, reliability, and flexibility. Initially hooked to their SPUR
- multiprocessor, using Sprite operating system, new filesystem. Either
- asynchronous striped or independent operation. Use of parity disks to boost
- reliability. Files may be striped across one or more disks and extend over
- several sectors, thus a two-dimensional filesystem; striping need not involve
- all disks. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{katz:update,
- author = {Randy H. Katz and John K. Ousterhout and David A. Patterson and
- Peter Chen and Ann Chervenak and Rich Drewes and Garth Gibson and Ed Lee and
- Ken Lutz and Ethan Miller and Mendel Rosenblum},
- title = {A Project on High Performance {I/O} Subsystems},
- journal = {Computer Architecture News},
- year = {1989},
- month = {September},
- volume = {17},
- number = {5},
- pages = {24--31},
- keyword = {parallel I/O, RAID, reliability, disk array},
- comment = {A short summary of the RAID project. They had completed the first
- prototype with 8 SCSI strings and 32 disks. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{kim:asynch,
- author = {Michelle Y. Kim and Asser N. Tantawi},
- title = {Asynchronous Disk Interleaving: {Approximating} Access Delays},
- journal = {IEEE Transactions on Computers},
- year = {1991},
- month = {July},
- volume = {40},
- number = {7},
- pages = {801--810},
- keyword = {disk interleaving, parallel I/O, performance modelling},
- comment = {As opposed to synchronous disk interleaving, where disks are
- rotationally synchronous and one access is processed at a time. They develop
- a performance model and validate with traces of a database system's disk
- accesses. Average access delay on each disk can be approximated by a normal
- distribution. [David.Kotz@Dartmouth.edu]}
- }
-
- @PhdThesis{kim:interleave,
- author = {Michelle Y. Kim},
- title = {Synchronously Interleaved Disk Systems with their Application to the
- Very Large {FFT}},
- year = {1986},
- school = {IBM Thomas J. Watson Research Center},
- address = {Yorktown Heights, New York 10598},
- note = {IBM Report number RC12372},
- keyword = {parallel I/O, disk striping, file access pattern, disk array},
- comment = {Uniprocessor interleaving techniques. Good case for interleaving.
- Probably better to reference kim:interleaving. Discusses a 3D FFT algorithm
- in which the matrix is broken into subblocks that are accessed in layers. The
- layers are stored so that access is either contiguous or at a regular stride,
- in fairly large chunks. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{kim:interleaving,
- author = {Michelle Y. Kim},
- title = {Synchronized Disk Interleaving},
- journal = {IEEE Transactions on Computers},
- year = {1986},
- month = {November},
- volume = {C-35},
- number = {11},
- pages = {978--988},
- keyword = {parallel I/O, disk striping, disk array},
- comment = {See kim:interleave. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{kotz:fsint,
- author = {David Kotz},
- title = {Multiprocessor File System Interfaces},
- year = {1992},
- month = {March},
- number = {PCS-TR92-179},
- institution = {Dept. of Math and Computer Science, Dartmouth College},
- note = {Abstract appeared in 1992 Usenix Workshop on File Systems},
- keyword = {dfk, parallel I/O, multiprocessor file system, file system
- interface},
- abstract = {Increasingly, file systems for multiprocessors are designed with
- parallel access to multiple disks, to keep I/O from becoming a serious
- bottleneck for parallel applications. Although file system software can
- transparently provide high-performance access to parallel disks, a new file
- system interface is needed to facilitate parallel access to a file from a
- parallel application. We describe the difficulties faced when using the
- conventional (Unix-like) interface in parallel applications, and then outline
- ways to extend the conventional interface to provide convenient access to the
- file for parallel programs, while retaining the traditional interface for
- programs that have no need for explicitly parallel file access. Our interface
- includes a single naming scheme, a {\em multiopen\/} operation, local and
- global file pointers, mapped file pointers, logical records, {\em
- multifiles}, and logical coercion for backward compatibility.},
- comment = {Submitted; will then be a conference paper. Can be ftp'd from
- sunapee.dartmouth.edu in pub/CS-techreports. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{kotz:fsint2p,
- author = {David Kotz},
- title = {Multiprocessor File System Interfaces},
- booktitle = {Proceedings of the Usenix File Systems Workshop},
- year = {1992},
- month = {May},
- pages = {149--150},
- keyword = {dfk, parallel I/O, multiprocessor file system, file system
- interface},
- comment = {Short paper (2 pages). [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{kotz:jpractical,
- author = {David Kotz and Carla Schlatter Ellis},
- title = {Practical Prefetching Techniques for Multiprocessor File Systems},
- journal = {Distributed and Parallel Databases},
- year = {1992},
- note = {To appear.},
- keyword = {dfk, parallel file system, prefetching, disk caching, parallel
- I/O, MIMD},
- abstract = {Improvements in the processing speed of multiprocessors are
- outpacing improvements in the speed of disk hardware. Parallel disk I/O
- subsystems have been proposed as one way to close the gap between processor
- and disk speeds. In a previous paper we showed that prefetching and caching
- have the {\em potential\/} to deliver the performance benefits of parallel
- file systems to parallel applications. In this paper we describe experiments
- with {\em practical\/} prefetching policies that base decisions only on
- on-line reference history, and that can be implemented efficiently. We also
- test the ability of these policies across a range of architectural
- parameters.},
- comment = {Journal version of kotz:practical. See also kotz:writeback.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{kotz:jwriteback,
- author = {David Kotz and Carla Schlatter Ellis},
- title = {Caching and Writeback Policies in Parallel File Systems},
- journal = {Journal of Parallel and Distributed Computing},
- year = {1992},
- month = {January},
- note = {Submitted.},
- keyword = {dfk, parallel file system, disk caching, parallel I/O, MIMD},
- abstract = {Improvements in the processing speed of multiprocessors are
- outpacing improvements in the speed of disk hardware. Parallel disk I/O
- subsystems have been proposed as one way to close the gap between processor
- and disk speeds. Such parallel disk systems require parallel file system
- software to avoid performance-limiting bottlenecks. We discuss cache
- management techniques that can be used in a parallel file system
- implementation. We examine several writeback policies, and give results of
- experiments that test their performance.},
- comment = {Journal version of kotz:writeback. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{kotz:practical,
- author = {David Kotz and Carla Schlatter Ellis},
- title = {Practical Prefetching Techniques for Parallel File Systems},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- month = {December},
- pages = {182--189},
- note = {To appear in {\em Distributed and Parallel Databases}.},
- keyword = {dfk, parallel file system, prefetching, disk caching, parallel
- I/O, MIMD, OS92W},
- abstract = {Improvements in the processing speed of multiprocessors are
- outpacing improvements in the speed of disk hardware. Parallel disk I/O
- subsystems have been proposed as one way to close the gap between processor
- and disk speeds. In a previous paper we showed that prefetching and caching
- have the {\em potential\/} to deliver the performance benefits of parallel
- file systems to parallel applications. In this paper we describe experiments
- with {\em practical\/} prefetching policies, and show that prefetching can be
- implemented efficiently even for the more complex parallel file access
- patterns. We also test the ability of these policies across a range of
- architectural parameters.},
- comment = {Short form of primary thesis results. See kotz:jpractical.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{kotz:prefetch,
- author = {David Kotz and Carla Schlatter Ellis},
- title = {Prefetching in File Systems for {MIMD} Multiprocessors},
- journal = {IEEE Transactions on Parallel and Distributed Systems},
- year = {1990},
- month = {April},
- volume = {1},
- number = {2},
- pages = {218--230},
- keyword = {dfk, parallel file system, prefetching, MIMD, disk caching,
- parallel I/O},
- abstract = {The problem of providing file I/O to parallel programs has been
- largely neglected in the development of multiprocessor systems. There are two
- essential elements of any file system design intended for a highly parallel
- environment: parallel I/O and effective caching schemes. This paper
- concentrates on the second aspect of file system design and specifically, on
- the question of whether prefetching blocks of the file into the block cache
- can effectively reduce overall execution time of a parallel computation, even
- under favorable assumptions. Experiments have been conducted with an
- interleaved file system testbed on the Butterfly Plus multiprocessor. Results
- of these experiments suggest that 1) the hit ratio, the accepted measure in
- traditional caching studies, may not be an adequate measure of performance
- when the workload consists of parallel computations and parallel file access
- patterns, 2) caching with prefetching can significantly improve the hit ratio
- and the average time to perform an I/O operation, and 3) an improvement in
- overall execution time has been observed in most cases. In spite of these
- gains, prefetching sometimes results in increased execution times (a negative
- result, given the optimistic nature of the study). We explore why it is not
- trivial to translate savings on individual I/O requests into consistently
- better overall performance and identify the key problems that need to be
- addressed in order to improve the potential of prefetching techniques in this
- environment.}
- }
-
- @PhdThesis{kotz:thesis,
- author = {David Kotz},
- title = {Prefetching and Caching Techniques in File Systems for {MIMD}
- Multiprocessors},
- year = {1991},
- month = {April},
- school = {Duke University},
- note = {Available as technical report CS-1991-016.},
- keyword = {dfk, parallel file system, prefetching, MIMD, disk caching,
- parallel I/O},
- abstract = {The increasing speed of the most powerful computers, especially
- multiprocessors, makes it difficult to provide sufficient I/O bandwidth to
- keep them running at full speed for the largest problems. Trends show that
- the difference in the speed of disk hardware and the speed of processors is
- increasing, with I/O severely limiting the performance of otherwise fast
- machines. This widening access-time gap is known as the ``I/O bottleneck
- crisis.'' One solution to the crisis, suggested by many researchers, is to
- use many disks in parallel to increase the overall bandwidth. This
- dissertation studies some of the file system issues needed to get high
- performance from parallel disk systems, since parallel hardware alone cannot
- guarantee good performance. The target systems are large MIMD multiprocessors
- used for scientific applications, with large files spread over multiple disks
- attached in parallel. The focus is on automatic caching and prefetching
- techniques. We show that caching and prefetching can transparently provide
- the power of parallel disk hardware to both sequential and parallel
- applications using a conventional file system interface. We also propose a
- new file system interface (compatible with the conventional interface) that
- could make it easier to use parallel disks effectively. Our methodology is a
- mixture of implementation and simulation, using a software testbed that we
- built to run on a BBN GP1000 multiprocessor. The testbed simulates the disks
- and fully implements the caching and prefetching policies. Using a synthetic
- workload as input, we use the testbed in an extensive set of experiments. The
- results show that prefetching and caching improved the performance of
- parallel file systems, often dramatically.}
- }
-
- @InProceedings{kotz:writeback,
- author = {David Kotz and Carla Schlatter Ellis},
- title = {Caching and Writeback Policies in Parallel File Systems},
- booktitle = {1991 IEEE Symposium on Parallel and Distributed Processing},
- year = {1991},
- month = {December},
- pages = {60--67},
- note = {To appear in the {\em Journal of Parallel and Distributed
- Computing}.},
- keyword = {dfk, parallel file system, disk caching, parallel I/O, MIMD},
- abstract = {Improvements in the processing speed of multiprocessors are
- outpacing improvements in the speed of disk hardware. Parallel disk I/O
- subsystems have been proposed as one way to close the gap between processor
- and disk speeds. Such parallel disk systems require parallel file system
- software to avoid performance-limiting bottlenecks. We discuss cache
- management techniques that can be used in a parallel file system
- implementation. We examine several writeback policies, and give results of
- experiments that test their performance.},
- comment = {See also kotz:practical, kotz:jwriteback. [David.Kotz@Dartmouth.edu]}
- }
-
- @Misc{ksr:overview,
- key = {KSR},
- title = {{KSR1} Technology Background},
- year = {1992},
- month = {January},
- howpublished = {Kendall Square Research},
- keyword = {MIMD, parallel architecture},
- comment = {Overview of the KSR 1. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{lee:impl,
- author = {Edward K. Lee},
- title = {Software and Performance Issues in the Implementation of a {RAID}
- Prototype},
- year = {1990},
- month = {May},
- number = {UCB/CSD 90/573},
- institution = {EECS, Univ. California at Berkeley},
- keyword = {parallel I/O, disk striping, performance}
- }
-
- @InProceedings{lee:parity,
- author = {Edward K. Lee and Randy H. Katz},
- title = {Performance Consequences of Parity Placement in Disk Arrays},
- booktitle = {Fourth International Conference on Architectural Support for
- Programming Languages and Operating Systems},
- year = {1991},
- pages = {190--199},
- keyword = {RAID, reliability, parallel I/O},
- comment = {Interesting comparison of several parity placement schemes. Boils
- down to two basic choices, depending on whether read performance or write
- performance is more important to you. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{livny:stripe,
- author = {M. Livny and S. Khoshafian and H. Boral},
- title = {Multi-Disk Management Algorithms},
- booktitle = {Proceedings of the 1987 ACM Sigmetrics Conference on Measurement
- and Modeling of Computer Systems},
- year = {1987},
- month = {May},
- pages = {69--77},
- keyword = {parallel I/O, disk striping, disk array}
- }
-
- @TechReport{lo:disks,
- author = {Raymond Lo and Norman Matloff},
- title = {A Probabilistic Limit on the Virtual Size of Replicated File
- Systems},
- year = {1989},
- institution = {Department of EE and CS, UC Davis},
- keyword = {parallel I/O, replication, file system, disk shadowing},
- comment = {A look at shadowed disks. If you have $k$ disks set up to read
- from the disk with the shortest seek, but write to all disks, you have
- increased reliability, read time like the min of the seeks, and write time
- like the max of the seeks. It appears that with increasing $k$ you can get
- good performance. But this paper clearly shows, since writes move all disk
- heads to the same location, that the effective value of $k$ is actually quite
- low. Only 4--10 disks are likely to be useful for most traffic loads.
- [David.Kotz@Dartmouth.edu]}
- }
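-
- A toy model of the shadowed-disk trade-off above, under the simplifying (and
- optimistic) assumption that all $k$ heads sit at independent uniformly random
- positions; the code is mine, not from the paper, and it ignores the
- head-clustering effect of writes that the paper analyzes:
-
-     # k-way disk shadowing: a read pays the seek of the closest head, a
-     # write waits for the farthest head.  Even in this optimistic model the
-     # read benefit of extra shadows levels off quickly.
-     import random
-
-     def expected_seeks(k, trials=100000):
-         rng = random.Random(0)
-         read_total = write_total = 0.0
-         for _ in range(trials):
-             target = rng.random()
-             seeks = [abs(rng.random() - target) for _ in range(k)]
-             read_total += min(seeks)     # read from the closest head
-             write_total += max(seeks)    # write finishes at the farthest head
-         return read_total / trials, write_total / trials
-
-     if __name__ == "__main__":
-         for k in (1, 2, 4, 8, 16):
-             r, w = expected_seeks(k)
-             print(f"k={k:2d}  avg read seek={r:.3f}  avg write seek={w:.3f}")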
-
- @Article{manuel:logjam,
- author = {Tom Manuel},
- title = {Breaking the Data-rate Logjam with arrays of small disk drives},
- journal = {Electronics},
- year = {1989},
- month = {February},
- volume = {62},
- number = {2},
- pages = {97--100},
- keyword = {parallel I/O, disk array, I/O bottleneck},
- comment = {See also Electronics, Nov. 88 p. 24 and Dec. 88 p. 112. A short
- trade-journal piece on disk arrays. Very good intro, but no technical
- content. Concentrates on the RAID project and lists several commercial
- versions, mostly single-controller. [David.Kotz@Dartmouth.edu]}
- }
-
- @Manual{maspar:pario,
- author = {Maspar},
- title = {Parallel File {I/O} Routines},
- year = {1991},
- keyword = {parallel I/O, multiprocessor file system interface},
- comment = {Man pages for the Maspar file system interface. They have either a
- single shared file pointer, with which all processors read or write in an
- interleaved pattern, or individual (plural) file pointers, allowing arbitrary
- access patterns. Thus, they can do lw, lw1, lps, lpr, seg, and int (see
- kotz:prefetch), but no self-scheduled patterns. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{masters:pario,
- author = {Del Masters},
- title = {Improve Disk Subsystem Performance with Multiple Serial Drives in
- Parallel},
- journal = {Computer Technology Review},
- year = {1987},
- month = {July},
- volume = {7},
- number = {9},
- pages = {76--77},
- keyword = {parallel I/O},
- comment = {Information about the early Maximum Strategy disk array, which
- striped over 4 disk drives, apparently synchronously. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{matloff:multidisk,
- author = {Norman S. Matloff},
- title = {A Multiple-Disk System for both Fault Tolerance and Improved
- Performance},
- journal = {IEEE Transactions on Reliability},
- year = {1987},
- month = {June},
- volume = {R-36},
- number = {2},
- pages = {199--201},
- keyword = {parallel I/O, reliability, disk shadowing},
- comment = {Variation on mirrored disks using more than 2 disks, to spread the
- files around. Good performance increases. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{meador:array,
- author = {Wes E. Meador},
- title = {Disk Array Systems},
- booktitle = {Proceedings of IEEE Compcon},
- year = {1989},
- month = {Spring},
- pages = {143--146},
- keyword = {parallel I/O, disk array, disk striping},
- comment = {Describes {\em Strategy 2 Disk Array Controller}, which allows 4
- or 8 drives, hardware striped, with parity drive and 0-4 hot spares. Up to 4
- channels to CPU(s). Logical block interface. Defects, errors, formatting,
- drive failures all handled automatically. Peak 40 MB/s data transfer on each
- channel. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{milenkovic:model,
- author = {Milan Milenkovic},
- title = {A Model for Multiprocessor {I/O}},
- year = {1989},
- month = {July},
- number = {89-CSE-30},
- institution = {Dept. of Computer Science and Engineering, Southern Methodist
- University},
- keyword = {multiprocessor I/O, I/O architecture, distributed systems},
- comment = {Advocates using dedicated server processors for all I/O, e.g., disk
- server, terminal server, network server. Pass I/O requests and data via
- messages or RPC calls over the interconnect (here a shared bus). Server
- handles packaging, blocking, caching, errors, interrupts, and so forth,
- freeing the main processors and the interconnect from all this activity.
- Benefits: encapsulates I/O-related code in specific places, accommodates
- heterogeneity, and improves performance. Nice idea, but it allows for an I/O
- bottleneck unless the server can handle all the demand; otherwise it would
- need multiple servers, more expensive than just multiple controllers.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{mokhoff:pario,
- author = {Nicholas Mokhoff},
- title = {Parallel Disk Assembly Packs 1.5 {GBytes}, runs at 4 {MBytes/s}},
- journal = {Electronic Design},
- year = {1987},
- month = {November},
- pages = {45--46},
- keyword = {parallel I/O, I/O, disk hardware, disk striping, reliability},
- comment = {Commercially available: Micropolis Systems' Parallel Disk 1800
- series. Four disks plus one parity disk, synchronized and byte-interleaved.
- SCSI interface. Total capacity 1.5 GBytes, sustained transfer rate of 4
- MBytes/s. MTTF 140,000 hours. Hard and soft errors corrected in real-time.
- Failed drives can be replaced while system is running.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{moren:controllers,
- author = {William D. Moren},
- title = {Design of Controllers is Key Element in Disk Subsystem Throughput},
- journal = {Computer Technology Review},
- year = {1988},
- month = {Spring},
- pages = {71--73},
- keyword = {parallel I/O, disk hardware},
- comment = {A short paper on some basic techniques used by disk controllers to
- improve throughput: seek optimization, request combining, request queuing,
- using multiple drives in parallel, scatter/gather DMA, data caching,
- read-ahead, cross-track read-ahead, write-back caching, segmented caching,
- reduced latency (track buffering), and format skewing. [Most of these are
- already handled in Unix file systems.] [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{muller:multi,
- author = {Keith Muller and Joseph Pasquale},
- title = {A High Performance Multi-Structured File System Design},
- booktitle = {Proceedings of the Thirteenth ACM Symposium on Operating Systems
- Principles},
- year = {1991},
- pages = {56--67},
- keyword = {file system, disk striping, disk mirroring}
- }
-
- @InProceedings{nagashima:pario,
- author = {Umpei Nagashima and Takashi Shibata and Hiroshi Itoh and Minoru
- Gotoh},
- title = {An Improvement of {I/O} Function for Auxiliary Storage: {Parallel
- I/O} for a Large Scale Supercomputing},
- booktitle = {1990 International Conference on Supercomputing},
- year = {1990},
- pages = {48--59},
- keyword = {parallel I/O},
- comment = {Poorly-written paper about the use of parallel I/O channels to
- access striped disks in parallel from a supercomputer. They {\em chain}\/
- (i.e., combine) requests to a disk for large contiguous accesses.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{ncr:3600,
- key = {NCR},
- title = {{NCR 3600} Product Description},
- year = {1991},
- month = {September},
- number = {ST-2119-91},
- institution = {NCR},
- address = {San Diego},
- keyword = {multiprocessor architecture, MIMD, parallel I/O},
- comment = {Has 1-32 50MHz Intel 486 processors. Parallel independent disks on
- the disk nodes, separate from the processor nodes. Tree interconnect. Aimed
- at database applications. [David.Kotz@Dartmouth.edu]}
- }
-
- @Misc{ncube:overview,
- key = {nC},
- author = {nCUBE Corporation},
- title = {{nCUBE~2} Supercomputers: {Technical} Overview},
- year = {1990},
- howpublished = {Brochure},
- keyword = {parallel architecture, nCUBE},
- comment = {Gives a little I/O information. See also hayes:nCUBE.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{ng:diskarray,
- author = {Spencer Ng},
- title = {Some Design Issues of Disk Arrays},
- booktitle = {Proceedings of IEEE Compcon},
- year = {1989},
- month = {Spring},
- pages = {137--142},
- note = {San Francisco, CA},
- keyword = {parallel I/O, disk array},
- comment = {Discusses disk arrays and striping. Transfer size is important to
- striping success: small transfers are better off with independent disks.
- Synchronized rotation is especially important for small transfer sizes, since
- in that case the increased rotational delays dominate. Fine-grain striping
- involves less assembly/disassembly delay, but coarse-grain (block) striping
- allows for request parallelism. Fine-grain striping wastes capacity due to
- fixed-size formatting overhead. He also derives an exact MTTF equation for
- 1-failure tolerance and on-line repair (a standard approximation appears
- after this entry). [David.Kotz@Dartmouth.edu]}
- }
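-
- The exact equation is Ng's; the standard first-order approximation, given
- here only for context, is that a group of $G$ disks tolerating one failure,
- with failed disks repaired on line in mean time MTTR, is lost only when a
- second disk fails during a repair window, so
-
-     \[ \mathrm{MTTF}_{group} \approx
-        \frac{\mathrm{MTTF}_{disk}^{2}}{G \, (G-1) \, \mathrm{MTTR}} \]
-
- and an array of $n$ independent groups has roughly $1/n$ of that.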
-
- @InProceedings{ng:interleave,
- author = {S. Ng and D. Lang and R. Selinger},
- title = {Trade-offs Between Devices and Paths in Achieving Disk
- Interleaving},
- booktitle = {Proceedings of the 15th Annual International Symposium on
- Computer Architecture},
- year = {1988},
- pages = {196--201},
- keyword = {parallel I/O, disk hardware, disk caching, I/O bottleneck},
- comment = {Compares four different ways of restructuring IBM disk controllers
- and channels to obtain more parallelism. They use parallel heads or parallel
- actuators. The best results come when they replicate the control electronics
- to maintain the number of data paths through the controller. Otherwise the
- controller bottleneck reduces performance. Generally, for large or small
- transfer sizes, parallel heads with replication gave better performance.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{nishino:sfs,
- author = {H. Nishino and S. Naka and K. Ikumi},
- title = {High Performance File System for Supercomputing Environment},
- booktitle = {Proceedings of Supercomputing '89},
- year = {1989},
- pages = {747--756},
- keyword = {supercomputer, file system, parallel I/O},
- comment = {A modification to the Unix file system to allow for supercomputer
- access. Workload: file sizes from a few KB to a few GB, I/O operation sizes
- from a few bytes to hundreds of MB. Generally programs split into I/O-bound
- and CPU-bound parts. Sequential and random access. Needs: giant files (bigger
- than a device), peak hardware performance for large files, NFS access. Their
- FS is built into Unix ``transparently''. Space is allocated in clusters,
- rather than blocks; clusters might be as big as a cylinder. This allows for
- efficient access to large files. Mentions parallel disks as part of a
- ``virtual volume'' but does not elaborate. Prefetching within a cluster.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{nodine:sort,
- author = {Mark H. Nodine and Jeffrey Scott Vitter},
- title = {Large-Scale Sorting in Parallel Memories},
- booktitle = {Proceedings of the ACM Symposium on Parallel Algorithms and
- Architectures (SPAA)},
- year = {1991},
- pages = {29--39},
- keyword = {external sorting, file access pattern, parallel I/O},
- comment = {Describes algorithms for external sorting that are optimal in the
- number of I/Os. Proposes a couple of fairly-realistic memory hierarchy
- models. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{ogata:diskarray,
- author = {Mikito Ogata and Michael J. Flynn},
- title = {A Queueing Analysis for Disk Array Systems},
- year = {1990},
- number = {CSL-TR-90-443},
- institution = {Stanford University},
- keyword = {disk array, performance analysis},
- comment = {Fairly complex analysis of a multiprocessor attached to a disk
- array system through a central server that is the buffer. Assumes
- task-oriented model for parallel system, where tasks can be assigned to any
- CPU; this makes for an easy model. Like Reddy, they compare declustering and
- striping (they call them striped and synchronized disks).
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{olson:random,
- author = {Thomas M. Olson},
- title = {Disk Array Performance in a Random {I/O} Environment},
- journal = {Computer Architecture News},
- year = {1989},
- month = {September},
- volume = {17},
- number = {5},
- pages = {71--77},
- keyword = {I/O benchmark, transaction processing},
- comment = {See wolman:iobench. Used IOBENCH to compare a normal disk
- configuration with striped disks, RAID level 1, and RAID level 5, under a
- random I/O workload. Multiple disks with files on different disks gave good
- performance (high throughput and low response time) when there were multiple
- users. Striping ensures a balanced load, with similar performance. RAID level
- 1 or level 5 ensures reliability at a performance cost over striping, but is
- still good. Especially sensitive to the write/read ratio --- performance is
- lost for a large number of writes. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{park:pario,
- author = {Arvin Park and K. Balasubramanian},
- title = {Providing Fault Tolerance in Parallel Secondary Storage Systems},
- year = {1986},
- month = {November},
- number = {CS-TR-057-86},
- institution = {Department of Computer Science, Princeton University},
- keyword = {parallel I/O, reliability},
- comment = {They use ECC with one or more parity drives in bit-interleaved
- systems, and on-line regeneration of failed drives from spares. More
- cost-effective than mirrored disks. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{patterson:raid,
- author = {David Patterson and Garth Gibson and Randy Katz},
- title = {A case for redundant arrays of inexpensive disks {(RAID)}},
- booktitle = {ACM SIGMOD Conference},
- year = {1988},
- month = {June},
- pages = {109--116},
- keyword = {parallel I/O, RAID, reliability, cost analysis, I/O bottleneck,
- disk array, OS92W},
- comment = {Makes a good case for the upcoming I/O crisis, comparing single
- large expensive disks (SLED) with small cheap disks. Outlines five levels of
- RAID that give different reliabilities, costs, and performances.
- Block-interleaved with a single check disk (level 4) or with check blocks
- interspersed (level 5) seems to give the best performance for supercomputer
- I/O, database I/O, or both (the parity placement for levels 4 and 5 is
- sketched after this entry). Note: the TR by the same name (UCB/CSD 87/391) is
- essentially identical. [David.Kotz@Dartmouth.edu]}
- }
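-
- A minimal sketch of the level-4 versus level-5 parity placement described
- above; the rotation rule chosen here is just one reasonable convention, not
- necessarily the one in the paper:
-
-     # For each stripe, return the index of the disk holding the check
-     # (parity) block.  Level 4 uses a fixed check disk; level 5 rotates the
-     # check block across the disks so writes do not all queue on one disk.
-     def parity_disk(stripe, num_disks, level):
-         if level == 4:
-             return num_disks - 1
-         if level == 5:
-             return (num_disks - 1 - stripe) % num_disks
-         raise ValueError("only RAID levels 4 and 5 are sketched here")
-
-     if __name__ == "__main__":
-         for stripe in range(6):
-             print(f"stripe {stripe}: RAID4 parity on disk "
-                   f"{parity_disk(stripe, 5, 4)}, RAID5 parity on disk "
-                   f"{parity_disk(stripe, 5, 5)}")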
-
- @InProceedings{patterson:raid2,
- author = {David Patterson and Peter Chen and Garth Gibson and Randy H. Katz},
- title = {Introduction to Redundant Arrays of Inexpensive Disks {(RAID)}},
- booktitle = {Proceedings of IEEE Compcon},
- year = {1989},
- month = {Spring},
- pages = {112--117},
- keyword = {parallel I/O, RAID, reliability, cost analysis, I/O bottleneck,
- disk array},
- comment = {A short version of patterson:raid, with some slight updates.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{pierce:pario,
- author = {Paul Pierce},
- title = {A Concurrent File System for a Highly Parallel Mass Storage System},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {155--160},
- keyword = {parallel I/O, hypercube, Intel iPSC/2, parallel file system},
- comment = {Chose to tailor the system for high performance on large files,
- read in large chunks. Uniform logical file system view, Unix stdio interface.
- Blocks are scattered over all disks, but not striped. A blocksize of 4K
- optimizes message-passing performance without using blocks that are too big.
- The directory tree is stored in ONE file and managed by ONE process, so opens
- are a bottleneck, but that is not their emphasis. File headers, however, are
- scattered. The file header info contains a list of blocks. Each file header
- is managed by the disk process on its I/O node. Data caching is done only at
- the I/O node of the originating disk drive. Read-ahead is used but not
- detailed here. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{pratt:twofs,
- author = {Terrence W. Pratt and James C. French and Phillip M. Dickens and
- Janet, Jr., Stanley A.},
- title = {A Comparison of the Architecture and Performance of Two Parallel
- File Systems},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {161--166},
- keyword = {parallel I/O, Intel iPSC/2, nCUBE},
- comment = {Simple comparison of the iPSC/2 and nCUBE/10 parallel I/O systems.
- Short description of each system, with simple transfer rate measurements. See
- also french:ipsc2io-tr. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{reddy:hyperio1,
- author = {A. L. Reddy and P. Banerjee and Santosh G. Abraham},
- title = {{I/O} Embedding in Hypercubes},
- booktitle = {Proceedings of the 1988 International Conference on Parallel
- Processing},
- year = {1988},
- volume = {1},
- pages = {331--338},
- keyword = {parallel I/O, hypercube},
- comment = {Emphasis is on adjacency (as usual for hypercube work), though
- this seems to be less important now that more machines use fancy routers. It
- also implies (and they assume) that data is distributed well across the disks
- so no data needs to move beyond the neighbors of an I/O node. Still, the idea
- of adjacency is good since it allows for good data distribution while not
- requiring it, and it spreads I/O processors among compute processors in a
- balanced way. It also avoids messing up the hypercube regularity with
- dedicated I/O nodes. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{reddy:hyperio2,
- author = {A. L. Reddy and P. Banerjee},
- title = {{I/O} issues for hypercubes},
- booktitle = {International Conference on Supercomputing},
- year = {1989},
- pages = {72--81},
- keyword = {parallel I/O, hypercube},
- comment = {See reddy:hyperio3 for extended version. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{reddy:hyperio3,
- author = {A. L. Narasimha Reddy and Prithviraj Banerjee},
- title = {Design, Analysis, and Simulation of {I/O} Architectures for
- Hypercube Multiprocessors},
- journal = {IEEE Transactions on Parallel and Distributed Systems},
- year = {1990},
- month = {April},
- volume = {1},
- number = {2},
- pages = {140--151},
- keyword = {parallel I/O, hypercube},
- comment = {An overall paper restating their embedding technique from
- reddy:hyperio1, plus a little bit of evaluation along the lines of
- reddy:pario2, plus some ideas about matrix layout on the disks. They claim
- that declustering is important, since synchronized disks do not provide
- enough parallelism, especially in the communication across the hypercube
- (since the synchronized disks must hang off one node).
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{reddy:notsame,
- author = {A. L. Narasimha Reddy},
- title = {Reads and Writes: When {I/O}s Aren't Quite the Same},
- booktitle = {Proceedings of the Twenty-Fifth Annual Hawaii International
- Conference on System Sciences},
- year = {1992},
- pages = {84--92},
- keyword = {disk array}
- }
-
- @InProceedings{reddy:pario,
- author = {A. Reddy and P. Banerjee},
- title = {An Evaluation of multiple-disk {I/O} systems},
- booktitle = {Proceedings of the 1989 International Conference on Parallel
- Processing},
- year = {1989},
- pages = {I:315--322},
- keyword = {parallel I/O, disk array, disk striping},
- comment = {see also expanded version reddy:pario2 [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{reddy:pario2,
- author = {A. Reddy and P. Banerjee},
- title = {Evaluation of multiple-disk {I/O} systems},
- journal = {IEEE Transactions on Computers},
- year = {1989},
- month = {December},
- volume = {38},
- pages = {1680--1690},
- keyword = {parallel I/O, disk array, disk striping},
- comment = {See the conference version, reddy:pario. Compares declustered
- disks (sort of MIMD-like) to synchronized-interleaved disks (SIMD-like).
- Declustering is needed for scalability and is better for scientific
- workloads, handling the large parallelism needed for scientific workloads and
- for RAID-like architectures. Synchronized interleaving is better for general
- file system workloads due to better utilization and reduced seek overhead.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{reddy:pario3,
- author = {A. L. Reddy and Prithviraj Banerjee},
- title = {A Study of Parallel Disk Organizations},
- journal = {Computer Architecture News},
- year = {1989},
- month = {September},
- volume = {17},
- number = {5},
- pages = {40--47},
- keyword = {parallel I/O, disk array, disk striping},
- comment = {Nothing new over the expanded version, reddy:pario2; little
- different from reddy:pario. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{reddy:perfectio,
- author = {A. L. Narasimha Reddy and Prithviraj Banerjee},
- title = {A Study of {I/O} Behavior of {Perfect} Benchmarks on a
- Multiprocessor},
- booktitle = {Proceedings of the 17th Annual International Symposium on
- Computer Architecture},
- year = {1990},
- pages = {312--321},
- keyword = {parallel I/O, file access pattern, workload, multiprocessor file
- system, benchmark},
- comment = {Using five applications from the Perfect benchmark suite, they
- studied both implicit (paging) and explicit (file) I/O activity. They found
- that the paging activity was relatively small and that sequential access to
- VM was common. All access to files was sequential, though this may be due to
- the programmer's belief that the file system is sequential. Buffered I/O
- would help to make transfers bigger and more efficient, but there wasn't
- enough rereferencing to make caching useful. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{rettberg:monarch,
- author = {Randall D. Rettberg and William R. Crowther and Philip P. Carvey
- and Raymond S. Tomlinson},
- title = {The {Monarch Parallel Processor} Hardware Design},
- journal = {IEEE Computer},
- year = {1990},
- month = {April},
- volume = {23},
- number = {4},
- pages = {18--30},
- keyword = {MIMD, parallel architecture, shared memory, parallel I/O},
- comment = {This describes the Monarch computer from BBN. It will never be
- built, though the article does not say this. 65K processors and memory
- modules. 65GB RAM. Bfly-style switch in dance-hall layout. Switch is
- synchronous; one switch time is a {\em frame} (one microsecond, equal to 3
- processor cycles) and all processors may reference memory in one frame time.
- Local I-cache only. Contention reduces full bandwidth by 16 percent. Full
- 64-bit machine. Custom VLSI. Each memory location has 8 tag bits. One allows
- for a location to be locked by a processor. Thus, any FetchAndOp or
- full/empty model can be supported. I/O is done by adding I/O processors (up
- to 2K in a 65K-proc machine) in the switch. They plan 200 disks, each with an
- I/O processor, for 65K nodes. They would spread each block over 9 disks,
- including one for parity (essentially RAID). [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{salem:diskstripe,
- author = {Kenneth Salem and Hector Garcia-Molina},
- title = {Disk Striping},
- booktitle = {IEEE 1986 Conference on Data Engineering},
- year = {1986},
- pages = {336--342},
- keyword = {parallel I/O, disk striping, disk array},
- comment = {See the techreport salem:striping for a nearly identical but more
- detailed version. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{salem:striping,
- author = {Kenneth Salem and Hector Garcia-Molina},
- title = {Disk Striping},
- year = {1984},
- month = {December},
- number = {332},
- institution = {EECS Dept. Princeton Univ.},
- keyword = {parallel I/O, disk striping, disk array},
- comment = {Cite salem:diskstripe instead. Basic paper on striping, for a
- uniprocessor, single-user machine. Interleaving is asynchronous, even without
- matching disk locations, though that option is discussed. All done with
- models. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{salmon:cubix,
- author = {John Salmon},
- title = {{CUBIX: Programming} Hypercubes without Programming Hosts},
- booktitle = {Proceedings of the Second Conference on Hypercube
- Multiprocessors},
- year = {1986},
- pages = {3--9},
- keyword = {hypercube, multiprocessor file system interface},
- comment = {Previously, hypercubes were programmed as a combination of host
- and node programs. Salmon proposes to use a universal host program that acts
- essentially as a file server, responding to requests from the node programs.
- Two modes: crystalline, where node programs run in loose synchrony, and
- amorphous, where node programs are asynchronous. In the crystalline case,
- files have a single file pointer and are either single- or multiple- access;
- single access means all nodes must simultaneously issue the same request;
- multiple access means they all simultaneously issue the same request with
- different parameters, giving an interleaved pattern. Amorphous allows
- asynchronous activity, with separate file pointers per node.
- [David.Kotz@Dartmouth.edu]}
- }
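-
- A hypothetical sketch (not CUBIX code) of the access pattern that the
- crystalline multiple-access mode produces: all $P$ nodes issue the same read
- with different parameters, the shared file pointer advances by $P$ records,
- and node $i$ therefore sees records $i, i+P, i+2P, \ldots$:
-
-     # Which records of a sequential file each node ends up with under an
-     # interleaved, loosely synchronous access pattern.
-     def records_seen_by_node(node_id, num_nodes, num_records):
-         return list(range(node_id, num_records, num_nodes))
-
-     if __name__ == "__main__":
-         P = 4
-         for node in range(P):
-             print(f"node {node} reads records {records_seen_by_node(node, P, 12)}")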
-
- @TechReport{schulze:raid,
- author = {Martin Schulze},
- title = {Considerations in the Design of a {RAID} Prototype},
- year = {1988},
- month = {August},
- number = {UCB/CSD 88/448},
- institution = {UC Berkeley},
- keyword = {parallel I/O, RAID, disk array, disk hardware},
- comment = {Very practical description of the RAID I prototype.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{schulze:raid2,
- author = {Martin Schulze and Garth Gibson and Randy Katz and David
- Patterson},
- title = {How Reliable is a {RAID}?},
- booktitle = {Proceedings of IEEE Compcon},
- year = {1989},
- month = {Spring},
- keyword = {parallel I/O, reliability, RAID, disk array, disk hardware},
- comment = {Published version of second paper in chen:raid. Some overlap with
- schulze:raid, though that paper has more detail. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{shin:hartsio,
- author = {Kang G. Shin and Greg Dykema},
- title = {A Distributed {I/O} Architecture for {HARTS}},
- booktitle = {Proceedings of the 17th Annual International Symposium on
- Computer Architecture},
- year = {1990},
- pages = {332--342},
- keyword = {parallel I/O, multiprocessor architecture, MIMD, fault tolerance},
- comment = {HARTS is a multicomputer connected with a wrapped hexagonal mesh,
- with an emphasis on real-time and fault tolerance. The mesh consists of
- network routing chips. Hanging off each is a small bus-based multiprocessor
- ``node''. They consider how to integrate I/O devices into this architecture:
- attach device controllers to processors, to network routers, to node busses,
- or via a separate network. They decided to compromise and hang each I/O
- controller off three network routers, in the triangles of the hexagonal mesh.
- This keeps the traffic off of the node busses, and allows multiple paths to
- each controller. They discuss the reachability and hop count in the presence
- of failed nodes and links. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{smotherman:taxonomy,
- author = {Mark Smotherman},
- title = {A Sequencing-based Taxonomy of {I/O} Systems and Review of
- Historical Machines},
- journal = {Computer Architecture News},
- year = {1989},
- month = {September},
- volume = {17},
- number = {5},
- pages = {5--15},
- keyword = {I/O architecture, historical summary},
- comment = {Classifies I/O systems by how they initiate and terminate I/O.
- Uniprocessor and Multiprocessor systems. [David.Kotz@Dartmouth.edu]}
- }
-
- @Misc{snir:hpfio,
- author = {Marc Snir},
- title = {Proposal for {IO}},
- year = {1992},
- month = {July 7},
- howpublished = {Posted to HPFF I/O Forum by
- SNIR%YKTVMV.bitnet@cunyvm.cuny.edu},
- note = {Draft.},
- keyword = {parallel I/O, multiprocessor file system interface},
- comment = {An outline of two possible ways to specify mappings of arrays to
- storage nodes in a multiprocessor, and to make unformatted parallel transfers
- of multiple records. Seems to apply only to arrays, and to files that hold
- only arrays. It keeps the linear structure of files as sequences of records,
- but in some cases does not preserve the order of data items or of fields
- within subrecords. Difficult to understand unless you know HPF and Fortran
- 90. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{solworth:mirror,
- author = {John A. Solworth and Cyril U. Orji},
- title = {Distorted Mirrors},
- booktitle = {Proceedings of the First International Conference on Parallel
- and Distributed Information Systems},
- year = {1991},
- pages = {10--17},
- keyword = {disk mirror, parallel I/O},
- comment = {Write one disk (the master) in the usual way, and write the slave
- disk at the closest free block. Actually, logically partition the two disks
- so that each disk has a master partition and a slave partition. Up to 80\%
- improvement in small-write performance, while retaining good sequential read
- performance. [David.Kotz@Dartmouth.edu]}
- }
-
- @MastersThesis{stabile:disks,
- author = {James Joseph Stabile},
- title = {Disk Scheduling Algorithms for a Multiple Disk System},
- year = {1988},
- school = {UC Davis},
- keyword = {parallel I/O, parallel file system, mirrored disk, disk
- scheduling},
- comment = {Describes simulation based on model of disk access pattern.
- Multiple-disk system, much like in matloff:multidisk. Files stored in two
- copies, each on a separate disk, but there are more than two disks, so this
- differs from mirroring. He compares several disk scheduling algorithms. A
- variant of SCAN seems to be the best. He makes many statements that don't
- seem to follow from his reasoning. His model does not include sequentiality
- or prefetching in any direct way. [David.Kotz@Dartmouth.edu]}
- }
-
- @PhdThesis{staelin:phd,
- author = {Carl Hudson Staelin},
- title = {High Performance File System Design},
- year = {1991},
- month = {October},
- school = {Princeton University},
- note = {Available as TR CS-TR-347-91},
- keyword = {file system, parallel I/O},
- comment = {His new filesystem is called iPcress, and has a few key features:
- it is self-tuning based on dynamic statistics collected on each file; it
- reorganizes during idle times; and it uses several caching and allocation
- strategies, not just one, depending on the situation. The basic idea is to
- make {\em smart\/} file systems. A few performance results. Clustering active
- data in the middle of the disk gives better throughput. Meta-data is
- contained in files. [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{stone:query,
- author = {Harold S. Stone},
- title = {Parallel Querying of Large Databases: {A} Case Study},
- journal = {IEEE Computer},
- year = {1987},
- month = {October},
- volume = {20},
- number = {10},
- pages = {11--21},
- keyword = {parallel I/O, database, SIMD, connection machine},
- comment = {See also IEEE Computer, Jan 1988, p. 8 and 10. Examines a database
- query that is parallelized for the Connection Machine. He shows that in many
- cases, a smarter serial algorithm that reads only a portion of the database
- (through an index) will be faster than 64K processors reading the whole
- database. Uses a simple model for the machines to show this. Reemphasizes the
- point of Boral and DeWitt that I/O is the bottleneck of a database machine,
- and that parallelizing the processing will not necessarily help a great deal.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{stonebraker:radd,
- author = {Michael Stonebraker and Gerhard A. Schloss},
- title = {Distributed {RAID} --- {A} New Multiple Copy Algorithm},
- booktitle = {Proceedings of 6th International Data Engineering Conference},
- year = {1990},
- pages = {430--437},
- keyword = {disk striping, reliability},
- comment = {This is about ``RADD'', a distributed form of RAID. Meant for
- cases where the disks are physically distributed around several sites, and no
- one controller controls them all. Much lower space overhead than any
- mirroring technique, with comparable normal-mode performance at the expense
- of failure-mode performance. [David.Kotz@Dartmouth.edu]}
- }
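-
- For a rough sense of the space claim above (my arithmetic, using a generic
- single-parity layout rather than RADD's exact scheme): mirroring stores every
- block twice, a 100\% storage overhead, while one parity block per group of
- $N$ data blocks adds only a fraction $1/N$, e.g.\ 10\% for $N = 10$, with the
- cost paid instead in degraded performance while a failed site is rebuilt.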
-
- @TechReport{stonebraker:xprs,
- author = {Michael Stonebraker and Randy Katz and David Patterson and John
- Ousterhout},
- title = {The Design of {XPRS}},
- year = {1988},
- month = {March},
- number = {UCB/ERL M88/19},
- institution = {UC Berkeley},
- keyword = {parallel I/O, disk array, RAID, Sprite, disk hardware, database},
- comment = {Designing a DBMS for Sprite and RAID. High availability, high
- performance. Shared memory multiprocessor. Allocates extents to files that
- are interleaved over a variable number of disks, and over a contiguous set
- of tracks on those disks. [David.Kotz@Dartmouth.edu]}
- }
-
- @Unpublished{taber:metadisk,
- author = {David Taber},
- title = {{MetaDisk} Driver Technical Description},
- year = {1990},
- month = {October},
- note = {SunFlash electronic mailing list 22(9)},
- keyword = {disk mirroring, parallel I/O},
- comment = {MetaDisk is an addition to the Sun SPARCstation server kernel. It
- allows disk mirroring between any two local disk partitions, or concatenation
- of several disk partitions into one larger partition. Can span up to 4
- partitions simultaneously. Appears not to be striped, just allows bigger
- partitions, and (by chance) some parallel I/O for large files.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Booklet{teradata:dbc,
- key = {Teradata},
- title = {{DBC/1012}},
- year = {1988},
- howpublished = {Teradata Corporation Booklet},
- keyword = {parallel I/O, database machine, Teradata}
- }
-
- @TechReport{think:cm-2,
- key = {TMC},
- title = {{Connection Machine} Model {CM-2} Technical Summary},
- year = {1987},
- month = {April},
- number = {HA87-4},
- institution = {Thinking Machines},
- keyword = {parallel I/O, connection machine, disk hardware, SIMD},
- comment = {I/O and Data Vault, pp. 27--30 [David.Kotz@Dartmouth.edu]}
- }
-
- @Book{think:cm5,
- key = {TMC},
- title = {The {Connection Machine} {CM-5} Technical Summary},
- year = {1991},
- month = {October},
- publisher = {Thinking Machines Corporation},
- keyword = {computer architecture, connection machine, MIMD, SIMD, parallel
- I/O},
- comment = {Some detail, but it still skips over some key aspects (like the
- communication topology). Neat communications support makes for user-mode
- message passing, broadcasting, and reductions, all built in. Lots of info
- here. File system calls allow data to be transferred in parallel directly
- from I/O node to processing node, bypassing the partition and I/O management
- nodes. Multiple I/O devices (even DataVaults) can be logically striped.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Manual{tmc:cmio,
- key = {TMC},
- title = {Programming the {CM I/O} System},
- year = {1990},
- month = {November},
- organization = {Thinking Machines Corporation},
- keyword = {parallel I/O, file system interface, multiprocessor file system},
- comment = {There are more recent editions. They have two types of files,
- parallel and serial, differing in the way data is laid out internally. Also
- have three modes for reading the file: synchronous, streaming (asynchronous),
- and buffered. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{towsley:cpuio,
- author = {Donald F. Towsley},
- title = {The Effects of {CPU: I/O} Overlap in Computer System
- Configurations},
- booktitle = {Proceedings of the 5th Annual International Symposium on
- Computer Architecture},
- year = {1978},
- month = {April},
- pages = {238--241},
- keyword = {parallel processing, I/O},
- comment = {Difficult to follow since it is missing its figures. ``Our most
- important result is that multiprocessor systems can benefit considerably more
- than single processor systems with the introduction of CPU: I/O overlap.''
- They overlap I/O needed by some future CPU sequence with the current CPU
- operation. They claim it looks good for large numbers of processors. Their
- orientation seems to be for multiprocessors operating on independent tasks.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @Article{towsley:cpuio-parallel,
- author = {D. Towsley and K. M. Chandy and J. C. Browne},
- title = {Models for Parallel Processing within Programs: {Application} to
- {CPU: I/O} and {I/O: I/O} Overlap},
- journal = {Communications of the ACM},
- year = {1978},
- month = {October},
- volume = {21},
- number = {10},
- pages = {821--831},
- keyword = {parallel processing, I/O},
- comment = {Models CPU:I/O and I/O:I/O overlap within a program. ``Overlapping
- is helpful only when it allows a device to be utilized which would not be
- utilized without overlapping.'' In general the overlapping seems to help.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @MastersThesis{vaitzblit:media,
- author = {Lev Vaitzblit},
- title = {The Design and Implementation of a High-Bandwidth File Service for
- Continuous Media},
- year = {1991},
- month = {September},
- school = {MIT},
- keyword = {multimedia, distributed file system, disk striping},
- comment = {A DFS for multimedia. Expect large files, read-mostly, highly
- sequential. Temporal synchronization is key. An administration server handles
- opens and closes, and provides guarantees on performance (like Swift). The
- interface at the client nodes talks to the admin server transparently, and
- stripes requests over all storage nodes. Storage nodes may internally use
- RAIDs, I suppose. Files are a series of frames, rather than bytes. Each frame
- has a time offset in seconds. Seeks can be by frame number or time offset.
- File containers contain several files, and have attributes that specify
- performance requirements. Interface does prefetching, based on read direction
- (forward or backward) and any frame skips. But frames are not transmitted
- from storage server to client node until requested (client pacing). Claim
- that synchronous disk interleaving with a striping unit of one frame is best.
- Could get 30 frames/sec (3.5MB/s) with 2 DECstation 5000s and 4 disks,
- serving a client DEC 5000. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{vandegoor:unixio,
- author = {A. J. {van de Goor} and A. Moolenaar},
- title = {{UNIX I/O} in a Multiprocessor System},
- booktitle = {Proceedings of the 1988 Winter Usenix Conference},
- year = {1988},
- pages = {251--258},
- keyword = {unix, multiprocessor file system}
- }
-
- @Manual{vms:stripe,
- key = {DEC},
- title = {{VAX} Disk Striping Driver for {VMS}},
- year = {1989},
- month = {December},
- organization = {Digital Equipment Corporation},
- note = {Order Number AA-NY13A-TE},
- keyword = {disk striping},
- comment = {Describes the VAX disk striping driver. Stripes an apparently
- arbitrary number of disk devices. All devices must be the same type, and
- apparently each device is used in its entirety. The manager can specify the
- ``chunksize'', the number of logical blocks per striped block; they suggest
- using the track size of the device as the chunk size. They also point out
- that multiple controllers should be used in order to gain parallelism. A
- possible block mapping is sketched after this entry. [David.Kotz@Dartmouth.edu]}
- }
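-
- A small sketch of how such a driver might map a logical block onto its member
- disks, with chunks handed out round-robin; the mapping is my illustration,
- since the manual does not spell out the exact layout:
-
-     # Map a logical block number to (member disk, block on that disk) for a
-     # stripe set of num_disks devices with chunk_size logical blocks per chunk.
-     def map_logical_block(lbn, chunk_size, num_disks):
-         chunk = lbn // chunk_size
-         disk = chunk % num_disks
-         block_on_disk = (chunk // num_disks) * chunk_size + (lbn % chunk_size)
-         return disk, block_on_disk
-
-     if __name__ == "__main__":
-         for lbn in range(12):
-             d, b = map_logical_block(lbn, chunk_size=2, num_disks=3)
-             print(f"logical block {lbn:2d} -> disk {d}, block {b}")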
-
- @InProceedings{wilcke:victor,
- author = {W. W. Wilcke and D. G. Shea and R. C. Booth and D. H. Brown and M.
- F. Giampapa and L. Huisman and G. R. Irwin and E. Ma and T. T. Murakami and
- F. T. Tong and P. R. Varker and D. J. Zukowski},
- title = {The {IBM Victor} Multiprocessor Project},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {201--207},
- keyword = {parallel architecture, MIMD, message passing},
- comment = {Interesting architecture. Transputers arranged in a 2-D mesh with
- one disk for each column, and one graphics host for each quadrant. Each disk
- has its own controller (PID). This paper says little about I/O, and
- application examples include no I/O. Message-passing paradigm, although
- messages must pass through the CPUs along the route. [David.Kotz@Dartmouth.edu]}
- }
-
- @TechReport{wilkes:datamesh,
- author = {John Wilkes},
- title = {{DataMesh} --- scope and objectives: a commentary},
- year = {1989},
- month = {July},
- number = {HP-DSD-89-44},
- institution = {Hewlett-Packard},
- keyword = {parallel I/O, distributed systems, disk caching},
- comment = {Proposal for a project at HP that hooks a heterogeneous set of
- storage devices together over a fast interconnect, each with its own
- identical processor. The whole would then act as a file server for a network.
- The storage devices would range from fast to slow (e.g., an optical jukebox),
- with varying availability, {\em etc.}. See wilkes:datamesh1 for more recent
- status. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{wilkes:datamesh1,
- author = {John Wilkes},
- title = {{DataMesh} Research Project, Phase 1},
- booktitle = {Proceedings of the Usenix File Systems Workshop},
- year = {1992},
- month = {May},
- pages = {63--69},
- keyword = {distributed file system, parallel I/O, disk scheduling, disk
- layout},
- comment = {Write to wilkes@hplabs.hp.com for more info.
- [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{willeman:pario,
- author = {Ray Willeman and Susan Phillips and Ron Fargason},
- title = {An Integrated Library For Parallel Processing: The Input/Output
- Component},
- booktitle = {Fourth Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1989},
- pages = {573--575},
- keyword = {parallel I/O},
- comment = {Like the CUBIX interface, in some ways. Meant for parallel access
- to a non-striped (sequential) file. The format is self-describing, so that
- the reader can read the formatting information and distribute the data
- accordingly. [David.Kotz@Dartmouth.edu]}
- }
-
- @InProceedings{witkowski:hyper-fs,
- author = {Andrew Witkowski and Kumar Chandrakumar and Greg Macchio},
- title = {Concurrent {I/O} System for the {Hypercube} Multiprocessor},
- booktitle = {Third Conference on Hypercube Concurrent Computers and
- Applications},
- year = {1988},
- pages = {1398--1407},
- keyword = {parallel I/O, hypercube, parallel file system},
- comment = {Concrete system for the Hypercube. Files resident on one disk
- only. Little support for cooperation except for sequentialized access to
- parts of the file, or broadcast. No mention of random-access files. I/O nodes
- are distinguished from computation nodes. I/O nodes have separate comm.
- network. How would prefetching fit in? No parallel access. I/O hooked to
- front-end too. [David.Kotz@Dartmouth.edu]}
- }
- --
- -----------------
- Mathematics and Computer Science
- Dartmouth College, 6188 Bradley Hall, Hanover NH 03755-3551
- email: David.Kotz@Dartmouth.edu or dfk@cs.dartmouth.edu