Fourth Edition July 18, 1993 This supercedes my older bibliographies. Nearly a year has passed since the third edition of this bibliography, and a lot has happened in the field of parallel I/O. There was the JPDC special issue, the IPPS workshop, and the Dartmouth DAGS symposium, all with a focus on parallel I/O issues. I expect interest in this topic to grow even more rapidly in the coming year. It is truly exciting to be involved in this field right now. This bibliography covers parallel I/O, with a significant emphasis on file systems rather than, say, network or graphics I/O. This includes architecture, operating systems, some algorithms, some databases, and some workload characterization. Because of the expanding nature of this field, I cannot cover everything, and this bibliography is admittedly spotty on topics like disk array reliability, parallel I/O algorithms, parallel databases, and parallel networking. The entries are alphabetized by cite key. The emphasis is on including everything I have, rather than selecting a few key articles of interest. Thus, you probably don't want (or need) to read everything here. There are many repeated entries, in the sense that a paper is often published first as a TR, then in a conference, then in a journal. There is a net gain of 82 entries since last year. Except where noted, all comments are mine, and any opinions expressed there are mine only. In some cases I am simply restating the opinion or result obtained by the paper's authors, and thus even I might disagree with the statement. I keep most editorial comments to a minimum. Please let me know if you have any additions or corrections. You may use the bibliography (and copy it around) as you please except for publishing it as a whole, since the compilation is mine. Please leave this header on the collection; BibTeX won't mind. This bibliography (and many others) is archived in ftp.cse.ucsc.edu:pub/bib. David Kotz Assistant Professor Mathematics and Computer Science Dartmouth College 6188 Bradley Hall Hanover NH 03755-3551 @string {email = "David.Kotz@Dartmouth.edu"} % have to hide this from bibtex ----------------------------------------------------------------------------- @InProceedings{abali:ibm370, author = {B\"{u}lent Abali and Bruce D. Gavril and Richard W. Hadsell and Linh Lam and Brion Shimamoto}, title = {{Many/370: A} Parallel Computer Prototype for {I/O} Intensive Applications}, booktitle = {Sixth Annual Distributed-Memory Computer Conference}, year = {1991}, pages = {728--730}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {Describes a parallel IBM/370, where they attach several small 370s to a switch, and several disks to each 370. Not much in the way of striping.} } @Article{abu-safah:speedup, author = {Walid Abu-Safah and Harlan Husmann and David Kuck}, title = {On {Input/Output} Speed-up in Tightly-coupled Multiprocessors}, journal = {IEEE Transactions on Computers}, year = {1986}, pages = {520--530}, keyword = {parallel I/O, I/O, pario bib}, comment = {Derives formulas for the speedup with and without I/O considered and with parallel software and hardware format conversion. Considering I/O gives a more optimistic view of the speedup of a program {\em assuming} that the parallel version can use its I/O bandwidth as effectively as the serial processor. Concludes that, for a given number of processors, increasing the I/O bandwidth is the most effective way to speed up the program (over the format conversion improvements).} } @Article{aggarwal:sorting, author = {Alok Aggarwal and Jeffrey Scott Vitter}, title = {The Input/Output Complexity of Sorting and Related Problems}, journal = {Communications of the ACM}, year = {1988}, month = {September}, volume = {31}, number = {9}, pages = {1116--1127}, keyword = {parallel I/O, sorting, pario bib}, comment = {Good comments on typical external sorts, and how big they are. Focuses on parallelism at the disk. They give tight theoretical bounds on the number of I/O's required to do external sorting and other problems (FFTs, matrix transpose, {\em etc.}). If $x$ is the number of blocks in the file and $y$ is the number of blocks that fit in memory, then the number of I/Os needed grows as $\Theta (x \log x / \log y)$. If parallel transfers of $p$ blocks are allowed, speedup linear in $p$ is obtained.} } @InProceedings{alverson:tera, author = {Robert Alverson and David Callahan and Daniel Cummings and Brian Koblenz and Allan Porterfield and Burton Smith}, title = {The {Tera} Computer System}, booktitle = {1990 International Conference on Supercomputing}, year = {1990}, pages = {1--6}, keyword = {parallel architecture, MIMD, NUMA, pario bib}, comment = {Interesting architecture. 3-d mesh of pipelined packet-switch nodes, e.g., 16x16x16 is 4096 nodes, with 256 procs, 512 memory units, 256 I/O cache units, and 256 I/O processors attached. 2816 remaining nodes are just switching nodes. Each processor is 64-bit custom chip with up to 128 simultaneous threads in execution. It alternates between ready threads, with a deep pipeline. Inter-instruction dependencies explicitly encoded by the compiler, stalling those threads until the appropriate time. Each thread has a complete set of registers! Memory units have 4-bit tags on each word, for full/empty and trap bits. Shared memory across the network: NUMA.} } @TechReport{arendt:genome, author = {James W. Arendt}, title = {Parallel Genome Sequence Comparison Using a Concurrent File System}, year = {1991}, number = {UIUCDCS-R-91-1674}, institution = {University of Illinois at Urbana-Champaign}, keyword = {parallel file system, parallel I/O, Intel iPSC/2, pario bib}, comment = {Studies the performance of Intel CFS. Uses an application that reads in a huge file of records, each a genome sequence, and compares each sequence against a given sequence. Looks at cache performance, message latency, cost of prefetches and directory reads, and throughput. He tries one-disk, one-proc transfer rates. Because of contention with the directory server on one of the two I/O nodes, it was faster to put all of the file on the other I/O node. Striping is good for multiple readers. Best access pattern was interleaved, not segmented or separate files, because it avoided disk seeks. Perhaps the files are stored contiguously? Can get good speedup by reading the sequences in big integral record sizes, from CFS, using a load-balancing scheduled by the host. Contention for directory blocks -- through single-node directory server.} } @InProceedings{asbury:fortranio, author = {Raymond K. Asbury and David S. Scott}, title = {{FORTRAN} {I/O} on the {iPSC/2}: Is there read after write?}, booktitle = {Fourth Conference on Hypercube Concurrent Computers and Applications}, year = {1989}, pages = {129--132}, keyword = {parallel I/O, hypercube, Intel iPSC/2, file access pattern, pario bib} } @InProceedings{baird:disa, author = {R. Baird and S. KaraMooz and H. Vazire}, title = {Distributed Information Storage Architecture}, booktitle = {Proceedings of the Twelfth IEEE Symposium on Mass Storage Systems}, year = {1993}, pages = {145--155}, keyword = {parallel I/O, distributed file system, mass storage, pario bib}, comment = {Architecture for distributed information storage. Integrates file systems, databases, etc. Single system image, lots of support for administration. O-O model, with storage device objects, logical device objects, volume objects, and file objects. Methods for each type of object, including administrative methods.} } @InProceedings{baldwin:hyperfs, author = {C. H. Baldwin and W. C. Nestlerode}, title = {A Large Scale File Processing Application on a Hypercube}, booktitle = {Fifth Annual Distributed-Memory Computer Conference}, year = {1990}, pages = {1400-1404}, keyword = {multiprocessor file system, file access pattern, parallel I/O, hypercube, pario bib}, comment = {Census-data processing on an nCUBE/10 at USC. Their program uses an interleaved pattern, which is like my lfp or gw with multi-record records (i.e., the application does its own blocking). Shifted to asynchronous I/O to do OBL manually. Better results if they did more computation per I/O (of course).} } @TechReport{barak:hfs, author = {Amnon Barak and Bernard A. Galler and Yaron Farber}, title = {A Holographic File System for a Multicomputer with Many Disk Nodes}, year = {1988}, month = {May}, number = {88-6}, institution = {Dept. of Computer Science, Hebrew University of Jerusalem}, keyword = {parallel I/O, hashing, reliability, disk mirroring, pario bib}, comment = {Describes a file system for a distributed system that scatters records of each file over many disks using hash functions. The hash function is known by all processors, so no one processor must be up to access the file. Any portion of the file whose disknode is available may be accessed. Shadow nodes are used to take over for nodes that go down, saving the info for later use by the proper node. Intended to easily parallelize read/write accesses and global file operations, and to increase file availability.} } @Article{batcher:staran, author = {K. E. Batcher}, title = {{STARAN} Parallel Processor System Hardware}, journal = {AFIPS Conference Proceedings}, year = {1974}, pages = {405--410}, keyword = {parallel architecture, array processor, parallel I/O, SIMD, pario bib}, comment = {This paper is reproduced in Kuhn and Padua's (1981, IEEE) survey ``Tutorial on Parallel Processing.'' The STARAN is an array processor that uses Multi-Dimensional-Access (MDA) memories and permutation networks to access data in bit slices in a variety of ways, with high-speed I/O capabilities. Its router (called the {\em flip} network) could permute data among the array processors, or between the array processors and external devices, including disks, video input, and displays.} } @Manual{bbn:admin, key = {BBN}, author = {BBN Advanced {Computers Inc.}}, title = {{TC2000} System Administration Guide}, edition = {Revision 3.0}, year = {1991}, month = {April}, keyword = {BBN, parallel I/O, pario bib}, comment = {Administrative manual for the TC2000 I/O system. Can stripe over partitions in a user-specified set of disks. Large requests automatically split and done in parallel.} } @InProceedings{bell:physics, author = {Jean L. Bell}, title = {A Specialized Data Management System for Parallel Execution of Particle Physics Codes}, booktitle = {ACM SIGMOD Conference}, year = {1988}, pages = {277--285}, keyword = {file access pattern, disk prefetch, file system, pario bib}, comment = {A specialized database system for particle physics codes. Valuable for its description of access patterns and subsequent file access requirements. Particle-in-cell codes iterate over timesteps, updating the position of each particle, and then the characteristics of each cell in the grid. Particles may move from cell to cell. Particle update needs itself and nearby gridcell data. The whole dataset is too big for memory, and each timestep must be stored on disk for later analysis anyway. Regular file systems are inadequate: specialized DBMS is more appropriate. Characteristics needed by their application class: multidimensional access (by particle type or by location, i.e., multiple views of the data), coordination between grid and particle data, coordination between processors, coordinated access to meta-data, inverted files, horizontal clustering, large blocking of data, asynchronous I/O, array data, complicated joins, and prefetching according to user-prespecified order. Note that many of these things can be provided by a file system, but that most are hard to come by in typical file systems, if not impossible. Many of these features are generalizable to other applications.} } @InProceedings{benner:pargraphics, author = {Robert E. Benner}, title = {Parallel Graphics Algorithms on a 1024-Processor Hypercube}, booktitle = {Fourth Conference on Hypercube Concurrent Computers and Applications}, year = {1989}, pages = {133--140}, keyword = {hypercube, graphics, parallel algorithm, parallel I/O, pario bib}, comment = {About using the nCUBE/10's RT Graphics System. They were frustrated by an unusual mapping from the graphics memory to the display, a shortage of memory on the graphics nodes, and small message buffers on the graphics nodes. They wrote some algorithms for collecting the columns of pixels from the hypercube nodes, and routing them to the appropriate graphics node. They also would have liked a better interconnection network between the graphics nodes, at least for synchronization.} } @InProceedings{best:cmmdio, author = {Michael L. Best and Adam Greenberg and Craig Stanfill and Lewis W. Tucker}, title = {{CMMD I/O}: A Parallel {Unix I/O}}, booktitle = {Proceedings of the Seventh International Parallel Processing Symposium}, year = {1993}, pages = {489--495}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {Much like Intel CFS, with different I/O modes that determine when the compute nodes synchronize, and the semantics of I/Os written to the file. They found it hard to get good bandwidth for independent I/Os, as opposed to coordinated I/Os; part of this was due to their RAID~3 disk array, but it is more complicated than that. Some performance numbers were given in talk.} } @InProceedings{bestavros:raid, author = {Azer Bestavros}, title = {{IDA}-Based Redundant Arrays of Inexpensive Disks}, booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems}, year = {1991}, pages = {2--9}, keyword = {RAID, disk array, reliability, parallel I/O, pario bib}, comment = {Uses the Information Dispersal Algorithm (IDA) to generate $n+m$ blocks from $n$ blocks, to tolerate $m$ disk failures; all of the data from the $n$ blocks is hidden in the $n+m$ blocks. Not with the RAID project.} } @InProceedings{bitton:schedule, author = {Dina Bitton}, title = {Arm Scheduling in Shadowed Disks}, booktitle = {Proceedings of IEEE Compcon}, year = {1989}, month = {Spring}, pages = {132--136}, keyword = {parallel I/O, disk shadowing, reliability, disk mirroring, disk optimization, pario bib}, comment = {Goes further than bitton:shadow. Uses simulation to verify results from that paper, which were expressions for the expected seek distance of shadowed disks, using shortest-seek-time arm scheduling. Problem is her assumption that arm positions stay independent, in the face of correlating effects like writes, which move all arms to the same place. Simulations match model only barely, and only in some cases. Anyway, shadowed disks can improve performance for workloads more than 60 or 70\% reads.} } @InProceedings{bitton:shadow, author = {D. Bitton and J. Gray}, title = {Disk Shadowing}, booktitle = {14th International Conference on Very Large Data Bases}, year = {1988}, pages = {331--338}, keyword = {parallel I/O, disk shadowing, reliability, disk mirroring, disk optimization, pario bib}, comment = {Also TR UIC EECS 88-1 from Univ of Illinois at Chicago. Shadowed disks are mirroring with more than 2 disks. Writes to all disks, reads from one with shortest seek time. Acknowledges but ignores problem posed by lo:disks. Also considers that newer disk technology does not have linear seek time $(a+bx)$ but rather $(a+b\sqrt{x})$. Shows that with either seek distribution the average seek time for workloads with at least 60\% reads decreases in the number of disks. See also bitton:schedule.} } @Article{boral:bubba, author = {Haran Boral and William Alexander and Larry Clay and George Copeland and Scott Danforth and Michael Franklin and Brian Hart and Marc Smith and Patrick Valduriez}, title = {Prototyping {Bubba}, a Highly Parallel Database System}, journal = {IEEE Transactions on Knowledge and Data Engineering}, year = {1990}, month = {March}, volume = {2}, number = {1}, keyword = {parallel I/O, database, disk caching, pario bib}, comment = {More recent than copeland:bubba, and a little more general. This gives few details, and doesn't spend much time on the parallel I/O. Bubba does use parallel independent disks, with a significant effort to place data on the disks, and do the work local to the disks, to balance the load and minimize interprocessor communication. Also they use a single-level store (i.e., memory-mapped files) to improve performance of their I/O system, including page locking that is assisted by the MMU. The OS has hooks for the database manager to give memory-management policy hints.} } @InProceedings{boral:critique, author = {H. Boral and D. {DeWitt}}, title = {Database machines: an idea whose time has passed?}, booktitle = {Proceedings of the Fourth International Workshop on Database Machines}, year = {1983}, pages = {166--187}, publisher = {Springer-Verlag}, keyword = {file access pattern, parallel I/O, database machine, pario bib}, comment = {Improvements in I/O bandwidth crucial for supporting database machines, otherwise highly parallel DB machines are useless (I/O bound). Two ways to do it: 1) synchronized interleaving by using custom controller and regular disks to read/write same track on all disks, which speeds individual accesses. 2) use very large cache (100-200M) to keep blocks to re-use and to do prefetching. But see dewitt:pardbs.} } @TechReport{bordawekar:delta-fs, author = {Rajesh Bordawekar and Alok Choudhary and Juan Miguel Del Rosario}, title = {An Experimental Performance Evaluation of {Touchstone Delta Concurrent File System}}, year = {1992}, number = {SCCS-420}, institution = {NPAC, Syracuse University}, note = {To appear, 1993 International Conference on Supercomputing}, keyword = {performance evaluation, multiprocessor file system, parallel I/O, pario bib}, comment = {Evaluating the Caltech Touchstone Delta (512 nodes, 32 I/O nodes, 64 disks, 8 MB cache per I/O node). Basic measurements of different access patterns and I/O modes. Location in network doesn't seem to matter. Throughput is often limited by the software; at least, the full hardware throughputs are rarely obtained. Sometimes they are compnode-limited, and other times they may be being limited by the cache management. There must be a way to push bottleneck back to the disks .} } @InProceedings{bradley:ipsc2io, author = {David K. Bradley and Daniel A. Reed}, title = {Performance of the {Intel iPSC/2} Input/Output System}, booktitle = {Fourth Conference on Hypercube Concurrent Computers and Applications}, year = {1989}, pages = {141--144}, keyword = {hypercube, parallel I/O, Intel, pario bib}, comment = {Some measurements and simulations of early CFS performance. Looks terrible, but they disclaim that it is a beta version of the first CFS. They determined that the disks are the bottleneck. But this may just imply that they need more disks. Their parallel synthetic applications had each process read a separate file. Files were too short (16K??). CFS had ridiculous traffic overhead. Again, this was beta CFS.} } @InProceedings{brezany:hpf, author = {Peter Brezany and Michael Gernt and Piyush Mehotra and Hans Zima}, title = {Concurrent File Operations in a {High Performance FORTRAN}}, booktitle = {Proceedings of Supercomputing '92}, year = {1992}, pages = {230--237}, keyword = {supercomputing, fortran, multiprocessor file system interface, pario bib}, comment = {Describing their way of writing arrays to files so that they are written in a fast, parallel way, and so that (if read in same distribution) they can be read fast and parallel. Normal read and write forces standard ordering, but cread and cwrite uses a compiler and runtime selected ordering, which is stored in the file so it can be used when rereading. Good for temp files.} } @InProceedings{broom:acacia, author = {Bradley M. Broom}, title = {A Synchronous File Server for Distributed File Systems}, booktitle = {Proceedings of the 16th Australian Computer Science Conference}, year = {1993}, keyword = {distributed file system, pario bib}, comment = {See broom:acacia-tr. See also broom:impl, lautenbach:pfs, mutisya:cache, and broom:cap.} } @TechReport{broom:acacia-tr, author = {Bradley M. Broom}, title = {A Synchronous File Server for Distributed File Systems}, year = {1992}, month = {August}, number = {TR--CS--92--12}, institution = {Dept. of Computer Science, Australian National University}, keyword = {distributed file system, pario bib}, comment = {This paper is not specifically about parallel I/O, but the file system will be used in the AP-1000 multiprocessor. Acacia is a file server that is optimized for synchronous writes, like those used in stateless protocols (eg, NFS). It writes inodes in blocks in any free location that is close to the current head position, using indirect inode blocks to track those. Indirect blocks are in turn written anywhere convenient, and their positions are tracked by the superblock. There is one slot in each cylinder reserved for the superblock, which is timestamped. They get good performance but claim to need a better implementation, and a faster allocation algorithm. No indication of effect on read performance. Cite broom:acacia.} } @InProceedings{broom:cap, author = {Bradley M. Broom and Robert Cohen}, title = {Acacia: A Distributed, Parallel File System for the {CAP-II}}, booktitle = {Proceedings of the First Fujitsu-ANU CAP Workshop}, year = {1990}, month = {November}, keyword = {distributed file system, multiprocessor file system, pario bib}, comment = {See also broom:acacia, broom:impl, lautenbach:pfs, and mutisya:cache.} } @InProceedings{broom:impl, author = {Bradley M. Broom}, title = {Implementation and Performance of the Acacia File System}, booktitle = {Proceedings of the Second Fujitsu-ANU CAP Workshop}, year = {1991}, month = {November}, keyword = {distributed file system, multiprocessor file system, pario bib}, comment = {See also broom:acacia, lautenbach:pfs, mutisya:cache, and broom:cap.} } @InProceedings{browne:io-arch, author = {J. C. Browne and A. G. Dale and C. Leung and R. Jenevein}, title = {A Parallel Multi-Stage {I/O} Architecture with Self-managing Disk Cache for Database Management Applications}, booktitle = {Proceedings of the Fourth International Workshop on Database Machines}, year = {1985}, month = {March}, publisher = {Springer-Verlag}, keyword = {parallel I/O, disk caching, database, pario bib}, comment = {A fancy interconnection from procs to I/O processors, intended mostly for DB applications, that uses cache at I/O end and a switch with smarts. Cache is associative. Switch helps out in sort and join operations.} } @Article{cabrera:pario, author = {Luis-Felipe Cabrera and Darrell D. E. Long}, title = {Swift: {Using} Distributed Disk Striping to Provide High {I/O} Data Rates}, journal = {Computing Systems}, year = {1991}, month = {Fall}, volume = {4}, number = {4}, keyword = {parallel I/O, disk striping, distributed file system, pario bib}, comment = {See cabrera:swift, cabrera:swift2. Describes the performance of a Swift prototype and simulation results. They stripe data over multiple disk servers (here SPARC SLC with local disk), and access it from a SPARC2 client. Their prototype gets nearly linear speedup for reads and asynchronous writes; synchronous writes are slower. They hit the limit of the Ethernet and/or the client processor with three disk servers. Adding another Ethernet allowed them to go higher. Simulation shows good scaling. Seems like a smarter implementation would help, as would special- purpose parity-computation hardware. Good arguments for use of PID instead of RAID, to avoid a centralized controller that is both a bottleneck and a single point of failure.} } @TechReport{cabrera:pariotr, author = {Luis-Felipe Cabrera and Darrell D. E. Long}, title = {Swift: {Using} Distributed Disk Striping to Provide High {I/O} Data Rates}, year = {1991}, number = {CRL-91-46}, institution = {UC Santa Cruz}, note = {Appeared in {\em Computing Systems}}, keyword = {parallel I/O, disk striping, distributed file system, pario bib}, comment = {Cite cabrera:pario. See that for notes.} } @TechReport{cabrera:stripe, author = {Luis-Felipe Cabrera and Darell D. E. Long}, title = {Using Data Striping in a Local Area Network}, year = {1992}, month = {March}, number = {UCSC-CRL-92-09}, institution = {Univ. California at Santa Cruz}, keyword = {striping, parallel I/O, distributed system, pario bib}, comment = {See cabrera:swift2, cabrera:swift, cabrera:pario. Not much new here. Simulates higher-performance architectures. Shows reasonable scalability. Counts 5 inst/byte for parity computation.} } @TechReport{cabrera:swift, author = {Luis-Felipe Cabrera and Darrell D. E. Long}, title = {Swift: A Storage Architecture fo Large Objects}, year = {1990}, number = {UCSC-CRL-89-04}, institution = {U.C. Santa Cruz}, keyword = {parallel I/O, disk striping, distributed file system, multimedia, pario bib}, comment = {See cabrera:swift2. A brief outline of a design for a high-performance storage system, designed for storing and retrieving large objects like color video or visualization data at very high speed. They distribute data over several ``storage agents'', which are some form of disk or RAID. They are all connected by a high-speed network. A ``storage manager'' decides where to spread each file, what kind of reliability mechanism is used. User provides preallocation info such as size, reliability level, data rate requirements, and so forth.} } @InProceedings{cabrera:swift2, author = {Luis-Felipe Cabrera and Darell D. E. Long}, title = {Exploiting Multiple {I/O} Streams to Provide High Data-Rates}, booktitle = {Proceedings of the 1991 Summer Usenix Conference}, year = {1991}, pages = {31--48}, keyword = {parallel I/O, disk striping, distributed file system, multimedia, pario bib}, comment = {See also cabrera:swift. More detail than the other paper. Experimental results from a prototype that stripes files across a distributed file system. Gets almost linear speedup in certain cases. Much better than NFS. Simulation to extend it to larger systems.} } @InProceedings{cao:tickertaip, author = {Pei Cao and Swee Boon Lim and Shivakumar Venkataraman and John Wilkes}, title = {The {TickerTAIP} parallel {RAID} architecture}, booktitle = {Proceedings of the 20th Annual International Symposium on Computer Architecture}, year = {1993}, pages = {52--63}, keyword = {parallel I/O, RAID, pario bib}, comment = {See cao:tickertaip-tr.} } @TechReport{cao:tickertaip-tr, author = {Pei Cao and Swee Boon Lim and Shivakumar Venkataraman and John Wilkes}, title = {The {TickerTAIP} parallel {RAID} architecture}, year = {1992}, month = {December}, number = {HPL-92-151}, institution = {HP Labs}, keyword = {parallel I/O, RAID, pario bib}, comment = {A parallelized RAID architecture that distributes the RAID controller operations across several worker nodes. Multiple hosts can connect to different workers, allowing multiple paths into the array. The workers then communicate on their own fast interconnect to accomplish the requests, distributing parity computations across multiple workers. They get much better performance and reliability than plain RAID. They built a prototype and a performance simulator. Two-phase commit was needed for request atomicity, and a request sequencer was needed for serialization. Also found it was good to give the whole request info to all workers and to let them figure out what to do and when. Cite cao:tickertaip.} } @TechReport{carter:benchmark, author = {Russell Carter and Bob Ciotti and Sam Fineberg and Bill Nitzberg}, title = {{NHT-1} {I/O} Benchmarks}, year = {1992}, month = {November}, number = {RND-92-016}, institution = {NAS Systems Division, NASA Ames}, keyword = {parallel I/O, benchmark, pario bib}, comment = {Specs for three scalable-I/O benchmarks to be used for evaluating I/O for multiprocessors. One measures application I/O by mixing I/O and computation, one measures max disk I/O by reading and writing 80\% of the total RAM memory, and the last one is for sending that data from the file system, through the network, and back. See fineberg:nht1.} } @TechReport{chao:datamesh, author = {Chia Chao and Robert English and David Jacobson and Bart Sears and Alexander Stepanov and John Wilkes}, title = {{DataMesh} architecture 1.0}, year = {1992}, month = {December}, number = {HPL-92-153}, institution = {HP Labs}, keyword = {parallel I/O, parallel file system, pario bib}, comment = {A more detailed spec of the datamesh architecture, specifying components and operations. It is a block server where blocks are associatively addressed by tags. Some search operations are supported, as are atomic tag-changing operations. See also cao:tickertaip, wilkes:datamesh1, wilkes:datamesh, wilkes:houses, wilkes:lessons.} } @InProceedings{chen:eval, author = {Peter Chen and Garth Gibson and Randy Katz and David Patterson}, title = {An Evaluation of Redundant Arrays of Disks using an {Amdahl 5890}}, booktitle = {Proceedings of the 1990 ACM Sigmetrics Conference on Measurement and Modeling of Computer Systems}, year = {1990}, month = {May}, pages = {74--85}, keyword = {parallel I/O, RAID, disk array, pario bib}, comment = {A experimental validation of the performance predictions of patterson:raid, plus some extensions. Confirms that RAID level 5 (rotated parity) is best for large read/writes, and RAID level 1 (mirroring) is best for small reads/writes.} } @InProceedings{chen:maxraid, author = {Peter M. Chen and David A. Patterson}, title = {Maximizing Performance in a Striped Disk Array}, booktitle = {Proceedings of the 17th Annual International Symposium on Computer Architecture}, year = {1990}, pages = {322--331}, keyword = {parallel I/O, RAID, disk striping, pario bib}, comment = {Choosing the optimal striping unit, i.e., size of contiguous data on each disk (bit, byte, block, {\em etc.}). A small striping unit is good for low-concurrency workloads since it increases the parallelism applied to each request, but a large striping unit can support high-concurrency workloads where each independent request depends on fewer disks. They do simulations to find throughput, and thus to pick the striping unit. They find equations for the best compromise striping unit based on the concurrency and the disk parameters, or on the disk parameters alone. Some key assumptions may limit applicability, but this is not addressed.} } @TechReport{chen:raid, author = {Peter Chen and Garth Gibson and Randy Katz and David Patterson and Martin Schulze}, title = {Two papers on {RAIDs}}, year = {1988}, month = {December}, number = {UCB/CSD 88/479}, institution = {UC Berkeley}, keyword = {parallel I/O, RAID, disk array, pario bib}, comment = {Basically an updated version of patterson:raid and the prepublished version of gibson:failcorrect.} } @InProceedings{chen:raid2, author = {Peter M. Chen and Edward K. Lee and Ann L. Drapeau and Ken Lutz and Ethan L. Miller and Srinivasan Seshan and Ken Shirriff and David A. Patterson and Randy H. Katz}, title = {Performance and Design Evaluation of the {RAID-II} Storage Server}, booktitle = {IPPS~'93 Workshop on Input/Output in Parallel Computer Systems}, year = {1993}, pages = {110--120}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {A special back-end box for a Sun4 file server, that hooks a HIPPI network through a crossbar to fast memory, a parity engine, and a bunch of disks on SCSI. They pulled about 20~MB/s through it, basically disk-limited; with more disks they would hit 32--40~MB/s. Much improved over RAID-I, which was limited by the memory bandwidth of the Sun4 server.} } @InProceedings{chervenak:raid, author = {Ann L. Chervenak and Randy H. Katz}, title = {Performance of a Disk Array Prototype}, booktitle = {Proceedings of the 1991 ACM Sigmetrics Conference on Measurement and Modeling of Computer Systems}, year = {1991}, pages = {188--197}, keyword = {parallel I/O, disk array, performance evaluation, RAID, pario bib}, comment = {Measuring the performance of a RAID prototype with a Sun4/280, 28 disks on 7 SCSI strings, using 4 HBA controllers on a VME bus from the Sun. The found lots of bottlenecks really slowed them down. Under Sprite, the disks were the bottleneck for single disk I/O, single disk B/W, and string I/O. Sprite was a bottleneck for single disk I/O and String I/O. The host memory was a bottleneck for string B/W, HBA B/W, overall I/O, and overall B/W. With a simpler OS, that saved on data copying, they did better, but were still limited by the HBA, SCSI protocol, or the VME bus. Clearly they needed more parallelism in the busses and control system.} } @Manual{convex:stripe, title = {{CONVEX UNIX} Programmer's Manual, Part I}, edition = {Eighth}, year = {1988}, month = {October}, organization = {CONVEX Computer Corporation}, address = {Richardson, Texas}, keyword = {parallel I/O, parallel file system, striping, pario bib}, comment = {Implementation of striped disks on the CONVEX. Uses partitions of normal device drivers. Kernel data structure knows about the interleaving granularity, the set of partitions, sizes, etc.} } @InProceedings{copeland:bubba, author = {George Copeland and William Alexander and Ellen Boughter and Tom Keller}, title = {Data Placement in {Bubba}}, booktitle = {ACM SIGMOD Conference}, year = {1988}, month = {June}, pages = {99--108}, keyword = {parallel I/O, database, disk caching, pario bib}, comment = {A database machine. Experimental/analytical model of a placement algorithm that declusters relations across several parallel, independent disks. The declustering is done on a subset of the disks, and the choices involved are the number of disks to decluster onto, which relations to put where, and whether a relation should be cache-resident. Communications overhead limits the usefulness of declustering in some cases, depending on the workload. See boral:bubba.} } @InProceedings{corbett:vesta, author = {Peter F. Corbett and Sandra Johnson Baylor and Dror G. Feitelson}, title = {Overview of the {Vesta} Parallel File System}, booktitle = {IPPS~'93 Workshop on Input/Output in Parallel Computer Systems}, year = {1993}, pages = {1--16}, keyword = {parallel I/O, multiprocessor file system, concurrent file checkpointing, multiprocessor file system interface, pario bib}, comment = {Design of a file system for a message-passing MIMD multiprocessor to be used for scientific computing. Separate I/O nodes from compute nodes; I/O nodes and disks are viewed as a data-staging area. File system runs on I/O nodes only. Files declustered by record, among physical partitions, each residing on a separate disk, and each separately growable. Then the user maps logical partitions, one per process, on the file at open time. These are designed to be two-dimensional, so that mapping arrays of various strides and contiguities, with records as the basic unit, is easy. Various consistency and atomicity requirements. File checkpointing, really snapshotting, is built in. No client caching, no redundancy for reliability.} } @InProceedings{cormen:bmmc, author = {Thomas H. Cormen and Leonard F. Wisniewski}, title = {Asymptotically Tight Bounds for Performing {BMMC} Permutations on Parallel Disk Systems}, booktitle = {Proceedings of the 5th Annual ACM Symposium on Parallel Algorithms and Architectures}, year = {1993}, month = {June}, pages = {130--139}, keyword = {parallel I/O, algorithm, pario bib} } @InProceedings{cormen:integrate, author = {Thomas H. Cormen and David Kotz}, title = {Integrating Theory and Practice in Parallel File Systems}, booktitle = {Proceedings of the 1993 DAGS/PC Symposium}, year = {1993}, month = {June}, pages = {64--74}, organization = {Dartmouth Institute for Advanced Graduate Studies}, address = {Hanover, NH}, note = {Revised from Dartmouth PCS-TR93-188.}, keyword = {parallel I/O, multiprocessor file systems, algorithm, file system interface, dfk, pario bib}, abstract = {Several algorithms for parallel disk systems have appeared in the literature recently, and they are asymptotically optimal in terms of the number of disk accesses. Scalable systems with parallel disks must be able to run these algorithms. We present for the first time a list of capabilities that must be provided by the system to support these optimal algorithms: control over declustering, querying about the configuration, independent I/O, and turning off parity, file caching, and prefetching. We summarize recent theoretical and empirical work that justifies the need for these capabilities. In addition, we sketch an organization for a parallel file interface with low-level primitives and higher-level operations.}, comment = {Describing the file system capabilities needed by parallel I/O algorithms to effectively use a parallel disk system.} } @TechReport{cormen:integrate-tr, author = {Thomas H. Cormen and David Kotz}, title = {Integrating Theory and Practice in Parallel File Systems}, year = {1993}, month = {March}, number = {PCS-TR93--188}, institution = {Dartmouth College}, note = {Appeared in Proceedings of the 1993 DAGS/PC Symposium}, keyword = {parallel I/O, multiprocessor file systems, algorithm, file system interface, dfk, pario bib}, abstract = {Several algorithms for parallel disk systems have appeared in the literature recently, and they are asymptotically optimal in terms of the number of disk accesses. Scalable systems with parallel disks must be able to run these algorithms. We present for the first time a list of capabilities that must be provided by the system to support these optimal algorithms: control over declustering, querying about the configuration, independent I/O, and turning off parity, file caching, and prefetching. We summarize recent theoretical and empirical work that justifies the need for these capabilities. In addition, we sketch an organization for a parallel file interface with low-level primitives and higher-level operations.}, comment = {Describing the file system capabilities needed by parallel I/O algorithms to effectively use a parallel disk system. Cite cormen:integrate.} } @Article{cormen:permute, author = {Thomas H. Cormen}, title = {Fast Permuting on Disk Arrays}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January and February}, volume = {17}, number = {1--2}, pages = {41--57}, keyword = {parallel I/O algorithm, pario bib}, comment = {See also cormen:thesis.} } @PhdThesis{cormen:thesis, author = {Thomas H. Cormen}, title = {Virtual Memory for Data-Parallel Computing}, year = {1992}, school = {Department of Electrical Engineering and Computer Science, Massachusetts Institute of Technology}, keyword = {parallel I/O, algorithm, pario bib}, comment = {Lots of algorithms for out-of-core permutation problems. See also cormen:permute, cormen:integrate.} } @Misc{cray:pario2, key = {Cray90}, author = {Cray Research}, title = {{DS-41} Disk Subsystem}, year = {1990}, note = {Sales literature number MCFS-4-0790}, keyword = {parallel I/O, disk architecture, disk array, pario bib}, comment = {Glossy from Cray describing their new disk subsystem: up two four controllers and up to four ``drives'', each of which actually have four spindles. Thus, a full subsystem has 16 disks. Each drive or controller sustains 9.6 MBytes/sec sustained, for a total of 38.4 MBytes/sec. Each drive has 4.8 GBytes, for a total of 19.2 Gbytes. Access time per drive is 2--46.6 msec, average 24 msec. They don't say how the 4 spindles within a driver are controlled or arranged.} } @Unpublished{crockett:manual, author = {Thomas W. Crockett}, title = {Specification of the Operating System Interface for Parallel File Organizations}, year = {1988}, note = {Publication status unknown (ICASE technical report)}, keyword = {parallel I/O, parallel file system, pario bib}, comment = {Man pages for his Flex version of file interface. See crockett:par-files.} } @InProceedings{crockett:par-files, author = {Thomas W. Crockett}, title = {File Concepts for Parallel {I/O}}, booktitle = {Proceedings of Supercomputing '89}, year = {1989}, pages = {574--579}, keyword = {parallel I/O, file access pattern, parallel file system, pario bib}, comment = {Two views of a file: global (for sequential programs) and internal (for parallel programs). Standardized forms for these views, for long-lived files. Temp files have specialized forms. The access types are sequential, partitioned, interleaved, and self-scheduled, plus global random and partitioned random. He relates these to their best storage patterns. No mention of prefetching. Buffer cache only needed for direct (random) access. The application must specify the access pattern desired.} } @Article{csa-io, author = {T. J. M.}, title = {Now: Parallel storage to match parallel {CPU} power}, journal = {Electronics}, year = {1988}, month = {December}, volume = {61}, number = {12}, pages = {112}, keyword = {parallel I/O, disk array, pario bib} } @Article{debenedictis:modular, author = {Erik P. DeBenedictis and Juan Miguel {del Rosario}}, title = {Modular Scalable {I/O}}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January and February}, volume = {17}, number = {1--2}, pages = {122--128}, keyword = {parallel I/O, MIMD, pario bib}, comment = {Journalized version of debenedictis:pario, debenedictis:ncube, and delrosario:nCUBE.} } @InProceedings{debenedictis:ncube, author = {Erik DeBenedictis and Juan Miguel del Rosario}, title = {{nCUBE} Parallel {I/O} Software}, booktitle = {Eleventh Annual IEEE International Phoenix Conference on Computers and Communications (IPCCC)}, year = {1992}, month = {April}, pages = {0117--0124}, keyword = {parallel file system, parallel I/O, pario bib}, comment = {Interesting paper. Describes their mechanism for mapping I/O so that the file system knows both the mapping of a data structure into memory and on the disks, so that it can do the permutation and send the right data to the right disk, and back again. Interesting Unix-compatible interface. Needs to be extended to handle complex formats.} } @InProceedings{debenedictis:pario, author = {Erik DeBenedictis and Peter Madams}, title = {{nCUBE's} Parallel {I/O} with {Unix} Capability}, booktitle = {Sixth Annual Distributed-Memory Computer Conference}, year = {1991}, pages = {270--277}, keyword = {parallel I/O, multiprocessor file system, file system interface, pario bib}, comment = {Looks like they give the byte-level mapping, then do normal reads and writes; mapping routes the data to and from the correct place. But it does let you intermix comp with I/O. Elegant concept. Nice interface. Works best for cases where 1) data layout known in advance, data format is known, and mapping is regular enough for easy specification. I think that irregular or unknown mappings could still be done with a flat mapping.} } @Article{debenedictis:scalable-unix, author = {Erik P. DeBenedictis and Stephen C. Johnson}, title = {Extending {Unix} for Scalable Computing}, journal = {IEEE Computer}, year = {1993}, note = {To appear}, keyword = {parallel I/O, Unix, pario bib}, comment = {A more polished version of his other papers with del Rosario. The mapping-based mechanism is released in nCUBE software 3.0. It does support shared file pointers for self-scheduled I/O, as well as support for variable-length records, and asynchronous I/O (although the primary mechanism is for synchronous, i.e., SPMD, I/O). The basic idea of scalable pipes (between programs, devices, {\em etc.}) with mappings that determine routings to units seems like a good idea.} } @Article{delrosario:ncube, author = {Juan Miguel del Rosario}, title = {High Performance Parallel {I/O} on the {nCUBE}~2}, journal = {Institute of Electronics, Information and Communications Engineers (Transactions)}, year = {1992}, month = {August}, note = {To appear}, keyword = {parallel I/O, parallel file system, pario bib}, comment = {More detail on the mapping functions, and more flexible mapping functions (can be user specified, or some from a library). Striped disks, parallel pipes, graphics, and HIPPI supported.} } @InProceedings{delrosario:two-phase, author = {Juan Miguel {del Rosario} and Rajesh Bordawekar and Alok Choudhary}, title = {Improved Parallel {I/O} via a Two-Phase Run-time Access Strategy}, booktitle = {IPPS~'93 Workshop on Input/Output in Parallel Computer Systems}, year = {1993}, pages = {56--70}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {See comments for delrosario:two-phase-tr.} } @TechReport{delrosario:two-phase-tr, author = {Juan Miguel del Rosario and Rajesh Bordawekar and Alok Choudhary}, title = {Improving Parallel {I/O} Performance using a Two-Phase Access Strategy}, year = {1993}, number = {SCCS--406}, institution = {NPAC at Syracuse University}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {They show performance measurements of various data distributions on an nCUBE and the Touchstone Delta, for reading matrix from a column-major file striped across disks, into some distribution across procs. Distributions that don't match the I/O distribution are really terrible, due to having more, smaller requests, and sometimes mismatching the stripe size (getting seg-like contention) or block size (reading partial blocks). They find it is better to read the file using the `best' distribution, then to reshuffle the data in memory. Big speedups.} } @TechReport{dewitt:gamma, author = {David J. {DeWitt} and Robert H. Gerber and Goetz Graefe and Michael L. Heytens and Krishna B. Kumar and M. Muralikrishna}, title = {{GAMMA}: A High Performance Dataflow Database Machine}, year = {1986}, month = {March}, number = {TR-635}, institution = {Dept. of Computer Science, Univ. of Wisconsin-Madison}, keyword = {parallel I/O, database, GAMMA, pario bib}, comment = {Better to cite dewitt:gamma3. Multiprocessor (VAX) DBMS on a token ring with disk at each processor. They thought this was better than separating disks from processors by network since then network must handle {\em all} I/O rather than just what needs to move. Conjecture that shared memory might be best interconnection network. Relations are horizontally partitioned in some way, and each processor reads its own set and operates on them there.} } @InProceedings{dewitt:gamma-dbm, author = {David J. DeWitt and Shahram Ghandeharizadeh and Donovan Schneider}, title = {A Performance Analysis of the {GAMMA} Database Machine}, booktitle = {ACM SIGMOD Conference}, year = {1988}, month = {June}, pages = {350--360}, keyword = {parallel I/O, database, performance analysis, Teradata, GAMMA, pario bib}, comment = {Compared Gamma with Teradata. Various operations on big relations. See fairly good linear speedup in many cases. They vary only one variable at a time. Their bottleneck was at the memory-network interface.} } @InProceedings{dewitt:gamma2, author = {David J. DeWitt and Robert H. Gerber and Goetz Graefe and Michael L. Heytens and Krishna B. Kumar and M. Muralikrishna}, title = {{GAMMA} --- {A} High Performance Dataflow Database Machine}, booktitle = {12th International Conference on Very Large Data Bases}, year = {1986}, pages = {228--237}, keyword = {parallel I/O, database, GAMMA, pario bib}, comment = {Almost identical to dewitt:gamma, with some updates. See that for comments, but cite this one. See also dewitt:gamma3 for a more recent paper.} } @Article{dewitt:gamma3, author = {David J. DeWitt and Shahram Ghandeharizadeh and Donovan A. Schneider and Allan Bricker and Hui-I Hsaio and Rick Rasmussen}, title = {The {Gamma} Database Machine Project}, journal = {IEEE Transactions on Knowledge and Data Engineering}, year = {1990}, month = {March}, volume = {2}, number = {1}, pages = {44--62}, keyword = {parallel I/O, database, GAMMA, pario bib}, comment = {An updated version of dewitt:gamma2, with elements of dewitt:gamma-dbm. Really only need to cite this one. This is the same basic idea as dewitt:gamma2, but after they ported the system from the VAXen to an iPSC/2. Speedup results good. Question: how about comparing it to a single-processor, single-disk system with increasing disk bandwidth? That is, how much of their speedup comes from the increasing disk bandwidth, and how much from the actual use of parallelism?} } @Article{dewitt:pardbs, author = {David DeWitt and Jim Gray}, title = {Parallel Database Systems: The Future of High-Performance Database Systems}, journal = {Communications of the ACM}, year = {1992}, month = {June}, volume = {35}, number = {6}, pages = {85--98}, keyword = {database, parallel computing, parallel I/O, pario bib}, comment = {They point out that the comments of boral:critique --- that database machines were doomed --- did really not come true. Their new thesis is that specialized hardware is not necessary and has not been successful, but that parallel database systems are clearly succesful. In particular, they argue for shared-nothing layouts. They survey the state-of-the-art parallel DB systems. Earlier version in Computer Architecture News 12/90.} } @InProceedings{dewitt:parsort, author = {David J. DeWitt and Jeffrey F. Naughton and Donovan A. Schneider}, title = {Parallel Sorting on a Shared-Nothing Architecture using Probabilistic Splitting}, booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems}, year = {1991}, pages = {280--291}, keyword = {parallel I/O, parallel database, external sorting, pario bib}, comment = {Comparing exact and probabilistic splitting for external sorting on a database. Model and experimental results from Gamma machine. Basically, the idea is to decide on a splitting vector, which defines $N$ buckets for an $N$-process program, and have each program read its initial segment of the data and send each element to the appropriate bucket (other process). All elements received are written to disks as small sorted runs. Then each process mergesorts its runs. Probabilistic split uses only a sample of the elements to define the vector.} } @InProceedings{dibble:bridge, author = {Peter Dibble and Michael Scott and Carla Ellis}, title = {Bridge: {A} High-Performance File System for Parallel Processors}, booktitle = {Proceedings of the Eighth International Conference on Distributed Computer Systems}, year = {1988}, month = {June}, pages = {154--161}, keyword = {Carla, Bridge, multiprocessor file system, Butterfly, parallel I/O, pario bib}, comment = {See ellis:interleaved, dibble:*} } @Article{dibble:sort, author = {Peter C. Dibble and Michael L. Scott}, title = {External Sorting on a Parallel Interleaved File System}, journal = {University of Rochester 1989--90 Computer Science and Engineering Research Review}, year = {1989}, keyword = {parallel I/O, sorting, merging, parallel file reference pattern, pario bib}, comment = {Cite dibble:sort2. Based on Bridge file system (see dibble:bridge). Parallel external merge-sort tool. Sort file on each disk, then do a parallel merge. The merge is serialized by the token-passing mechanism, but the I/O time dominates. The key is to keep disks busy constantly. Uses some read-ahead, write-behind to control fluctuations in disk request timing. Analytical analysis of the algorithm lends insight and matches well with the timings. Locality is a big win in Bridge tools.} } @Article{dibble:sort2, author = {Peter C. Dibble and Michael L. Scott}, title = {Beyond Striping: The {Bridge} Multiprocessor File System}, journal = {Computer Architecture News}, year = {1989}, month = {September}, volume = {19}, number = {5}, keyword = {parallel I/O, external sorting, merging, parallel file reference pattern, pario bib}, comment = {Subset of dibble:sort. Extra comments to distinguish from striping and RAID work. Good point that those projects are addressing a different bottleneck, and that they can provide essentially unlimited bandwidth to a single processor. Bridge could use those as individual file systems, parallelizing the overall file system, avoiding the software bottleneck. Using a very-reliable RAID at each node in Bridge could safeguard Bridge against failure for reasonable periods, removing reliability from Bridge level.} } @PhdThesis{dibble:thesis, author = {Peter C. Dibble}, title = {A Parallel Interleaved File System}, year = {1990}, month = {March}, school = {University of Rochester}, keyword = {parallel I/O, external sorting, merging, parallel file system, pario bib}, comment = {Also TR 334. Mostly covered by other papers, but includes good introduction, discussion of reliability and maintenance issues, and implementation. Short mention of prefetching implied that simple OBL was counter-productive, but later tool-specific buffering with read-ahead was often important. The three interfaces to the PIFS server are interesting. A fourth compromise might help make tools easier to write.} } @Article{dunigan:hypercubes, author = {T. H. Dunigan}, title = {Performance of the {Intel iPSC/860} and {Ncube 6400} hypercubes}, journal = {Parallel Computing}, year = {1991}, volume = {17}, pages = {1285--1302}, keyword = {intel, ncube, hypercube, multiprocessor architecture, performance, parallel I/O, pario bib}, comment = {An excellent paper presenting lots of detailed performance measurements on the iPSC/1, iPSC/2, iPSC/860, nCUBE 3200, and nCUBE 6400: arithmetic, FLOPS, communication, I/O. Tables of numbers provide details needed for simulation. iPSC/860 definitely is fastest, but way out of balance wrt communication vs. computation. Number of message hops is not so important in newer machines.} } @TechReport{edelson:pario, author = {Daniel Edelson and Darrell D. E. Long}, title = {High Speed Disk {I/O} for Parallel Computers}, year = {1990}, month = {January}, number = {UCSC-CRL-90-02}, institution = {Baskin Center for Computer Engineering and Information Science}, keyword = {parallel I/O, disk caching, parallel file system, log-structured file system, Intel iPSC/2, pario bib}, comment = {Essentially a small literature survey. No new ideas here, but it is a reasonable overview of the situation. Mentions caching, striping, disk layout optimization, log-structured file systems, and Bridge and Intel CFS. Plugs their ``Swift'' architecture (see cabrera:pario).} } @TechReport{ellis:interleaved, author = {Carla Ellis and P. Dibble}, title = {An Interleaved File System for the {Butterfly}}, year = {1987}, month = {January}, number = {CS-1987-4}, institution = {Dept. of Computer Science, Duke University}, keyword = {Carla, multiprocessor file system, Bridge, Butterfly, parallel I/O, pario bib}, comment = {See dibble:bridge} } @InProceedings{ellis:prefetch, author = {Carla Schlatter Ellis and David Kotz}, title = {Prefetching in File Systems for {MIMD} Multiprocessors}, booktitle = {Proceedings of the 1989 International Conference on Parallel Processing}, year = {1989}, month = {August}, pages = {I:306--314}, keyword = {dfk, parallel file system, prefetching, disk caching, MIMD, parallel I/O, pario bib}, abstract = {The problem of providing file I/O to parallel programs has been largely neglected in the development of multiprocessor systems. There are two essential elements of any file system design intended for a highly parallel environment: parallel I/O and effective caching schemes. This paper concentrates on the second aspect of file system design and specifically, on the question of whether prefetching blocks of the file into the block cache can effectively reduce overall execution time of a parallel computation, even under favorable assumptions. Experiments have been conducted with an interleaved file system testbed on the Butterfly Plus multiprocessor. Results of these experiments suggest that 1) the hit ratio, the accepted measure in traditional caching studies, may not be an adequate measure of performance when the workload consists of parallel computations and parallel file access patterns, 2) caching with prefetching can significantly improve the hit ratio and the average time to perform an I/O operation, and 3) an improvement in overall execution time has been observed in most cases. In spite of these gains, prefetching sometimes results in increased execution times (a negative result, given the optimistic nature of the study). We explore why is it not trivial to translate savings on individual I/O requests into consistently better overall performance and identify the key problems that need to be addressed in order to improve the potential of prefetching techniques in this environment.}, comment = {Superseded by kotz:prefetch.} } @InProceedings{fineberg:nht1, author = {Samuel A. Fineberg}, title = {Implementing the {NHT-1} application {I/O} benchmark}, booktitle = {IPPS~'93 Workshop on Input/Output in Parallel Computer Systems}, year = {1993}, pages = {37--55}, keyword = {parallel I/O, multiprocessor file system, benchmark, pario bib}, comment = {See also carter:benchmark. Some preliminary results from one of their benchmarks. Note: ``I was only using a single Cray disk with a maximum transfer rate of 9.6MBytes/sec.'' --- Fineberg.} } @InProceedings{flynn:hyper-fs, author = {Robert J. Flynn and Haldun Hadimioglu}, title = {A Distributed Hypercube File System}, booktitle = {Third Conference on Hypercube Concurrent Computers and Applications}, year = {1988}, pages = {1375--1381}, keyword = {parallel I/O, hypercube, parallel file system, pario bib}, comment = {For hypercube-like architectures. Interleaved files, though flexible. Separate network for I/O, maybe not hypercube. I/O is blocked and buffered -- no coherency or prefetching issues discussed. Buffered close to point of use. Parallel access is ok. Broadcast supported? I/O nodes distinguished from comp nodes. I/O hooked to front-end too. See hadimioglu:fs and hadimioglu:hyperfs} } @TechReport{foster:climate, author = {Ian Foster and Mark Henderson and Rick Stevens}, title = {Data Systems for Parallel Climate Models}, year = {1991}, month = {July}, number = {ANL/MCS-TM-169}, institution = {Argonne National Laboratory}, note = {Copies of slides from a workshop by this title, with these organizers.}, keyword = {parallel I/O, parallel database, multiprocessor file system, climate model, grand challenge, tertiary storage, archival storage, RAID, tape robot, pario bib}, comment = {Includes the slides from many presenters covering climate modeling, data requirements for climate models, archival storage systems, multiprocessor file systems, and so forth. NCAR data storage growth rates (p.~54), 500 bytes per MFlop, or about 8~TB/year with Y/MP-8. Average file length 26.2~MB. Migration across both storage hierarchy and generations of media. LLNL researcher: typical 50-year, 3-dimensional model with 5-degree resolution will produce 75~GB of output. Attendee list included.} } @Book{fox:cubes, author = {G. Fox and M. Johnson and G. Lyzenga and S. Otto and J. Salmon and D. Walker}, title = {Solving Problems on Concurrent Processors}, year = {1988}, volume = {1}, publisher = {Prentice Hall}, address = {Englewood Cliffs, NJ}, keyword = {hypercube, pario bib}, comment = {See fox:cubix for parallel I/O.} } @InBook{fox:cubix, author = {G. Fox and M. Johnson and G. Lyzenga and S. Otto and J. Salmon and D. Walker}, title = {Solving Problems on Concurrent Processors}, chapter = {6 and 15}, year = {1988}, volume = {1}, publisher = {Prentice Hall}, address = {Englewood Cliffs, NJ}, keyword = {parallel file system, hypercube, pario bib}, comment = {Parallel I/O control, called CUBIX. Interesting method. Depends a lot on ``loose synchronization'', which is sortof SIMD-like.} } @InProceedings{french:balance, author = {James C. French}, title = {Characterizing the Balance of Parallel {I/O} Systems}, booktitle = {Sixth Annual Distributed-Memory Computer Conference}, year = {1991}, pages = {724--727}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {Proposes the min\_SAR, max\_SAR, and ratio phi as measures of aggregate file system bandwidth. Has to do with load balance issues; how well the file system balances between competing nodes in a heavy-use period.} } @InProceedings{french:ipsc2io, author = {James C. French and Terrence W. Pratt and Mriganka Das}, title = {Performance Measurement of a Parallel Input/Output System for the {Intel iPSC/2} Hypercube}, booktitle = {Proceedings of the 1991 ACM Sigmetrics Conference on Measurement and Modeling of Computer Systems}, year = {1991}, pages = {178--187}, keyword = {parallel I/O, Intel iPSC/2, pario bib}, comment = {See french:ipsc2io-tr. Cite french:ipsc2io-jpdc.} } @Article{french:ipsc2io-jpdc, author = {James C. French and Terrence W. Pratt and Mriganka Das}, title = {Performance Measurement of the {Concurrent File System} of the {Intel iPSC/2} Hypercube}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January and February}, volume = {17}, number = {1--2}, pages = {115--121}, keyword = {parallel I/O, Intel iPSC/2, pario bib}, comment = {See french:ipsc2io-tr.} } @TechReport{french:ipsc2io-tr, author = {James C. French and Terrence W. Pratt and Mriganka Das}, title = {Performance Measurement of a Parallel Input/Output System for the {Intel iPSC/2} Hypercube}, year = {1991}, number = {IPC-TR-91-002}, institution = {Institute for Parallel Computation, University of Virginia}, note = {Appeared in Journal of Parallel and Distributed Computing}, keyword = {parallel I/O, Intel iPSC/2, disk caching, prefetching, pario bib}, comment = {Cite french:ipsc2io-jpdc. Really nice study of performance of existing CFS system on 32-node + 4 I/O-node iPSC/2. They show big improvements due to declustering, preallocation, caching, and prefetching. See also pratt:twofs.} } @Article{garcia:striping-reliability, author = {Hector Garcia-Molina and Kenneth Salem}, title = {The Impact of Disk Striping on Reliability}, journal = {{IEEE} Database Engineering Bulletin}, year = {1988}, month = {March}, volume = {11}, number = {1}, pages = {26--39}, keyword = {parallel I/O, disk striping, reliability, disk array, pario bib}, comment = {Reliability of striped filesystems may not be as bad as you think. Parity disks help. Performance improvements limited to small number of disks ($n<10$). Good point: efficiency of striping will increase as the gap between CPU/memory performance and disk speed and file size widens. Reliability may be better if measured in terms of performing a task in time T, since the striped version may take less time. This gives disks less opportunity to fail during that period. Also consider the CPU failure mode, and its use over less time.} } @Article{ghosh:hyper, author = {Joydeep Ghosh and Kelvin D. Goveas and Jeffrey T. Draper}, title = {Performance Evaluation of a Parallel {I/O} Subsystem for Hypercube Multiprocessors}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January and February}, volume = {17}, number = {1--2}, pages = {90--106}, keyword = {parallel I/O, MIMD, multiprocessor architecture, hypercube, pario bib}, comment = {Given a hypercube that has I/O nodes scattered throughout, they compare a plain one to one that has the I/O nodes also interconnected with a half-size hypercube. They show that this has better performance because the I/O traffic does not interfere with normal inter-PE traffic.} } @Article{gibson:arrays, author = {Garth A. Gibson}, title = {Designing Disk Arrays for High Data Reliability}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January/February}, volume = {17}, number = {1--2}, pages = {4--27}, keyword = {parallel I/O, RAID, redundancy, reliability, pario bib} } @Book{gibson:book, author = {Garth A. Gibson}, title = {Redundant Disk Arrays: Reliable, Parallel Secondary Storage}, year = {1992}, series = {ACM Distinguished Dissertations}, publisher = {MIT Press}, keyword = {parallel I/O, disk array, disk striping, reliability, RAID, pario bib}, comment = {Excellent book. Good source for discussion of the access gap and transfer gap, disk lifetimes, parity methods, reliability analysis, and generally the case for RAIDs. Page 220 he briefly discusses multiprocessor I/O architecture.} } @InProceedings{gibson:failcorrect, author = {Garth A. Gibson and Lisa Hellerstein and Richard M. Karp and Randy H. Katz and David A. Patterson}, title = {Failure Correction Techniques for Large Disk Arrays}, booktitle = {Third International Conference on Architectural Support for Programming Languages and Operating Systems}, year = {1989}, month = {April}, pages = {123--132}, keyword = {parallel I/O, disk array, RAID, reliability, pario bib}, comment = {See gibson:raid for comments since it is the same.} } @TechReport{gibson:raid, author = {Garth Gibson and Lisa Hellerstein and Richard Karp and Randy Katz and David Patterson}, title = {Coding techniques for handling failures in large disk arrays}, year = {1988}, month = {December}, number = {UCB/CSD 88/477}, institution = {UC Berkeley}, keyword = {parallel I/O, RAID, reliability, disk array, pario bib}, comment = {Published as gibson:failcorrect. Design of parity encodings to handle more than one bit failure in any group. Their 2-bit correcting codes are good enough for 1000-disk RAIDs that 3-bit correction is not needed.} } @InProceedings{gray:stripe, author = {Jim Gray and Bob Horst and Mark Walker}, title = {Parity Striping of Disk Arrays: Low-cost Reliable Storage with Acceptable Throughput}, booktitle = {Proceedings of the 16th VLDB Conference}, year = {1990}, pages = {148--159}, keyword = {disk striping, reliability, pario bib}, comment = {Parity striping, a variation of RAID 5, is just a different way of mapping blocks to disks. It groups parity blocks into extents, and does not stripe the data blocks. A logical disk is mostly contained in one physical disk, plus a parity region in another disk. Good for transaction processing workloads. Has the low cost/GByte of RAID, the reliability of RAID, without the high transfer rate of RAID, but with much better requests/second throughput than RAID 5. (But 40\% worse than mirrors.) So it is a compromise between RAID and mirrors.} } @InProceedings{grimshaw:elfs, author = {Andrew S. Grimshaw and Loyot, Jr., Edmond C.}, title = {{ELFS:} Object-oriented Extensible File Systems}, booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems}, year = {1991}, pages = {177}, keyword = {parallel I/O, parallel file system, object-oriented, file system interface, pario bib}, comment = {Full paper grimshaw:ELFSTR. Really neat idea. Uses OO interface to file system, which is mostly in user mode. The object classes represent particular access patterns (e.g., 2-D matrix) in the file, and hide the actual structure of the file. The object knows enough to taylor the cache and prefetch algorithms to the semantics. Class inheritance allows layering.} } @TechReport{grimshaw:elfstr, author = {Andrew S. Grimshaw and Loyot, Jr., Edmond C.}, title = {{ELFS:} Object-oriented Extensible File Systems}, year = {1991}, month = {July}, number = {TR-91-14}, institution = {Univ. of Virginia Computer Science Department}, keyword = {parallel I/O, parallel file system, object-oriented, file system interface, Intel iPSC/2, pario bib}, comment = {From uvacs.cs.virginia.edu. See also grimshaw:elfs. provide the high bandwidth and low latency, reduce the cognitive burden on the programmer, and manage proliferation of data formats and architectural changes. Details of the plan to make an extensible OO interface to file system. Objects each have a separate thread of control, so they can do asynchronous activity like prefetching and caching in the background, and support multiple outstanding requests. The Mentat object system makes it easy for them to support pipelining of I/O with I/O and computation in the user program. Let the user choose type of consistency needed. See grimshaw:objects for more results.} } @InProceedings{grimshaw:objects, author = {Andrew S. Grimshaw and Jeff Prem}, title = {High Performance Parallel File Objects}, booktitle = {Sixth Annual Distributed-Memory Computer Conference}, year = {1991}, pages = {720--723}, keyword = {parallel I/O, multiprocessor file system, file system interface, pario bib}, comment = {Not much new from ELFS TR. A better citation than grimshaw:ELFS though. Does give CFS performance results. Note on 721 he says that CFS prefetches into ``local memory from which to satisfy future user requests {\em that never come.}'' This happens if the local access pattern isn't purely sequential, as in an interleaved pattern.} } @InProceedings{hadimioglu:fs, author = {Haldun Hadimioglu and Robert J. Flynn}, title = {The Architectural Design of a Tightly-Coupled Distributed Hypercube File System}, booktitle = {Fourth Conference on Hypercube Concurrent Computers and Applications}, year = {1989}, pages = {147--150}, keyword = {hypercube, multiprocessor file system, pario bib}, comment = {An early paper describing a proposed file system for hypercubes. The writing is almost impenetrable. Confusing and not at all clear what they propose. See also hadimioglu:hyperfs and flynn:hyper-fs.} } @InProceedings{hadimioglu:hyperfs, author = {Haldun Hadimioglu and Robert J. Flynn}, title = {The Design and Analysis of a Tightly Coupled Hypercube File System}, booktitle = {Fifth Annual Distributed-Memory Computer Conference}, year = {1990}, pages = {1405--1410}, keyword = {multiprocessor file system, parallel I/O, hypercube, pario bib}, comment = {Describes a hypercube file system based on I/O nodes and processor nodes. A few results from a hypercube simulator. See hadimioglu:fs and flynn:hyper-fs.} } @InProceedings{hartman:zebra, author = {John H. Hartman and John K. Ousterhout}, title = {{Zebra: A} Striped Network File System}, booktitle = {Proceedings of the Usenix File Systems Workshop}, year = {1992}, month = {May}, pages = {71--78}, keyword = {disk striping, distributed file system, pario bib}, comment = {Not a parallel file system, but worth comparing to Swift. Certainly, a similar idea could be used in a multiprocessor.} } @InProceedings{hatcher:linda, author = {Philip J. Hatcher and Michael J. Quinn}, title = {{C*-Linda:} {A} Programming Environment with Multiple Data-Parallel Modules and Parallel {I/O}}, booktitle = {Proceedings of the Twenty-Fourth Annual Hawaii International Conference on System Sciences}, year = {1991}, pages = {382--389}, keyword = {parallel I/O, Linda, data parallel, nCUBE, parallel graphics, heterogeneous computing, pario bib}, comment = {C*-Linda is basically a combination of C* and C-Linda. The model is that of several SIMD modules interacting in a MIMD fashion through a Linda tuple space. The modules are created using {\tt eval}, as in Linda. In this case, the compiler statically assigns each eval to a separate subcube on an nCUBE 3200, although they also talk about multiprogramming several modules on a subcube (not supported by VERTEX). They envision having separate modules running on the nCUBE's graphics processors, or having the file system directly talk to the tuple space, to support I/O. They also envision talking to modules elsewhere on a network, e.g., a workstation, through the tuple space. They reject the idea of sharing memory between modules due to the lack of synchrony between modules, and message passing because it is error-prone.} } @InProceedings{hayes:ncube, author = {John P. Hayes and Trevor N. Mudge and Quentin F. Stout and Stephen Colley and John Palmer}, title = {Architecture of a Hypercube Supercomputer}, booktitle = {Proceedings of the 1986 International Conference on Parallel Processing}, year = {1986}, pages = {653--660}, keyword = {hypercube, parallel architecture, nCUBE, pario bib}, comment = {Description of the first nCUBE, the NCUBE/ten. Good historical background about hypercubes. Talks about their design choices. Says a little about the file system --- basically just a way of mounting disks on top of each other, within the nCUBE and to other nCUBEs.} } @Book{hennessy:arch, author = {John L. Hennessy and David A. Patterson}, title = {Computer Architecture: A Quantitative Approach}, year = {1990}, publisher = {Morgan Kaufman}, keyword = {computer architecture, textbook, pario bib}, comment = {Looks like a great coverage of architecture. Of course a chapter on I/O (that mentions RAID).} } @Article{herbst:bottleneck, author = {Kris Herbst}, title = {Trends in Mass Storage: vendors seek solutions to growing {I/O} bottleneck}, journal = {Supercomputing Review}, year = {1991}, month = {March}, pages = {46--49}, keyword = {parallel I/O, disk media, optical disk, holographic storage, trends, tape storage, parallel transfer disk, disk striping, pario bib}, comment = {A good overview of the current state-of-the art in March 1991, including particular numbers and vendor names. They discuss disk media (density, rotation, {\em etc.}), parallel transfer disks, disk arrays, parity and RAID, HiPPI, tape archives, optical memory, and holographic storage. Rotation speeds can increase as diameter goes down. Density increases are often offset by slower head settling times. Disk arrays will hit their ``heydey'' in the 1990s. Trend toward network-attached storage devices, that don't need a computer as a server.} } @InProceedings{hersch:pixmap, author = {Roger D. Hersch}, title = {Parallel Storage and Retrieval of Pixmap Images}, booktitle = {Proceedings of the Twelfth IEEE Symposium on Mass Storage Systems}, year = {1993}, pages = {221--226}, keyword = {parallel I/O, file system, pario bib}, comment = {Ways to arrange 2-d images on disk arrays that have multiple processors (like Datamesh), so that retrieval time for images or subimages is minimized.} } @InProceedings{hou:disk, author = {Robert Y. Hou and Gregory R. Ganger and Yale N. Patt and Charles E. Gimarc}, title = {Issues and Problems in the {I/O} Subsystem, Part {I} --- {The} Magnetic Disk}, booktitle = {Proceedings of the Twenty-Fifth Annual Hawaii International Conference on System Sciences}, year = {1992}, pages = {48--57}, keyword = {parallel I/O, pario bib}, comment = {A short summary of disk I/O issues: disk technology, latency reduction, parallel I/O, {\em etc.}. Nothing new.} } @InProceedings{hsiao:decluster, author = {Hui-I Hsiao and David DeWitt}, title = {{Chained Declustering}: {A} New Availability Strategy for Multiprocessor Database Machines}, booktitle = {Proceedings of 6th International Data Engineering Conference}, year = {1990}, pages = {456--465}, keyword = {disk array, reliability, parallel I/O, pario bib}, comment = {Chained declustering has cost like mirroring, since it replicates each block, but has better load increase during failure than mirrors, interleaved declustering, or RAID. (Or parity striping (my guess)). Has reliability between that of mirrors and RAID, and much better than interleaved declustering. Would also be much easier in a distributed environment. See hsiao:diskrep.} } @InProceedings{hsiao:diskrep, author = {Hui-I Hsiao and David DeWitt}, title = {A Performance Study of Three High Availability Data Replication Strategies}, booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems}, year = {1991}, pages = {18--28}, keyword = {disk array, reliability, disk mirroring, parallel I/O, pario bib}, comment = {Compares mirrored disks (MD) with interleaved declustering (ID) with chained declustering (CD). ID and CD found to have much better performance in normal and failure modes. See hsiao:decluster.} } @Article{hsiao:diskrep2, author = {Hui-I Hsiao and David DeWitt}, title = {A Performance Study of Three High Availability Data Replication Strategies}, journal = {Journal of Distributed and Parallel Databases}, year = {1993}, month = {January}, volume = {1}, number = {1}, pages = {53--79}, keyword = {disk array, reliability, disk mirroring, parallel I/O, pario bib}, comment = {See hsiao:diskrep.} } @MastersThesis{husmann:format, author = {Harlan Edward Husmann}, title = {High-Speed Format Conversion and Parallel {I/O} in Numerical Programs}, year = {1984}, month = {January}, school = {Department of Computer Science, Univ. of Illinois at Urbana-Champaign}, note = {Available as TR number UIUCDCS-R-84-1152.}, keyword = {parallel I/O, I/O, pario bib}, comment = {Does FORTRAN format conversion in software in parallel or in hardware, to obtain good speedups for lots of programs. However he found that increasing the I/O bandwidth was the most significant change that could be made in the parallel program.} } @Booklet{intel:examples, key = {Intel}, title = {Concurrent {I/O} Application Examples}, year = {1989}, howpublished = {Intel Corporation Background Information}, keyword = {file access pattern, parallel I/O, Intel iPSC/2, hypercube, pario bib}, comment = {Lists several examples and the amount and types of data they require, and how much bandwidth. Fluid flow modeling, Molecular modeling, Seismic processing, and Tactical and strategic systems.} } @Booklet{intel:ipsc2io, key = {Intel}, title = {{iPSC/2} {I/O} Facilities}, year = {1988}, howpublished = {Intel Corporation}, note = {Order number 280120-001}, keyword = {parallel I/O, hypercube, Intel iPSC/2, pario bib}, comment = {Simple overview, not much detail. See intel:ipsc2, pierce:pario, asbury:fortranio. Separate I/O nodes from compute nodes. Each I/O node has a SCSI bus to the disks, and communicates with other nodes in the system via Direct-Connect hypercube routing.} } @Booklet{intel:paragon, key = {Intel}, title = {Paragon {XP/S} Product Overview}, year = {1991}, howpublished = {Intel Corporation}, keyword = {parallel architecture, parallel I/O, Intel, pario bib}, comment = {Not a bad glossy.} } @Misc{intelio, key = {Intel}, title = {Intel beefs up its {iPSC/2} supercomputer's {I/O} and memory capabilities}, year = {1988}, month = {November}, volume = {61}, number = {11}, pages = {24}, howpublished = {Electronics}, keyword = {parallel I/O, hypercube, Intel iPSC/2, pario bib} } @Proceedings{ipps-io93, title = {IPPS~'93 Workshop on Input/Output in Parallel Computer Systems}, editor = {Ravi Jain and John Werth and J. C. Browne}, year = {1993}, month = {April}, address = {Newport Beach, CA}, keyword = {parallel I/O, multiprocessor file system, pario bib} } @Article{jain:pario, author = {Ravid Jain and Kiran Somalwar and John Werth and J. C. Browne}, title = {Scheduling Parallel {I/O} Operations in Multiple Bus Systems}, journal = {Journal of Parallel and Distributed Computing}, year = {1992}, month = {December}, volume = {16}, number = {4}, pages = {353--362}, keyword = {parallel I/O, shared memory, scheduling, pario bib} } @PhdThesis{jensen:thesis, author = {David Wayne Jensen}, title = {Disk {I/O} In High-Performance Computing Systems}, year = {1993}, school = {Univ. Illinois, Urbana-Champagne}, keyword = {parallel I/O, pario bib} } @InProceedings{johnson:insertions, author = {Theodore Johnson}, title = {Supporting Insertions and Deletions in Striped Parallel Filesystems}, booktitle = {Proceedings of the Seventh International Parallel Processing Symposium}, year = {1993}, pages = {425--433}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {If you insert blocks into a striped file, you mess up the nice striping. So he breaks the file into striped extents, and keeps track of the extents with a distributed B-tree index. Deletions also fit into the same scheme.} } @Article{johnson:wave, author = {Olin G. Johnson}, title = {Three-dimensional Wave Equation Computations on Vector Computers}, journal = {Proceedings of the IEEE}, year = {1984}, month = {January}, volume = {72}, number = {1}, pages = {90--95}, keyword = {computational physics, parallel I/O, pario bib}, comment = {Old paper on the need for large memory and fast paging and I/O in out-of-core solutions to 3-d seismic modeling. They used 4-way parallel I/O to support their job. Needed to transfer a 3-d matrix in and out of memory by rows, columns, and vertical columns. Stored in block-structured form to improve locality on the disk.} } @Article{katz:diskarch, author = {Randy H. Katz and Garth A. Gibson and David A. Patterson}, title = {Disk System Architectures for High Performance Computing}, journal = {Proceedings of the IEEE}, year = {1989}, month = {December}, volume = {77}, number = {12}, pages = {1842--1858}, keyword = {parallel I/O, RAID, disk striping, pario bib}, comment = {Good review of the background of disks and I/O architectures, but a shorter RAID presentation than patterson:RAID. Also addresses controller structure. Good ref for the I/O crisis background, though they don't use that term here. Good taxonomy of previous array techniques.} } @Article{katz:io-subsys, author = {Randy H. Katz and John K. Ousterhout and David A. Patterson and Michael R. Stonebraker}, title = {A Project on High Performance {I/O} Subsystems}, journal = {{IEEE} Database Engineering Bulletin}, year = {1988}, month = {March}, volume = {11}, number = {1}, pages = {40--47}, keyword = {parallel I/O, RAID, Sprite, reliability, disk striping, disk array, pario bib}, comment = {Early RAID project paper. Describes the Berkeley team's plan to use an array of small (100M) hard disks as an I/O server for network file service, transaction processing, and supercomputer I/O. Considering performance, reliability, and flexibility. Initially hooked to their SPUR multiprocessor, using Sprite operating system, new filesystem. Either asynchronous striped or independent operation. Supercomputer I/O is characterized as sequential, minimum latency, low throughput. Use of parity disks to boost reliability. Files may be striped across one or more disks and extend over several sectors, thus a two-dimensional filesystem; striping need not involve all disks.} } @InProceedings{katz:netfs, author = {Randy H. Katz}, title = {Network-Attached Storage Systems}, booktitle = {Scalable High Performance Computing Conference}, year = {1992}, pages = {68--75}, keyword = {distributed file system, supercomputer file system, file striping, RAID, parallel I/O, pario bib}, comment = {Comments on the emerging trend of file systems for mainframes and supercomputers that are not attached directly to the computer, but instead to a network attached to the computer. Avoiding data copying seems to be a critical issue in the OS and controllers, for disk and network interfaces. Describes RAID-II prototype.} } @Article{katz:update, author = {Randy H. Katz and John K. Ousterhout and David A. Patterson and Peter Chen and Ann Chervenak and Rich Drewes and Garth Gibson and Ed Lee and Ken Lutz and Ethan Miller and Mendel Rosenblum}, title = {A Project on High Performance {I/O} Subsystems}, journal = {Computer Architecture News}, year = {1989}, month = {September}, volume = {17}, number = {5}, pages = {24--31}, keyword = {parallel I/O, RAID, reliability, disk array, pario bib}, comment = {A short summary of the RAID project. Some more up-to-date info, like that they have completed the first prototype with 8 SCSI strings and 32 disks.} } @InProceedings{keane:commercial, author = {J. A. Keane and T. N. Franklin and A. J. Grant and R. Sumner and M. Q. Xu}, title = {Commercial Users' Requirements for Parallel Systems}, booktitle = {Proceedings of the 1993 DAGS/PC Symposium}, year = {1993}, month = {June}, pages = {15--25}, organization = {Dartmouth Institute for Advanced Graduate Studies}, address = {Hanover, NH}, keyword = {parallel architecture, parallel I/O, databases, commercial requirements, pario bib}, abstract = {This paper reports on part of an on-going analysis of parallel systems for commercial users. The particular focus of this paper is on the requirements that commercial users, in particular users with financial database systems, have of parallel systems. The issues of concern to such users differ from those of concern to science and engineering users. Performance of the parallel system is not the only, or indeed primary, reason for moving to such systems for commercial users. Infra-structure issues are important, such as system availability and inter-working with existing systems. These issues are discussed in the context of a banking customer's requirements. The various technical concerns that these requirements impose are discussed in terms of commercially available systems.} } @Article{kim:asynch, author = {Michelle Y. Kim and Asser N. Tantawi}, title = {Asynchronous Disk Interleaving: {Approximating} Access Delays}, journal = {IEEE Transactions on Computers}, year = {1991}, month = {July}, volume = {40}, number = {7}, pages = {801--810}, keyword = {disk interleaving, parallel I/O, performance modeling, pario bib}, comment = {As opposed to synchronous disk interleaving, where disks are rotationally synchronous and one access is processed at a time. They develop a performance model and validate it with traces of a database system's disk accesses. Average access delay on each disk can be approximated by a normal distribution.} } @Article{kim:fft, author = {Michelle Y. Kim and Anil Nigam and George Paul and Robert H. Flynn}, title = {Disk Interleaving and Very Large Fast {F}ourier Transforms}, journal = {International Journal of Supercomputer Applications}, year = {1987}, volume = {1}, number = {3}, pages = {75--96}, keyword = {parallel I/O, disk striping, scientific computing, algorithm, pario bib} } @PhdThesis{kim:interleave, author = {Michelle Y. Kim}, title = {Synchronously Interleaved Disk Systems with their Application to the Very Large {FFT}}, year = {1986}, school = {IBM Thomas J. Watson Research Center}, address = {Yorktown Heights, New York 10598}, note = {IBM Report number RC12372}, keyword = {parallel I/O, disk striping, file access pattern, disk array, pario bib}, comment = {Uniprocessor interleaving techniques. Good case for interleaving. Probably better to reference kim:interleaving and kim:fft. Discusses an 3D FFT algorithm in which the matrix is broken into subblocks that are accessed in layers. The layers are stored so this is either contiguous or with a regular stride, in fairly large chunks.} } @Article{kim:interleaving, author = {Michelle Y. Kim}, title = {Synchronized Disk Interleaving}, journal = {IEEE Transactions on Computers}, year = {1986}, month = {November}, volume = {C-35}, number = {11}, pages = {978--988}, keyword = {parallel I/O, disk striping, disk array, pario bib}, comment = {See kim:interleave.} } @TechReport{kotz:fsint, author = {David Kotz}, title = {Multiprocessor File System Interfaces}, year = {1992}, month = {March}, number = {PCS-TR92-179}, institution = {Dept. of Math and Computer Science, Dartmouth College}, note = {Revised version appeared in PDIS'93.}, keyword = {dfk, parallel I/O, multiprocessor file system, file system interface, pario bib}, abstract = {Increasingly, file systems for multiprocessors are designed with parallel access to multiple disks, to keep I/O from becoming a serious bottleneck for parallel applications. Although file system software can transparently provide high-performance access to parallel disks, a new file system interface is needed to facilitate parallel access to a file from a parallel application. We describe the difficulties faced when using the conventional (Unix-like) interface in parallel applications, and then outline ways to extend the conventional interface to provide convenient access to the file for parallel programs, while retaining the traditional interface for programs that have no need for explicitly parallel file access. Our interface includes a single naming scheme, a {\em multiopen\/} operation, local and global file pointers, mapped file pointers, logical records, {\em multifiles}, and logical coercion for backward compatibility.}, comment = {Cite kotz:fsint2.} } @InProceedings{kotz:fsint2, author = {David Kotz}, title = {Multiprocessor File System Interfaces}, booktitle = {Proceedings of the Second International Conference on Parallel and Distributed Information Systems}, year = {1993}, pages = {194--201}, keyword = {dfk, parallel I/O, multiprocessor file system, file system interface, pario bib}, abstract = {Increasingly, file systems for multiprocessors are designed with parallel access to multiple disks, to keep I/O from becoming a serious bottleneck for parallel applications. Although file system software can transparently provide high-performance access to parallel disks, a new file system interface is needed to facilitate parallel access to a file from a parallel application. We describe the difficulties faced when using the conventional (Unix-like) interface in parallel applications, and then outline ways to extend the conventional interface to provide convenient access to the file for parallel programs, while retaining the traditional interface for programs that have no need for explicitly parallel file access. Our interface includes a single naming scheme, a {\em multiopen\/} operation, local and global file pointers, mapped file pointers, logical records, {\em multifiles}, and logical coercion for backward compatibility.} } @InProceedings{kotz:fsint2p, author = {David Kotz}, title = {Multiprocessor File System Interfaces}, booktitle = {Proceedings of the Usenix File Systems Workshop}, year = {1992}, month = {May}, pages = {149--150}, keyword = {dfk, parallel I/O, multiprocessor file system, file system interface, pario bib}, comment = {Short paper (2 pages). See kotz:fsint2.} } @Article{kotz:jpractical, author = {David Kotz and Carla Schlatter Ellis}, title = {Practical Prefetching Techniques for Multiprocessor File Systems}, journal = {Journal of Distributed and Parallel Databases}, year = {1993}, month = {January}, volume = {1}, number = {1}, pages = {33--51}, keyword = {dfk, parallel file system, prefetching, disk caching, parallel I/O, MIMD, pario bib}, abstract = {Improvements in the processing speed of multiprocessors are outpacing improvements in the speed of disk hardware. Parallel disk I/O subsystems have been proposed as one way to close the gap between processor and disk speeds. In a previous paper we showed that prefetching and caching have the potential to deliver the performance benefits of parallel file systems to parallel applications. In this paper we describe experiments with practical prefetching policies that base decisions only on on-line reference history, and that can be implemented efficiently. We also test the ability of these policies across a range of architectural parameters.}, comment = {Journal version of kotz:practical. See also kotz:jwriteback, kotz:fsint2, cormen:integrate.} } @Article{kotz:jwriteback, author = {David Kotz and Carla Schlatter Ellis}, title = {Caching and Writeback Policies in Parallel File Systems}, journal = {Journal of Parallel and Distributed Computing}, year = {1993}, month = {January and February}, volume = {17}, number = {1--2}, pages = {140--145}, keyword = {dfk, parallel file system, disk caching, parallel I/O, MIMD, pario bib}, abstract = {Improvements in the processing speed of multiprocessors are outpacing improvements in the speed of disk hardware. Parallel disk I/O subsystems have been proposed as one way to close the gap between processor and disk speeds. Such parallel disk systems require parallel file system software to avoid performance-limiting bottlenecks. We discuss cache management techniques that can be used in a parallel file system implementation for multiprocessors with scientific workloads. We examine several writeback policies, and give results of experiments that test their performance.}, comment = {Journal version of kotz:writeback. See kotz:jpractical, kotz:fsint2, cormen:integrate.} } @InProceedings{kotz:practical, author = {David Kotz and Carla Schlatter Ellis}, title = {Practical Prefetching Techniques for Parallel File Systems}, booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems}, year = {1991}, month = {December}, pages = {182--189}, keyword = {dfk, parallel file system, prefetching, disk caching, parallel I/O, MIMD, pario bib}, abstract = {Parallel disk subsystems have been proposed as one way to close the gap between processor and disk speeds. In a previous paper we showed that prefetching and caching have the potential to deliver the performance benefits of parallel file systems to parallel applications. In this paper we describe experiments with practical prefetching policies, and show that prefetching can be implemented efficiently even for the more complex parallel file access patterns. We test these policies across a range of architectural parameters.}, comment = {Short form of primary thesis results. Cite kotz:jpractical. See kotz:jwriteback, kotz:fsint2, cormen:integrate.} } @Article{kotz:prefetch, author = {David Kotz and Carla Schlatter Ellis}, title = {Prefetching in File Systems for {MIMD} Multiprocessors}, journal = {IEEE Transactions on Parallel and Distributed Systems}, year = {1990}, month = {April}, volume = {1}, number = {2}, pages = {218--230}, keyword = {dfk, parallel file system, prefetching, MIMD, disk caching, parallel I/O, pario bib}, abstract = {The problem of providing file I/O to parallel programs has been largely neglected in the development of multiprocessor systems. There are two essential elements of any file system design intended for a highly parallel environment: parallel I/O and effective caching schemes. This paper concentrates on the second aspect of file system design and specifically, on the question of whether prefetching blocks of the file into the block cache can effectively reduce overall execution time of a parallel computation, even under favorable assumptions. Experiments have been conducted with an interleaved file system testbed on the Butterfly Plus multiprocessor. Results of these experiments suggest that 1) the hit ratio, the accepted measure in traditional caching studies, may not be an adequate measure of performance when the workload consists of parallel computations and parallel file access patterns, 2) caching with prefetching can significantly improve the hit ratio and the average time to perform an I/O operation, and 3) an improvement in overall execution time has been observed in most cases. In spite of these gains, prefetching sometimes results in increased execution times (a negative result, given the optimistic nature of the study). We explore why is it not trivial to translate savings on individual I/O requests into consistently better overall performance and identify the key problems that need to be addressed in order to improve the potential of prefetching techniques in this environment.} } @PhdThesis{kotz:thesis, author = {David Kotz}, title = {Prefetching and Caching Techniques in File Systems for {MIMD} Multiprocessors}, year = {1991}, month = {April}, school = {Duke University}, note = {Available as technical report CS-1991-016.}, keyword = {dfk, parallel file system, prefetching, MIMD, disk caching, parallel I/O, pario bib}, abstract = {The increasing speed of the most powerful computers, especially multiprocessors, makes it difficult to provide sufficient I/O bandwidth to keep them running at full speed for the largest problems. Trends show that the difference in the speed of disk hardware and the speed of processors is increasing, with I/O severely limiting the performance of otherwise fast machines. This widening access-time gap is known as the ``I/O bottleneck crisis.'' One solution to the crisis, suggested by many researchers, is to use many disks in parallel to increase the overall bandwidth. This dissertation studies some of the file system issues needed to get high performance from parallel disk systems, since parallel hardware alone cannot guarantee good performance. The target systems are large MIMD multiprocessors used for scientific applications, with large files spread over multiple disks attached in parallel. The focus is on automatic caching and prefetching techniques. We show that caching and prefetching can transparently provide the power of parallel disk hardware to both sequential and parallel applications using a conventional file system interface. We also propose a new file system interface (compatible with the conventional interface) that could make it easier to use parallel disks effectively. Our methodology is a mixture of implementation and simulation, using a software testbed that we built to run on a BBN GP1000 multiprocessor. The testbed simulates the disks and fully implements the caching and prefetching policies. Using a synthetic workload as input, we use the testbed in an extensive set of experiments. The results show that prefetching and caching improved the performance of parallel file systems, often dramatically.}, comment = {Published as kotz:jwriteback, kotz:jpractical, kotz:fsint2.} } @TechReport{kotz:throughput, author = {David Kotz}, title = {Throughput of Existing Multiprocessor File Systems}, year = {1993}, month = {May}, number = {PCS-TR93-190}, institution = {Dartmouth College}, keyword = {parallel I/O, multiprocessor file system, performance, survey, dfk, pario bib}, comment = {A brief note on the reported performance of existing file systems (Intel CFS, nCUBE, CM-2, CM-5, and Cray). Many have disappointingly low absolute throughput, in MB/s.} } @InProceedings{kotz:writeback, author = {David Kotz and Carla Schlatter Ellis}, title = {Caching and Writeback Policies in Parallel File Systems}, booktitle = {1991 IEEE Symposium on Parallel and Distributed Processing}, year = {1991}, month = {December}, pages = {60--67}, keyword = {dfk, parallel file system, disk caching, parallel I/O, MIMD, pario bib}, abstract = {Improvements in the processing speed of multiprocessors are outpacing improvements in the speed of disk hardware. Parallel disk I/O subsystems have been proposed as one way to close the gap between processor and disk speeds. Such parallel disk systems require parallel file system software to avoid performance-limiting bottlenecks. We discuss cache management techniques that can be used in a parallel file system implementation. We examine several writeback policies, and give results of experiments that test their performance.}, comment = {Cite kotz:jwriteback. See also kotz:jpractical, kotz:fsint2, cormen:integrate.} } @TechReport{krieger:asf, author = {Orran Krieger and Michael Stumm and Ronald Unrau}, title = {The {Alloc Stream Facility}: A Redesign of Application-level Stream {I/O}}, year = {1992}, month = {October}, number = {CSRI-275}, institution = {Computer Systems Research Institute, University of Toronto}, address = {Toronto, Canada, M5S 1A1}, keyword = {memory-mapped file, file system, parallel I/O, pario bib}, comment = {See also krieger:mapped. A 3-level interface structure: interface, backplane, and stream-specific modules. Different interfaces available: unix, stdio, ASI (theirs), C++. Common backplane. Stream-specific implementations that export operations like salloc and sfree, which return pointers to data buffers. ASI exports that interface to the user, for maximum efficiency. Performance is best when using mapped files as underlying implementation. Many stdio or unix apps are faster only after relinking. ASI is even faster. In addition to better performance, also get multithreading support, multiple interfaces, and extensibility.} } @InProceedings{krieger:hfs, author = {Orran Krieger and Michael Stumm}, title = {{HFS:} A Flexible File System for large-scale Multiprocessors}, booktitle = {Proceedings of the 1993 DAGS/PC Symposium}, year = {1993}, month = {June}, pages = {6--14}, organization = {Dartmouth Institute for Advanced Graduate Studies}, address = {Hanover, NH}, keyword = {multiprocessor file system, parallel I/O, operating system, shared memory, pario bib}, abstract = {The {H{\sc urricane}} File System (HFS) is a new file system being developed for large-scale shared memory multiprocessors with distributed disks. The main goal of this file system is scalability; that is, the file system is designed to handle demands that are expected to grow linearly with the number of processors in the system. To achieve this goal, HFS is designed using a new structuring technique called Hierarchical Clustering. HFS is also designed to be flexible in supporting a variety of policies for managing file data and for managing file system state. This flexibility is necessary to support in a scalable fashion the diverse workloads we expect for a multiprocessor file system.}, comment = {Designed for scalability on the hierarchical clustering model (see unrau:cluster), the Hurricane File System for NUMA shared-memory MIMD machines. Each cluster has its own full file system, which communicates with those in other clusters. Pieces are name server, open-file server, and block-file server. On first access, the file is mapped into the application space. VM system calls BFS to arrange transfers. Open questions: policies for file state management, block distribution, caching, and prefetching. Object-oriented approach used to allow for flexibility and extendability. Local disk file systems are log-structured.} } @TechReport{krystynak:datavault, author = {John Krystynak}, title = {{I/O} Performance on the {Connection Machine DataVault} System}, year = {1992}, month = {May}, number = {RND-92-011}, institution = {NAS Systems Division, NASA Ames}, keyword = {parallel I/O, parallel file system, parallel I/O, performance measurement, pario bib}, comment = {Short measurements of CM-2 Datavault. Faster if you access through Paris. Can get nearly full 32 MB/s bandwidth. Problem in its ability to use multiple CMIO busses.} } @InProceedings{krystynak:pario, author = {John Krystynak and Bill Nitzberg}, title = {Performance Characteristics of the {iPSC/860} and {CM-2} {I/O} Systems}, booktitle = {Proceedings of the Seventh International Parallel Processing Symposium}, year = {1993}, pages = {837--841}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {Essentially a (short) combination of krystynak:datavault and nitzberg:cfs.} } @InProceedings{kucera:libc, author = {Julie Kucera}, title = {Making {\em libc}\/ Suitable for Use by Parallel Programs}, booktitle = {Proceedings of the Usenix Distributed and Multiprocessor Systems Workshop}, year = {1989}, pages = {145--152}, keyword = {parallel file system interface, pario bib}, comment = {Experience making libc reentrant, adding semaphores, {\em etc.}, on a Convex. Some problems with I/O. Added semaphores and private memory to make libc calls reentrant, i.e., callable in parallel by multiple threads.} } @PhdThesis{kwan:sort, author = {Sai Choi Kwan}, title = {External Sorting: {I/O} Analysis and Parallel Processing Techniques}, year = {1986}, month = {January}, school = {University of Washington}, note = {Available as technical report 86--01--01}, keyword = {parallel I/O, sorting, pario bib}, comment = {Examines external sorting techniques such as merge sort, tag sort, multi-pass distribution sort, and one-pass distribution sort. The model is one where I/O complexity is included, assuming a linear seek time distribution and a cost of 1/2 rotation for each seek. Parallel I/O or computing are not considered until the distribution sorts. Architectural model on page 58.} } @InProceedings{lake:pario, author = {Brian Lake and Chris Gray}, title = {Parallel {I/O} for {MIMD} Machines}, booktitle = {Proceedings of SS'93: High Performance Computing}, year = {1993}, month = {June}, pages = {301--308}, address = {Calgary}, keyword = {parallel I/O, MIMD, multiprocessor file system, pario bib} } @InProceedings{lautenbach:pfs, author = {Berin F. Lautenbach and Bradley M. Broom}, title = {A Parallel File System for the {AP1000}}, booktitle = {Proceedings of the Third Fujitsu-ANU CAP Workshop}, year = {1992}, month = {November}, keyword = {distributed file system, multiprocessor file system, pario bib}, comment = {See also broom:acacia, broom:impl, mutisya:cache, and broom:cap.} } @TechReport{lee:impl, author = {Edward K. Lee}, title = {Software and Performance Issues in the Implementation of a {RAID} Prototype}, year = {1990}, month = {May}, number = {UCB/CSD 90/573}, institution = {EECS, Univ. California at Berkeley}, keyword = {parallel I/O, disk striping, performance, pario bib}, comment = {Details of their prototype. Defines terms like stripe unit. Explores ways to lay out parity. Does performance simulations. Describes ops needed in device driver. Good to read if you plan to implement a RAID. Results: small R+W, or high loads, don't care about parity placement; in low load, there are different best cases for large R+W. Best all-around is left-symmetric. See also lee:parity.} } @InProceedings{lee:parity, author = {Edward K. Lee and Randy H. Katz}, title = {Performance Consequences of Parity Placement in Disk Arrays}, booktitle = {Fourth International Conference on Architectural Support for Programming Languages and Operating Systems}, year = {1991}, pages = {190--199}, keyword = {RAID, reliability, parallel I/O, pario bib}, comment = {Interesting comparison of several parity placement schemes. Boils down to two basic choices, depending on whether read performance or write performance is more important to you.} } @InProceedings{livny:stripe, author = {M. Livny and S. Khoshafian and H. Boral}, title = {Multi-Disk Management Algorithms}, booktitle = {Proceedings of the 1987 ACM Sigmetrics Conference on Measurement and Modeling of Computer Systems}, year = {1987}, month = {May}, pages = {69--77}, keyword = {parallel I/O, disk striping, disk array, pario bib} } @TechReport{lo:disks, author = {Raymond Lo and Norman Matloff}, title = {A Probabilistic Limit on the Virtual Size of Replicated File Systems}, year = {1989}, institution = {Department of EE and CS, UC Davis}, keyword = {parallel I/O, replication, file system, disk mirroring, disk shadowing, pario bib}, comment = {A look at shadowed disks. If you have $k$ disks set up to read from the disk with the shortest seek, but write to all disks, you have increased reliability, read time like the min of the seeks, and write time like the max of the seeks. It appears that with increasing $k$ you can get good performance. But this paper clearly shows, since writes move all disk heads to the same location, that the effective value of $k$ is actually quite low. Only 4--10 disks are likely to be useful for most traffic loads.} } @InProceedings{loverso:sfs, author = {Susan J. LoVerso and Marshall Isman and Andy Nanopoulos and William Nesheim and Ewan D. Milne and Richard Wheeler}, title = {{\em sfs}: {A} Parallel File System for the {CM-5}}, booktitle = {Proceedings of the 1993 Summer Usenix Conference}, year = {1993}, pages = {291--305}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {They took the Unix file system from SunOS and extended it to run on the CM-5. This involved handling non-power-of-two block sizes, parallel I/O calls, large file sizes, and more encouragement for extents to be allocated. The hardware is particularly suited to RAID~3 with a 16 byte striping unit, although in theory the software could do anything it wants. Geared to data-parallel model. Proc nodes (PNs) contact the timesharing daemon (TD) on the control processor (CP), who gets block lists from the file system, which runs on one of the CPs. The TD then arranges with the disk storage nodes (DSNs) to do the transfer directly with the PNs. Each DSN has 8~MB of buffer space, 8 disk drives, 4 SCSI busses, and a SPARC as controller. Partition managers mount non-local sfs via NFS. Performance results good. Up to 185~MB/s on 118 (2~MB/s) disks.} } @Article{manuel:logjam, author = {Tom Manuel}, title = {Breaking the Data-rate Logjam with arrays of small disk drives}, journal = {Electronics}, year = {1989}, month = {February}, volume = {62}, number = {2}, pages = {97--100}, keyword = {parallel I/O, disk array, I/O bottleneck, pario bib}, comment = {See also Electronics, Nov. 88 p 24, Dec. 88 p 112. Trade journal short on disk arrays. Very good intro. No new technical content. Concentrates on RAID project. Lists several commercial versions. Mostly concentrates on single-controller versions.} } @Misc{maspar:pario, key = {Mas}, title = {Parallel File {I/O} Routines}, year = {1992}, howpublished = {MasPar Computer Corporation}, keyword = {parallel I/O, multiprocessor file system interface, pario bib}, comment = {Man pages for MasPar file system interface. They have either a single shared file pointer, after which all processors read or write in an interleaved pattern, or individual (plural) file pointer, allowing arbitrary access patterns. Updated in 1992 with many more features.} } @Article{masters:pario, author = {Del Masters}, title = {Improve Disk Subsystem Performance with Multiple Serial Drives in Parallel}, journal = {Computer Technology Review}, year = {1987}, month = {July}, volume = {7}, number = {9}, pages = {76--77}, keyword = {parallel I/O, pario bib}, comment = {Information about the early Maximum Strategy disk array, which striped over 4 disk drives, apparently synchronously.} } @Article{matloff:multidisk, author = {Norman S. Matloff}, title = {A Multiple-Disk System for both Fault Tolerance and Improved Performance}, journal = {IEEE Transactions on Reliability}, year = {1987}, month = {June}, volume = {R-36}, number = {2}, pages = {199--201}, keyword = {parallel I/O, reliability, disk shadowing, disk mirroring, pario bib}, comment = {Variation on mirrored disks using more than 2 disks, to spread the files around. Good performance increases.} } @InProceedings{meador:array, author = {Wes E. Meador}, title = {Disk Array Systems}, booktitle = {Proceedings of IEEE Compcon}, year = {1989}, month = {Spring}, pages = {143--146}, keyword = {parallel I/O, disk array, disk striping, pario bib}, comment = {Describes {\em Strategy 2 Disk Array Controller}, which allows 4 or 8 drives, hardware striped, with parity drive and 0-4 hot spares. Up to 4 channels to cpu(s). Logical block interface. Defects, errors, formatting, drive failures all handled automatically. Peak 40 MB/s data transfer on each channel.} } @TechReport{milenkovic:model, author = {Milan Milenkovic}, title = {A Model for Multiprocessor {I/O}}, year = {1989}, month = {July}, number = {89-CSE-30}, institution = {Dept. of Computer Science and Engineering, Southern Methodist University}, keyword = {multiprocessor I/O, I/O architecture, distributed system, pario bib}, comment = {Advocates using dedicated server processors for all I/O, e.g., disk server, terminal server, network server. Pass I/O requests and data via messages or RPC calls over the interconnect (here a shared bus). Server handles packaging, blocking, caching, errors, interrupts, and so forth, freeing the main processors and the interconnect from all this activity. Benefits: encapsulates I/O-related stuff in specific places, accommodates heterogeneity, improves performance. Nice idea, but allows for an I/O bottleneck, unless server can handle all the demand. Otherwise would need multiple servers, more expensive than just multiple controllers.} } @Article{miller:pario, author = {L. L. Miller and A. R. Hurson}, title = {Multiprogramming and concurrency in parallel file environments}, journal = {International Journal of Mini and Microcomputers}, year = {1991}, volume = {13}, number = {2}, pages = {37--45}, keyword = {parallel file system, parallel I/O, database, pario bib}, comment = {This is really for databases. They identify two types of file access: one where the file can be operated on as a set of subfiles, each independently by a processor (what they call MIMD mode), and another where the file must be operated on with a centralized control (SIMD mode), in their case to search a B-tree whose nodes span the set of processors. Basically it is a host connected to a controller, that is connected to a set of small I/O processors, each of which has access to disk. In many ways a uniprocessor perspective. Paper design, with simulation results.} } @InProceedings{miller:rama, author = {Ethan L. Miller and Randy H. Katz}, title = {{RAMA:} A File System for Massively-Parallel Computers}, booktitle = {Proceedings of the Twelfth IEEE Symposium on Mass Storage Systems}, year = {1993}, pages = {163--168}, keyword = {parallel I/O, multiprocessor file system, pario bib}, comment = {The multiprocessor's file system acts as a block cache for tertiary storage. Disk space is broken into ``lines'' of a few MB. Each line has a descriptor telling what blocks it has, and their status. (fileid, offset) hashed to find (disk, linenum). Intrinsic metadata stored at start of each file; positional metadata implicit in hashing, and line descriptors. Sequentiality parameter puts several blocks of a file in the same line, to improve medium-sized requests (otherwise generate lots of request-response net traffic). Not clear on best choice of size. No mention of atomicity wrt concurrent writes to same data. Blocks migrate to tertiary storage as they get old. Fetched on demand, by block (not file). Self-describing blocks have ids in block -- leads to screwy block sizes?} } @Article{milligan:bifs, author = {P. Milligan and L. C. Waring and A. S. C. Lee}, title = {{BIFS}: {A} filing system for multiprocessor based systems}, journal = {Microprocessing and Microprogramming}, year = {1991}, volume = {31}, pages = {9--12}, note = {Euromicro~'90 conference, Amsterdam}, keyword = {multiprocessor file system, pario bib}, comment = {A simple file system for a transputer network, attached to a single disk device. Several procs are devoted to the file system, but really just act as buffers for the host processor that runs the disk. They provide sequential, random access, and indexed files, either byte- or record-oriented. Some prototypes; no results. They add buffering and double buffering, but don't really get into anything interesting.} } @Article{mokhoff:pario, author = {Nicholas Mokhoff}, title = {Parallel Disk Assembly Packs 1.5 {GBytes}, runs at 4 {MBytes/s}}, journal = {Electronic Design}, year = {1987}, month = {November}, pages = {45--46}, keyword = {parallel I/O, I/O, disk architecture, disk striping, reliability, pario bib}, comment = {Commercially available: Micropolis Systems' Parallel Disk 1800 series. Four disks plus one parity disk, synchronized and byte-interleaved. SCSI interface. Total capacity 1.5 GBytes, sustained transfer rate of 4 MBytes/s. MTTF 140,000 hours. Hard and soft errors corrected in real-time. Failed drives can be replaced while system is running.} } @TechReport{montague:swift, author = {Bruce R. Montague}, title = {The {Swift/RAID} Distributed Transaction Driver}, year = {1993}, month = {January}, number = {UCSC-CRL-93-99}, institution = {UC Santa Cruz}, keyword = {RAID, parallel I/O, distributed file system, transaction, pario bib}, comment = {See other Swift papers, e.g., cabrera:pario.} } @Article{moren:controllers, author = {William D. Moren}, title = {Design of Controllers is Key Element in Disk Subsystem Throughput}, journal = {Computer Technology Review}, year = {1988}, month = {Spring}, pages = {71--73}, keyword = {parallel I/O, disk architecture, pario bib}, comment = {A short paper on some basic techniques used by disk controllers to improve throughput: seek optimization, request combining, request queuing, using multiple drives in parallel, scatter/gather DMA, data caching, read-ahead, cross-track read-ahead, write-back caching, segmented caching, reduced latency (track buffering), and format skewing. [Most of these are already handled in Unix file systems.]} } @InProceedings{muntz:failure, author = {Richard R. Muntz and John C. S. Lui}, title = {Performance Analysis of Disk Arrays Under Failure}, booktitle = {16th International Conference on Very Large Data Bases}, year = {1990}, pages = {162--173}, keyword = {disk array, parallel, performance analysis, pario bib}, comment = {Looked at RAID5 when in failure mode. For small-reads workload, could only get 50\% of normal. So they decouple cluster size and parity-group size, so that they decluster over more disks than group size; during failure, this causes less of a load increase on surviving disks.} } @InProceedings{mutisya:cache, author = {Gerald Mutisya and Bradley M. Broom}, title = {Distributed File Caching for the {AP1000}}, booktitle = {Proceedings of the Third Fujitsu-ANU CAP Workshop}, year = {1992}, month = {November}, keyword = {distributed file system, multiprocessor file system, pario bib}, comment = {See also broom:acacia, broom:impl, lautenbach:pfs, and broom:cap.} } @InProceedings{nagashima:pario, author = {Umpei Nagashima and Takashi Shibata and Hiroshi Itoh and Minoru Gotoh}, title = {An Improvement of {I/O} Function for Auxiliary Storage: {Parallel I/O} for a Large Scale Supercomputing}, booktitle = {1990 International Conference on Supercomputing}, year = {1990}, pages = {48--59}, keyword = {parallel I/O, pario bib}, comment = {Using parallel I/O channels to access striped disks, in parallel from a supercomputer. They {\em chain}\/ (i.e., combine) requests to a disk for large contiguous accesses.} } @TechReport{ncr:3600, key = {NCR}, title = {{NCR 3600} Product Description}, year = {1991}, month = {September}, number = {ST-2119-91}, institution = {NCR}, address = {San Diego}, keyword = {multiprocessor architecture, MIMD, parallel I/O, pario bib}, comment = {Has 1-32 50MHz Intel 486 processors. Parallel independent disks on the disk nodes, separate from the processor nodes. Tree interconnect. Aimed at database applications.} } @InProceedings{ng:diskarray, author = {Spencer Ng}, title = {Some Design Issues of Disk Arrays}, booktitle = {Proceedings of IEEE Compcon}, year = {1989}, month = {Spring}, pages = {137--142}, note = {San Francisco, CA}, keyword = {parallel I/O, disk array, pario bib}, comment = {Discusses disk arrays and striping. Transfer size is important to striping success: small size transfers are better off with independent disks. Synchronized rotation is especially important for small transfer sizes, since then the increased rotational delays dominate. Fine grain striping involves less assembly/disassembly delay, but coarse grain (block) striping allows for request parallelism. Fine grain striping wastes capacity due to fixed size formatting overhead. He also derives exact MTTF equation for 1-failure tolerance and on-line repair.} } @InProceedings{ng:interleave, author = {S. Ng and D. Lang and R. Selinger}, title = {Trade-offs Between Devices and Paths in Achieving Disk Interleaving}, booktitle = {Proceedings of the 15th Annual International Symposium on Computer Architecture}, year = {1988}, pages = {196--201}, keyword = {parallel I/O, disk architecture, disk caching, I/O bottleneck, pario bib}, comment = {Compares four different ways of restructuring IBM disk controllers and channels to obtain more parallelism. They use parallel heads or parallel actuators. The best results come when they replicate the control electronics to maintain the number of data paths through the controller. Otherwise the controller bottleneck reduces performance. Generally, for large or small transfer sizes, parallel heads with replication gave better performance.} } @InProceedings{nishino:sfs, author = {H. Nishino and S. Naka and K Ikumi}, title = {High Performance File System for Supercomputing Environment}, booktitle = {Proceedings of Supercomputing '89}, year = {1989}, pages = {747--756}, keyword = {supercomputer, file system, parallel I/O, pario bib}, comment = {A modification to the Unix file system to allow for supercomputer access. Workload: file size from few KB to few GB, I/O operation size from few bytes to hundreds of MB. Generally programs split into I/O-bound and CPU-bound parts. Sequential and random access. Needs: giant files (bigger than device), peak hardware performance for large files, NFS access. Their FS is built into Unix ``transparently''. Space allocated in clusters, rather than blocks; clusters might be as big as a cylinder. Allows for efficient, large files. Mentions parallel disks as part of a ``virtual volume'' but does not elaborate. Prefetching within a cluster.} } @TechReport{nitzberg:cfs, author = {Bill Nitzberg}, title = {Performance of the {iPSC/860} Concurrent File System}, year = {1992}, month = {December}, number = {RND-92-020}, institution = {NAS Systems Division, NASA Ames}, keyword = {Intel, parallel file system, performance measurement, parallel I/O, pario bib}, comment = {Straightforward measurements of an iPSC/860 with 128 compute nodes, 10 I/O nodes, and 10 disks. This is a bigger system than has been measured before. Has some basic MB/s measurements for some features in Tables 1--2. CFS bug prevents more than 2 asynch requests at a time. Another bug forced random-writes to use preallocated files. For low number of procs, they weren't able to pull the full disk bandwidth. Cache thrashing caused problems when they had a large number of procs, because each read prefetched 8 blocks, which were flushed by some other proc doing a subsequent read. Workaround by synchronizing procs to limit concurrency. Increasing cache size is the right answer, but is not scalable.} } @TechReport{nodine:greed, author = {Mark H. Nodine and Jeffrey Scott Vitter}, title = {Greed Sort: An Optimal External Sorting Algorithm for Multiple Disks}, year = {1992}, number = {CS--91--20}, institution = {Brown University}, note = {A summary appears in SPAA~'91.}, keyword = {parallel I/O algorithms, sorting, pario bib}, comment = {Summary is nodine:sort.} } @InProceedings{nodine:loadbalance, author = {Mark H. Nodine and Jeffrey Vitter}, title = {Load Balancing Paradigms for Optimal Use of Parallel Disks and Parallel Memory Hierarchies}, booktitle = {Proceedings of the 1993 DAGS/PC Symposium}, year = {1993}, month = {June}, pages = {26--39}, organization = {Dartmouth Institute for Advanced Graduate Studies}, address = {Hanover, NH}, keyword = {parallel I/O algorithm, memory hierarchy, load balance, sorting, pario bib}, abstract = {We present several load balancing paradigms pertinent to optimizing I/O performance with disk and processor parallelism. We use sorting as our canonical application to illustrate the paradigms, and we survey a wide variety of applications in computational geometry. The use of parallel disks can help overcome the I/O bottleneck in sorting if the records in each read or write are evenly balanced among the disks. There are three known load balancing paradigms that lead to optimal I/O algorithms: using randomness to assign blocks to disks, using the disks predominantly independently, and deterministically balancing the blocks by matching. In this report, we describe all of these techniques in detail and compare their relative advantages. We show how randomized and deterministic balancing can be extended to provide sorting algorithms that are optimal both in terms of the number of I/Os and the internal processing time for parallel-processing machines with scalable I/O subsystems and with parallel memory hierarchies. We also survey results achieving optimal performance in the these models for a large range of online and batch problems in computational geometry.}, comment = {Invited speaker: Jeffrey Vitter.} } @InProceedings{nodine:sort, author = {Mark H. Nodine and Jeffrey Scott Vitter}, title = {Large-Scale Sorting in Parallel Memories}, booktitle = {Proceedings of the 3rd Annual ACM Symposium on Parallel Algorithms and Architectures}, year = {1991}, pages = {29--39}, keyword = {external sorting, file access pattern, parallel I/O, pario bib}, comment = {Describes algorithms for external sorting that are optimal in the number of I/Os. Proposes a couple of fairly-realistic memory hierarchy models.} } @TechReport{nodine:sort2, author = {Mark H. Nodine and Jeffrey Scott Vitter}, title = {Optimal Deterministic Sorting in Parallel Memory Hierarchies}, year = {1992}, month = {August}, number = {CS--92--38}, institution = {Brown University}, note = {Submitted.}, keyword = {parallel I/O algorithms, parallel memory, sorting, pario bib} } @TechReport{nodine:sortdisk, author = {Mark H. Nodine and Jeffrey Scott Vitter}, title = {Optimal Deterministic Sorting on Parallel Disks}, year = {1992}, month = {August}, number = {CS--92--08}, institution = {Brown University}, note = {Submitted.}, keyword = {parallel I/O algorithms, sorting, pario bib} } @TechReport{ogata:diskarray, author = {Mikito Ogata and Michael J. Flynn}, title = {A Queueing Analysis for Disk Array Systems}, year = {1990}, number = {CSL-TR-90-443}, institution = {Stanford University}, keyword = {disk array, performance analysis, pario bib}, comment = {Fairly complex analysis of a multiprocessor attached to a disk array system through a central server that is the buffer. Assumes task-oriented model for parallel system, where tasks can be assigned to any CPU; this makes for an easy model. Like Reddy, they compare declustering and striping (they call them striped and synchronized disks).} } @Article{olson:random, author = {Thomas M. Olson}, title = {Disk Array Performance in a Random {I/O} Environment}, journal = {Computer Architecture News}, year = {1989}, month = {September}, volume = {17}, number = {5}, pages = {71--77}, keyword = {I/O benchmark, transaction processing, pario bib}, comment = {See wolman:iobench. Used IOBENCH to compare normal disk configuration with striped disks, RAID level 1, and RAID level 5, under a random I/O workload. Multiple disks with files on different disks gave good performance (high throughput and low response time) when multiple users. Striping ensures balanced load, similar performance. RAID level 1 or level 5 ensures reliability