% % (Compiled by) 1993 % % Wolfgang Schreiner % Research Institute for Symbolic Computation (RISC-Linz) % Johannes Kepler University, A-4040 Linz, Austria % % PLEASE DO NOT REMOVE THIS NOTICE. % @inproceedings{Abra85, author = {Abramski, Samson}, title = {{Strictness Analysis and Polymorphic Invariance (Extended Abstract)}}, booktitle = {Programs as Data Objects, Proceedings of a Workshop}, address = {Copenhagen, Denmark, October 17--19}, year = {1985}, volume = {217}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, editor = {Ganzinger, H. and Jones, N. D.}, pages = {1--23}, abstract = {In this paper an approach to strictness analysis for polymorphic functions is developed.}, keywords = {Strictness Analysis, Abstract Interpretation}, } @techreport{ACM88, author = {Arvind and Culler, David E. and Maa, Gino K.}, title = {{Assessing the Benefits of Fine-grained Parallelism in Dataflow Programs}}, type = {Computation Structures Group Memo}, number = {279}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {March}, year = {1988}, abstract = {A method for assessing the benefits of fine-grain parallelism in ``real'' programs is presented. The method is based on {\em parallelism profiles\/} and {\em speedup curves\/} derived by executing dataflow graphs on an interpreter under progressively more realistic assumptions about processor resources and communication costs. It is shown that programs, even using traditional algorithms, exhibit ample parallelism when parallelism is exposed at all levels, i.e., within expressions, across nested loops and function calls, and in producer-consumer relationships on individual elements of data structures. Since we only consider dataflow graphs compiled from the high-level language Id, the bias introduced by the language and the compiler is examined. A method of estimating speedup trough analysis of the ideal parallelism profile is developed, avoiding repeated execution of programs. It is shown that fine-grain parallelism can be used to mask large, unpredictably memory latency and synchronization waits in architectures employing dataflow instruction execution mechanisms. Finally, the effects of grouping portions of dataflow programs, such as function invocations or loop iterations, and requiring that the operators in a group execute on a single processor, are explored.}, keywords = {Dataflow, Id, Parallelism Profiles}, } @inproceedings{AHN88, author = {Arvind and Heller, Steve and Nikhil, Rishiyur S.}, title = {{Programming Generality and Parallel Computers}}, booktitle = {Fourth International Symposium on Biological and Artificial Intelligence Systems}, address = {Trento, Italy, September 18--22}, year = {1988}, note = {Also: Computation Structures Group Memo 287, Laboratory for Computer Science, Massachusetts Institute of Technology, Cambridge, MA}, abstract = {In recent years, many ``parallel, general-purpose'' computers have become available, using various interconnections of processors and memories. However, extensions to conventional languages to program these machines have made programming significantly more complex --- a clear regression in software technology. The user now has the additional problem of partitioning his program into parallel parts, and often this is very machine-specific. Further, it is no longer easy to write determinate programs, making debugging a nightmare. In this paper we present an alternative approach. Id is a declarative, implicitly parallel language that simultaneously raises the level of programming and reveals much more parallelism than is possible with programmer annotations. Determinacy is guaranteed by the language semantics. We demonstrate this by developing a program in Id for a non-trivial problem called the ``paraffins problem'', and examining the available parallelism, after the fact.}, keywords = {Dataflow, Id, I-Structures}, } @techreport{Alli87, author = {Allison, Lloyd}, title = {{Two Functional Programming Techniques: Continuations, Circular Programs}}, institution = {Department of Computer Science}, address = {Monash University, Clayton, Australia}, month = {January}, year = {1987}, number = {87/91}, abstract = {Continuations are used in denotational semantics to describe control commands such as jumps. Here it is shown how they can be used as a programming technique to simulate backtracking and coroutines. A {\em circular program\/} creates a data structure where the computation of new components depends on existing components. Examples arise where a list is to be created in which later elements depend on earlier ones. Such programs can avoid creating garbage in the form of intermediate lists, bringing some of the efficiency of imperative programming to functional programming. The technique can sometimes be used in an imperative language to create an elegant program in a functional style.}, keywords = {Continuation, Backtracking, Coroutine, Non-Determinism}, } @article{Alli89a, author = {Allison, Lloyd}, title = {{Circular Programs and Self-referential Structures}}, journal = software, volume = {19}, number = {2}, pages = {99-109}, year = {1989}, abstract = {A {\em circular program\/} creates a data structure whose computation depends upon itself or refers to itself. The technique is used to omplement the classic data structures circular and doubly-linked lists, threaded trees and queues, in a functional programming language. These structures are normally thought to require updateable variables found in imperative languages. For example, a functional program to perform the breadth-first traversal of a tree is given. Some of the examples result in circular data structures when evaluated. Some examples are particularly space-efficient by avoiding the creation of intermediate temporary structures which would otherwise later become garbage. Lastly, the technique can be applied in an imperative language to give an elegant program.}, keywords = {Circular Program}, } @techreport{Alli89b, author = {Allison, Lloyd}, title = {{Applications of Recursively Defined Data Structures}}, institution = {Department of Computer Science}, address = {Monash University, Clayton, Australia}, month = {February}, year = {1989}, number = {88/119}, abstract = {A {\em circular program\/} contains a data structure whose definition is self-referential or recursive. Use of such definition allows efficient functional programs to be written that would not otherwise be possible. A novel primes program and applications to the Fibonacci numbers, to Ackermann's function and to constraint-satisfaction problems are given.}, keywords = {Circular Programs, Streams}, } @inproceedings{ANP87, author = {Arvind and Nikhil, Rishiyur S. and Pingali, Keshav K.}, title = {{I-Structures: Data Structures for Parallel Computing}}, booktitle = {Graph Reduction, Proceedings of a Workshop}, address = {Santa Fe, New Mexico, September 29 -- October 1}, year = {1986}, volume = {279}, series = {Lecture Notes in Computer Science}, editor = {Fasel, Joseph H. and Keller, Robert M.}, publisher = {Springer, Berlin}, pages = {336--369}, note = {Also: Computation Structures Group Memo 269, Laboratory for Computer Science, Massachusetts Institute of Technology, Cambridge, MA}, abstract = {It is difficult simultaneously to achieve elegance, efficiency and parallelism in functional programs that manipulate large data structures. We demonstrate this through careful analysis of program examples using three common functional data-structuring approaches --- lists using {\tt Cons} and arrays using {\tt Update} (both fine-grained operators), and arrays using {\tt make-array} (a ``bulk'' operator). We then present I-structures as an alternative, defining precisely the parallel operational semantics of Id, a language with I-structures. We show elegant, efficient and parallel solutions for the program examples in Id. I-structures make the language non-functional, but do not raise determinacy issues. Finally, we show that even in the context of purely functional languages, I-structures are invaluable for implementing functional data abstractions.}, keywords = {Dataflow, I-structures}, } @inproceedings{ArAr89, author = {Ariola, Zena and Arvind}, title = {{P-TAC: A Parallel Intermediate Language}}, booktitle = {{FPCA '89, Functional Programming Languages and Computer Architecture}}, address = {London, UK, September 11--13}, year = {1989}, pages = {230--242}, publisher = {ACM Press, New York}, abstract = {P-TAC is an intermediate-level language designed to capture the sharing of computation. It is a more suitable internal language for functional language compilers than the {\em $\lambda$-calculus or combinators\/}, especially for compiler optimizations. Using P-TAC, a proof for the confluence of Id, a higher-order functional language augmented with I-structures, is given. Using the notion of observational congruence the correctness of some compiler optimizations is shown.}, keywords = {Compilation, Target Language}, } @techreport{ArCu87, author = {Arvind and Culler, David E.}, title = {{Resource Requirements of Dataflow Programs}}, type = {Computation Structures Group Memo}, number = {280}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {December}, year = {1987}, abstract = {Parallel execution of programs requires more resources and more complex resource management than sequential execution. If concurrent tasks can be spawned dynamically, programs may require an inordinate amount of resources when the potential parallelism in the program is much greater than the amount of parallelism the machine can exploit. We describe {\em loop bounding\/}, a technique for dynamically controlling the amount of parallelism exposed in dataflow programs. The effectiveness of the technique in reducing token storage requirements is supported by experimental data in the form of {\em parallelism profiles\/} and {\em waiting-token profiles\/}. Comparisons are made throughout with more conventional approaches to parallel computing.}, keywords = {Dataflow, Resource Allocation, Parallelism Profiles}, } @techreport{ArEk87, author = {Arvind and Ekanadham, Kattamuri}, title = {{Future Scientific Programming on Parallel Machines}}, type = {Computation Structures Group Memo}, number = {272}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {March}, year = {1987}, abstract = {A language for large scientific applications should facilitate encoding and debugging of programs at the highest level possible. At the same time it should facilitate generation of efficient code for parallel machines. Often these two requirements are conflicting, and trade-offs must be made. Functional and other declarative languages offer relief on both counts. The use of higher-order functions, especially in curried forms, can raise the level of programming dramatically. In addition, such languages often have straightforward operation semantics, thereby providing tremendous opportunities for parallel execution. Programs written in declarative languages thus eliminate the problem of ``detecting parallelism''. This paper illustrates programming in one such language, Id Nouveau, and contrasts it with programming in Fortran. Using an excerpt from an application known as Simple, it is shown how a program can be composed in Id Nouveau from small functions that directly relate to the mathematical and physical concepts of the problem. The difficulty of expressing these concepts in Fortran is discussed. Finally, it is shown that by performing simple transformations, such as in-line substitution of functions, the resulting Id Nouveau code becomes as efficient as an equivalent Fortran program written to run efficiently on a parallel machine.}, keywords = {Dataflow, Id, Scientific Programming}, } @inproceedings{ArNi87, author = {Arvind and Nikhil, R. S. }, title = {{Executing a Program on the MIT Tagged-Token Dataflow Architecture}}, booktitle = {PARLE '87, Parallel Architectures and Languages Europe, Volume 2: Parallel Languages}, address = {Eindhoven, The Netherlands, June 15--19}, year = {1987}, volume = {259}, editor = {de Bakker, J. W. and Nijman, A. J. and Treleaven, P. C.}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, note = {Also: Computation Structures Group Memo 271, Laboratory for Computer Science, Massachusetts Institute of Technology, Cambridge, MA}, abstract = {The MIT Tagged-Token Dataflow project has an unconventional, but integrated approach to general-purpose high-performance parallel computing. Rather than extending conventional sequential languages, we use Id, a high-level language with fine-grained parallelism and determinacy implicit in its operational semantics. Id programs are compiled to dynamic dataflow graphs, a parallel machine language. Dataflow graphs are directly executed on the MIT Tagged-Token Dataflow Architecture (TTDA), a novel multiprocessor architecture. Dataflow research has advanced significantly in the last few years; in this paper we provide an overview of our current thinking, by describing example Id programs, their compilation to dataflow graphs, and their execution on the TTDA. Finally, we describe related work and the status of our project.}, keywords = {Compilation, Dataflow, Id}, } @inproceedings{AbSy85, author = {Abramski, S. and Sykes, R.}, title = {{Secd-m: A Virtual Machine for Applicative Programming}}, editor = {Jouannaud, Jean-Pierre}, booktitle = {{FPCA '85, Functional Programming Languages and Computer Architecture}}, address = {Nancy, France, September 16--19}, year = {1985}, pages = {81--98}, series = {Lecture Notes in Computer Science}, volume = {201}, publisher = {Springer, Berlin}, abstract = {We present a virtual machine to support {\em applicative multiprogramming\/} --- the description of concurrent, asynchronous systems such as operating systems in a functional style. The machine extend's Landin's secd machine to support multiple concurrent expression evaluation, non-determinism in the form of the fair merge, and a full range of input and output devices. This allows system programs to be written in a functional style. The secd-m machine has been implemented and a number of functional concurrent programs demonstrated.}, keywords = {SECD, Abstract Machine, Operating System}, } @article{AsWa77, author = {Ashcroft, E. A. and Wadge, W. W.}, title = {{Lucid, a Nonprocedural Language with Iteration}}, journal = cacm, volume = {20}, number = {7}, month = {July}, year = {1977}, pages = {519--526}, abstract = {Lucid is a formal system in which programs can be written and proofs of programs carried out. The proofs are particularly easy to follow and straight-forward to produce because the statements in a Lucid program are simply axioms from which the proof proceeds by (almost) conventional logic reasoning, with the help of a few axioms and rules of inference for the special lucid functions. As a programming language, Lucid is unconventional because, among other things, the order of statements is irrelevant and assignment statements are equations. Nevertheless, Lucid programs need not look much different than iterative programs in a conventional structured programming language using assignment and conditional statements and loops.}, keywords = {Dataflow, Lucid, Formal Systems, Semantics, Proving}, } @inproceedings{AuJo89, author = {Augustsson, Lennart and Johnsson, Thomas}, title = {{Parallel Graph Reduction with the $\langle{v,G}\rangle$-machine}}, booktitle = {{FPCA '89, Functional Programming Languages and Computer Architecture}}, address = {London, UK, September 11--13}, year = {1989}, pages = {202--213}, publisher = {ACM Press, New York}, abstract = {We have implemented a parallel graph reducer on a commercially available shared memory multiprocesor (a Sequent Symmetry) that achieves real speedup compared to a fast compiled implementation of the conventional G-machine. Using 15 procesors, this speedup ranges between 5 and 11 depending on the program. Underlying the implementation is an abstract machine called the $\langle{v,G}\rangle$-machine. We describe the sequential and the parallel $\langle{v,G}\rangle$-machine, and our implementation of them. We provide performance and speedup figures and graphs.}, keywords = {Programmed Graph Reduction}, } @article{BBKR89, author = {Bevan, D. I. and Burn, G. L. and Karia, R. J. and Robson, J. D.}, title = {{Principles For the Design of a Distributed Memory Architecture for Parallel Graph Reduction}}, year = {1989}, journal = compj, volume = {32}, number = {5}, pages = {461--469}, abstract = {Many models for the parallel reduction of lazy functional languages have been proposed in the literature. The one we have chosen to implement is based on evaluation transformers. An evaluation transformer says how much evaluation can be done to an argument expression in a function application, given the amount of evaluation that can be done to the application. Rather than just selecting a distributed memory architecture and trying to support parallel graph reduction, we investigate the implication of a minimally specified distributed memory architecture for parallel graph reduction. The results of the investigation are incorporated into an abstract machine which is able to support the communication and synchronisation needs of the parallel reduction model on a distributed memory architecture. Certain flags are needed on the nodes in the program graph in order to support the model. These are motivated and described.}, keywords = {Parallel Graph Reduction, Evaluation Transformers}, } @inproceedings{BDMT86, author = {Boyle, James M. and Dritz, Kenneth, W. and Muralidharan, M. N. and Taylor, Robert J.}, title = {{Deriving Sequential and Parallel Programs from Pure Lisp Specifications by Program Transformation}}, booktitle = {Program Specification and Transformation}, address = {Bad T{\"o}lz, Germany, April 15--17}, year = {1986}, pages = {2--19}, publisher = {North Holland}, abstract = {We describe an ``industrial strength'' example of the use of specification and transformation to produce efficient programs. The specification is written in pure Lisp. Program transformations have been used to generate from it a program that executes efficiently on a wide variety of sequential machines. Another set of transformations has been used to generate programs from it that execute efficiently on global-memory parallel machines. We discuss some of the design descisions for the parallel version (and their motivations), optimization of parallel programs, and the benefits of the specification-transformation approach.}, keywords = {Program Transformation, Parallelization, Lisp}, } @inproceedings{BeBe90, author = {Berry, Gerard and Boudol, Gerard}, title = {{The Chemical Abstract Machine}}, booktitle = {{Seventeenth Annual ACM Symposium onn Principles of Programming Languages}}, address = {San Francisco, California, January 17--19}, year = {1990}, publisher = {ACM Press, New York}, pages = {81--94}, abstract = {We introduce a new kind of abstract machine based on the chemical metaphor used in the $\Gamma$ language of Banatre \& al. States of a machine are chemical solutions where floating molecules can interact according to reaction rules. Solutions can be stratified by encapsulating subsolutions within membranes that force reactions to occur locally. We illustrate the use of this model by describing the operational semantics of the TCCS and CCS process calculi. We also show how to extract a higher-order concurrent $\lambda$-calculus out of the basic concepts of the chemical abstract machine.}, keywords = {Abstract Machine}, } @article{Bird84, author = {Bird, R. S.}, title = {{Using Circular Programs to Eliminate Multiple Traversals of Data}}, journal = acta, year = {1984}, volume = {21}, pages = {239-250}, abstract = {This paper describes a technique for transforming functional programs that repeatedly traverse a data structure into more efficient alternatives that do not. The transformation makes essential use of lazy evaluation and local recursion (such as provided by letrec, or its equivalent) to build a circular program that, on one pass over the structure, determines the effects of the individual traversals and the combines them.}, keywords = {Circular Programs}, } @article{BHY88, author = {Bloss, Adrienne and Hudak, P. and Young, J.}, title = {{An Optimizing Compiler for a Modern Functional Language}}, journal = compj, volume = {31}, number = {6}, year = {1988}, pages = {152--161}, abstract = {One of the factors hindering the use of functional languages has been their relatively poor performance in comparison to more traditional languages such as C and Pascal. During the last decade tremendous progress has been made in building implementations of functional languages but the approaches adopted have employed specialist hardware and/or compiler optimizations that have been developed specifically for functional languages. Building specialist hardware may be the best long-term solution but in the short run it is possible to increase the use and acceptance of functional languages by exploiting the performance of commercially available machines. The goal of this project described in this paper has been to design an optimizing compiler that produces fast code for functional languages on conventional sequential and parallel machines.}, keywords = {Compilation, Strictness Analysis, Termination Analysis, Destructive Aggregate Updating}, } @techreport{BMN*88, author = {Brandenburg, Joe and Mirashrafi, Mojy and McNamara, Jean and Billstrom, David and Gilbert, Eric and Ferguson, Mark and Sizer, Andrew and Alleyne, Paul}, title = {{Advances in Lisp for the iPSC/2}}, institution = {Intel Scientific Computers}, year = {1988}, type = {Technical Report}, address = {Beaverton, Oregon}, abstract = {A complete redesign of Common Lisp for the Intel iPSC/2 sytem resulted in a new programming model based on multiple threads of execution, substantial performance gains exceeding all previously published timings for gabriel benchmarks, and a flexible user interface to manage the complexity of concurrently executing Lisp environments. Although multiple thread execution is not an unknown model of processing, the integration of threads with existing iPSC Lisp constructs provided new insight into certain concurrent applications. The performance of iPSC/2 Lisp is shown to be significantly faster than PSL on a Cray-XMP supercomputer, for two of the largest Gabriel benchmarks, iPSC/2 Lisp is shown to be the highest performance Lisp system available, commercially or in academia. The user interface allows direction and redirection of keyboard input, program output, and the handling of errors to multiple screens --- important for large concurrent programs. Specific information is provided on iPSC/2 Lisp performance on the Gabriel Benchmarks; on the differences between iPSC/2 Lisp and CCLISP for the iPSC/2; and on the flexibility of the new user interface. Examples are given for each of the new constructs, user interface, and the performance.}, keywords = {Lisp, Explicit Parallelism}, } @inproceedings{BNA91, author = {Barth, Paul S. and Nikhil, Rishiyur S. and Arvind}, title = {{M-Structures: Extending a Parallel, Non-strict, Functional Language with State}}, booktitle = {{Functional Programming Languages and Computer Architectures}}, address = {Harvard, Massachusetts}, year = {1991}, editor = {Hughes, John}, series = {Lecture Notes in Computer Science}, volume = {523}, publisher = {Springer, Berlin}, pages = {538--568}, abstract = {Functional programs are widely appreciated for being ``declarative'' (algorithms are not {\em over-specified\/}) and implicitly parallel. However, when modeling state, both properties are compromised because the state variables must be ``threaded'' through most functions. Further, state ``updates'' may require much copying. A solution is to introduce assignments, as in ML and Scheme; however, for meaningful semantics, they resort to strict, sequential evaluation. We present an alternative approach called {\em M-structures\/}, which are imperative data structures with implicit synchronization. M-structures are added to Id, a parallel non-strict functional language. We argue that some programs with M-structures improve on their functional counterpart in three ways; they are (1) more {\em declarative\/}; (2) more {\em parallel\/}; and (3) more {\em storage efficient\/}. Points (1) and (2) contradict conventional wisdom. We study two problems: histogramming a tree, and graph traversal, comparing functional and M-structure solutions.}, keywords = {State, Updatable arrays, Non-determinism}, } @inproceedings{Burn87, author = {Burn, Geoffrey L.}, title = {{Evaluation Transformers --- A Model for the Parallel Evaluation of Functional Languages (Extended Abstract)}}, booktitle = {{Functional Programming Languages and Computer Architecture}}, editor = {Kahn, Gilles}, address = {Portland, Oregon, USA, September 14--16}, year = {1987}, pages = {446--470}, series = {Lecture Notes in Computer Science}, volume = {274}, publisher = {Springer, Berlin}, abstract = {If we are not careful, a parallel machine may become swamped with the computation of the expressions whose values are not needed in order to produce the result from a program. We give a semantic criterion which ensures that this does not happen. An abstract interpretation can be developed which gives the definedness of a function in terms of the definedness of its arguments. Traditionally this has been used to give a {\em strictness analysis\/} which has been interpreted to say how much the argument in an application can be evaluated in parallel with the application while still satisfying the semantic criterion. Strictness analysis however only takes into account local information. By taking into account contextual information of an application, we are able to find that in many cases more evaluation of the argument is allowed. This information is available from the same abstract interpretation that is used for strictness analysis. Given how much evaluation is allowed of a function application, an {\em evaluation transformer\/} says how much evaluation of the argument in the application is allowed. This leads to a natural model of the parallel evaluation of functional languages.}, keywords = {Strictness Analysis, Abstract Interpretation, Evaluation Transformers}, } @techreport{Burn88a, author = {Burn, G. L.}, title = {{A Shared Memory Parallel G-machine Based on the Evaluation Transformer Model of Computation}}, type = {Draft}, month = {November}, year = {1988}, institution = {GEC Hirst Research Centre}, address = {Wembley, UK}, abstract = {The evaluation transformer model of parallel reduction is a natural extension of lazy evaluation. It is therefore sensible to use the G-machine as a basis for defining a parallel machine supporting the model. Information from the model is compiled into the code for the parallel G-machine, and uses a number of special instructions defined for creation and sysnchronization of subtasks. The instructions from the sequential machine remain essentially unchanged. Besides the evaluation transformer model, we incorporate a number of important concepts into our definition that are not present in other definitions of parallel G-machines. A full definition of the compilation schemes and transition rules for the instructions are given in an appendix.}, keywords = {Parallel Graph Reduction, Evaluation Transformers, G-Machine}, } @inproceedings{BuGu85, author = {Bush, V. J. and Gurd, J. R.}, title = {{Transforming Recursive Programs for Execution on Parallel Machines}}, editor = {Jouannaud, Jean-Pierre}, booktitle = {{FPCA '85, Functional Programming Languages and Computer Architecture}}, address = {Nancy, France, September 16--19}, year = {1985}, pages = {350--367}, series = {Lecture Notes in Computer Science}, volume = {201}, publisher = {Springer, Berlin}, abstract = {A linear recursive scheme makes just one recursive call; when it is executed, the resulting computation is sequential. Under certain conditions, there is an equivalent double recursive (bilinear) scheme which contains two independent recursive calls. The computation corresponding to this is parallel; the recursion causes a tree-like expansion of function calls. This recursive form is useful for exploiting the large amounts of parallelism available in dataflow and reduction machine architectures. On the other hand, the number of processors in such a parallel machine may be limited, in which case it might be desirable to convert a bilinear scheme to linear form by reversing the direction of the above transformation. In fact, the optimum strategy may be to transform dynamically between the two recursive forms, depending on the run-time availability of parallel processing resources. This paper describes the conditions under which linear and bilinear recursive forms are equivalent and demonstrates the importance and utility of establishing such a correspondence.}, keywords = {FP, Recursion, Parallelization}, } @inproceedings{BlHu86, author = {Bloss, Adrienne and Hudak, Paul}, title = {{Variations on Strictness Analysis}}, booktitle = {{1986 ACM Symposium on Lisp and Functional Programming}}, address = {Cambridge, Massachusetts, August 4--6}, year = {1986}, pages = {132--142}, publisher = {ACM Press, New York}, abstract = {This paper discusses the analysis of first-order functional programs in order to answer the following four questions: what definitely will be evaluated after some expression $e$ is evaluated, what definitely won't be evaluated after $e$ is evaluated, what definitely was evaluated before $e$ was evaluated, what definitely wasn't evaluated before $e$ was evaluated.}, keywords = {Strictness Analysis}, } @article{BHA86, author = {Burn, Geoffrey L. and Hankin, Chris and Abramsky, Samson}, title = {{Strictness Analysis for Higher-Order Functions}}, journal = {Science of Computer Programming}, volume = {7}, year = {1986}, pages = {249--278}, note = {Also: Graph Reduction, Proceedings of a Workshop, volume 217 of Lecture Notes of Computer Science, pages 42--63, Springer, Berlin, 1985.}, abstract = {Abstract Interpretation is a compile-time technique which is used to gain information about a program that may then be used to optimize the execution of the program. A particular use of abstract interpretation is the strictness analysis of functional programs. This provides the key to the exploitation of parallelism in the evaluation of programs written in functional languages. In a language that has lazy semantics, the main potential for parallelism arises in the evaluation of operands of strict operators. A function is strict in an argument if its value is undefined whenever the argument is undefined. If we can use strictness analysis to detect which arguments a function is strict in, we then know that these arguments can be safely evaluated in parallel because this will not affect the lazy semantics. Experimental results suggest that this leads to significant speedups. Mycroft was the first person to apply abstract interpretation to the strictness analysis of functional programs. His framework only applies to first-order functions on flat domains. Many workers have proposed practical approaches to strictness analysis of higher-order functions over flat base domains but their work has not been accompanied by extensions to Mycroft's theoretical framework. In this paper, we give sound mathematical foundations for this work and discuss some of the practical issues involved. The practical approach is proved correct in relation to the theoretical framework.}, keywords = {Strictness Analysis, Abstract Interpretation}, } @techreport{Burn90x, author = {Burn, Geoffrey}, title = {{A New Domain For Head-Strictness}}, institution = {Department of Computing}, address = {Imperial College, London, UK}, month = {January}, year = {1990}, abstract = {A new domain is introduced that captures the notion of head-strictness in functional programming languages.}, keywords = {Strictness Analysis, Abstract Interpretation}, } @inproceedings{Burn90a, author = {Burn, Geoffrey L.}, title = {{A Relationship Between Abstract Interpretation and Projection Analysis (Extended Abstract)}}, booktitle = {POPL Symposium on Principles of Programming Languages, San Francisco, CA, January 17--19}, year = {1990}, abstract = {Abstract interpretation and projection analysis are two techniques for finding out information about lazy functional programs. Two typical uses of these techniques are speeding up sequential implementations, and the introduction of parallelism into parallel implementations. Our main result is the proof of a relationship between a certain class of projections and a certain class of abstract interpretations. One of the claims of projection analysis is that it can find out information about head-strictness, whilst abstract interpretation cannot. We show that there are at least two intuitive notions of head-strictness, and that one of them can be determined using abstract interpretation.}, keywords = {Strictness Analysis, Abstract Interpretation, Projection Analysis}, } @inproceedings{Burn90b, author = {Burn, Geoffrey L.}, title = {{The Evaluation Transformer Model of Reduction and Its Correctness}}, booktitle = {TAPSOFT 91, Brighton, UK, April 9-12}, note = {Also: Technical Report DOC 90/19, Department of Computing, Imperial College, London}, year = {1991}, abstract = {Lazy evaluation of functional programs incurs time and memory overheads, and restricts parallelism, when compared with programs that are evaluated strictly. A number of analysis techniques, such as abstract interpretation and projection analysis, have been developed to find out information that can be used to alleviate these overheads. This paper formalises an evaluation model, the evaluation transformer model of reduction, which can use information form these analysis techniques, and proves that the resulting reduction strategies produce the same answers as those obtained using lazy evaluation.}, keywords = {Strictness Analysis, Evaluation Transformers}, } @inproceedings{Burn90c, author = {Burn, Geoffrey L.}, title = {{Using Projection Analysis in Compiling Lazy Functional Programs}}, booktitle = {ACM Conference on Lisp and Functional Programming}, address = {Nice France, June 27--29}, year = {1990}, pages = {227--241}, publisher = {ACM Press, New York}, abstract = {Projection Analysis is a technique for finding out information about lazy functional programs. We show how the information obtained from this analysis can be used to speed up sequential implementations, and introduce parallelism into parallel implementations. The underlying evaluation model is evaluation transformers, where the amount of evaluation that is allowed of an argument in a function application dependsss on the amount of evaluation allowed to the application. We prove that the transformed programs preserve the semantics of the original programs. Compilation rules, which encode the information from the analysis, are given for sequential and parallel machines.}, keywords = {Strictness Analysis, Projection Analysis, Evaluation Transformers}, } @article{Burn91a, author = {Burn, Geoffrey L.}, title = {{Implementing the Evaluation Transformer Model of Reduction on Parallel Machines}}, journal = {Journal of Functional Programming}, volume = {1}, number = {2}, month = {April}, year = {1991}, note = {Also Technical Report DOC 90/24, Department of Computing, Imperial College, London, UK}, abstract = {The evaluation transformer model of reduction generalises lazy evaluation in two ways: it can start the evaluation of expressions before their first use, and it can evaluate expressions further than weak head normal form. Moreover, the amount of evaluation required of an argument to a function may depend on the amount of evaluation required of the function application. It is a suitable candidate model for implementing lazy functional languages on parallel machines. In this paper we explore the implementation of lazy functional languages on parallel machines, both shared and distributed memory architectures, using the evaluation transformer model of reduction. We will see that the same code can be produced for both styles of architecture, and the definition of the instruction set is virtually the same for each style. The essential difference is that a distributed memory architecture has one extra node type for non-local pointers and instructions which involve the value of such nodes need their definitions extended to cover this new type of node. To make our presentation accessible, we base our description on a variant of the well-known G-machine, an abtract machine for executing lazy functional programs.}, keywords = {Strictness Analysis, Evaluation Transformers}, } @Article{Burt84, author = {Burton, F. Warren}, title = {{Annotations to Control Parallelism and Reduction Order in the Distributed Evaluation of Functional Programs}}, journal = toplas, year = {1984}, volume = {6}, number = {2}, pages = {159--174}, month = {April}, abstract = {When evaluating a functional program on a network of processors, it is necessary to decide when parallelism is desirable, which work may be transferred to another processor, and in what form the work is to be transferred. If the wrong decisions are made, a parallel evaluation may require asymptotically more time than a sequential evaluation, owing to communication costs. The introduction of annotations to give the programmer control over the above decisions is proposed. The annotations and their effects are definied in terms of the lambda calculus. Each application must have one of three annotations. No other annotations are required. Examples and possible extensions to this work, including annotated combinators, are briefly considered.}, keywords = {Para-Functional Programming, Annotations}, } @article{Burt88, author = {Burton, F. W.}, title = {{Nondeterminism with Referential Transparency in Functional Programming Languages}}, journal = compj, volume = {31}, number = {3}, year = {1988}, pages = {243--247}, abstract = {Procedural programmers have found nondeterminism to be unavoidable in many parallel programs (e.g.\ operating systems). If functional programming languages are to be usable as general-purpose parallel-programming languages then provision of some nondeterministic language constructs appears to be necessary. Existing constructs for nondeterminism are not functional and are not compatible with the mathematical foundations of functional programming, which require that the value of a function be uniquely determined by the values of its arguments. We propose to solve this problem by placing the nondeterminism in pseudo-data. A program is passed an infinite tree of two-valued decisions, along with its input. These decisions may be fixed at run time, thereby permitting nondeterminism. Once fixed, a decision remains unchanged, so equivalent expressions must always have the same value. The approach generalizes so that a program can make use of other run-time information such as the current time or the current amounts of available storage.}, keywords = {Non-Determinism, Referential Transparency}, } @Article{Burt87, author = {Burton, F. W.}, title = {{Functional Programming for Concurrent and Distributed Computing}}, journal = compj, year = {1987}, volume = {30}, number = {5}, pages = {437--450}, abstract = {There are at least two approaches to the desing of languages for parallel computing. One approach is to use functional or relational languages which are easy to read, write, transform and verify. The more conventional approach is to use procedural langauges which give a programmer a high degree of control over the run-time behavior of a program. There is a need to reconcile these two approaches in a language which permits both simplicity and efficiency. We propose a small and simple set of annotations (or pragmas) to control the run-time behavior of a functional progrma. The annotations allow a programmer to use three forms of parameter passing. The parameter-passing mechanisms correspond to passing by name, value and need in a sequential language. In addition, in a distributed system a programmer can specify that work should be done on the current processor, an arbitrary processor, or a particular processor such as the one containing a specific data item. The annotatins cannot affect the meaning (result) of a functional program, except for causing non-termination in some cases (which we view as an extreme form of inefficiency). This separation of meaning from control allows a program to be both simple and efficient. Since non-determinism appears to be unavoidable without significant loss of efficiency in a concurrent system, the interaction of the proposed annotations with non-determinism is briefly considered. The run-time behavior of an annotated functional program is similar to that of a procedural programs using message passing, semaphores ore rendezvous to control communication and synchronization.}, keywords = {Para-functional programming, Annotations, Process Mapping, Non-determinism}, } @inproceedings{Chen86, author = {Chen, Marina C.}, title = {{Crystal: A Synthesis Approach to Programming Parallel Machines}}, booktitle = {First Conference on Hypercube Multiprocessors, Knoxville, Tennessee, August 26--27}, year = {1985}, editor = {Health, Michael T.}, pages = {87--107}, abstract = {This paper addresses two software issues in parallel processing: Fiirst, how to program parallel machines effectively, and second, how to realize the high performance promised by the parallel hardware technology. An overview of the language Crystal and a programming methodology for a class of computational problems are presented. Crystal provides high-level abstractions for the behavior of an ensamble of parallel processors, and is devised for the purpose of managing effectively the complexity of programming large parallel systems. To address the issue of effective utilization of hardware technologies, a design methodology is developed for syntehsizing efficient systolic algorithms and architectures from problem definitions.}, keywords = {Dataflow, Crystal}, } @techreport{ChGo91, author = {Chuang, Tyng-Ruey and Goldberg, Benjamin}, title = {{Backward Analysis for Higher-Order Functions Using Inverse Images}}, institution = {Department of Computer Science}, address = {New York University, NY}, month = {February}, year = {1991}, abstract = {We propose a method for performing backward analysis on higher-order functional programming languages based on computing inverse images of functions over abstract domains. The method can be viewed as abstract interpretation done backward. Given an abstract semantics which supports forward analysis, we can transform it into an abstract semantics which performs backward analysis. We show that if the original semantics is correct and computable, then the transformed version of the abstract semantics is also correct and computable. More specifically, given a forward abstract semantics of a higher-order functional language which is expressed in terms of Scott-closed powerdomains, we derive a backward abstraction semantics which is expressed in terms of Scott-open powerdomains. The derivation is shown to be correct and the relationships between forward analysis and backward analysiss is established. We apply this method to the classic strictness analysis in functional languages and obtain promising results. We show that the time complexity of inverse image based backward analysis is not much worse than the forward analysis from which it is derived. We then compare this work with previous works of Wadler, Hughes, and Burn, showing that some special restrictions and constructions in previous works have natural interpretation in the Scott-closed/Scott-open powerdomain framework. A brief outline of applying the inverse image method to other backward semantics analysis is also given.}, keywords = {Strictness Analysis, Abstract Interpretation, Backwards Analysis}, } @techreport{Clar90, author = {Clarke, Thomas}, title = {{The Semantics and Implementation of Aggregates or How to Express Concurrency without Destroying Determinism}}, institution = {Computer Laboratory}, address = {University of Cambridge}, month = {December}, year = {1990}, abstract = {Associative commutative operations lead to algorithms with concurrency which cannot be adequately represented by the pure lambda calculus. This paper investigates two classes of associative commutative operation which express extra concurrency, and shows that they preserve a simple denotational semantics. {\em Simple aggregates\/} encapsulate the evaluation order indeterminancy implicit in strict associative commutative operations and have no effect on semantics. {\em Early Readable Aggregates\/} encapsulate program values which are defined by a lower bound their denotations. The denotational join on ERA values is an associative commutative operation which can be used to express concurrency. Specific aggregates can be used to implement concurrent graph marking, time deterministic merge of lazy lists, and write-once locations.}, keywords = {Non-determinism, Aggregate}, } @inproceedings{ClPe85, author = {Clack, Chris and {Peyton Jones}, Simon L.}, title = {{Strictness Analysis --- A Practical Approach}}, booktitle = {{Functional Programming Languages and Computer Architecture}}, editor = {Jouannaud, Jean-Pierre}, address= {Nancy, France, September 16--19}, year = {1985}, volume = {201}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, pages = {35--49}, abstract = {Significant improvements in performance arise if we can arrange for parallel execution of programs. The absence of side effects in functional languages allows concurrent evaluation of the program, but in lazy implementations this risks wasting work by evaluating expressions which are subsequently discarded. We discuss the use of strictness analysis to determine at complie time which parts of program evaluation can safely be carried out concurrently. We give a practical explanation of this technique, concentrating particularly on the problem of finding fix points.}, keywords = {Strictness Analysis, Abstract Interpretation, Fixed Points}, } @inproceedings{Cole88, author = {Cole, M. I.}, title = {{A `Skeletal' Approach to the Exploitation of Parallel Computation}}, booktitle = {CONPAR 88}, address = {Manchester, UK, September 12--16}, year = {1988}, pages = {100--107}, volume = {C}, publisher = {British Computer Society}, abstract = {This paper introduces a new approach to the design of programming ystems which can exploit parallelism while maintaining a reasonable level of abstraction from the underlying hardware. This is based on the concept of ``algorithmic skeletons''. The abstract specification of four such skeletons are presented and a possible implementation on a grid of processing units is outlined.}, keywords = {Skeletons, Para-Functional Programming}, } @techreport{CoMo90, author = {Cooper, Eric C. and Morrisett, J. Gregory}, title = {{Adding Threads to Standard ML}}, number = {CMU-CS-90-186}, institution = {School of Computer Science}, address = {Carnegie Mellon University, Pittsburgh, PA}, month = {December}, year = {1990}, abstract = {We have added multiple threads of control to the Standard ML programming language. Stanford ML's support for first-class functions and automatic storage management influenced the design in a number of ways. We demonstrate how other concurrency and synchronization operations, such as cobegin/coend, futures, and events, can be implemented in terms of the thread interface. Finally, we describe three implementations of the thread interface: a coroutine version, a uniprocessor preemptive version, and a multiprocessor Mach-based version.}, keywords = {ML, Threads}, } @techreport{CRZ83, author = {Cohen, Shismon and Rosner, Roni and Zidon, Ari}, title = {{PARALISP Simulator (Reference Manual)}}, type = {Research Report}, number = {83-2}, institution = {Computer Science Department}, address = {Hebrew University, Jerusalem, Israel}, month = {January}, year = {1983}, abstract = {PLISP (``Parallel Lisp'') is the current implementation of ``Senile Parallel Execution of Lisp Programs'' (Lehmann and Cohen). PLISP may be thought of as a virtual machine designed to evaluate LISP expressions in a parallel fashion. The language of this virtual machine is PARALISP. This means, for example, that when a function application contains several formal arguments, these arguments may be evaluated in parallel, under the assumption that there are no dependencies, or side effects. Therefore, PARALISP is a version of LISP designed to enable parallel evaluation to occur without side effects.}, keywords = {Lisp}, } @inproceedings{DaRe81, author = {Darlington, John and Reeve, Mike}, title = {{ALICE --- A Multi-Processor Reduction Machine for the Parallel Evaluation of Applicative Languages}}, booktitle = {{FPCA '81, Functional Programming Languages and Computer Architecture}}, address = {Boston, Massachusetts}, year = {1981}, pages = {65--74}, publisher = {ACM Press, New York}, abstract = {In this paper we present a scheme for the parallel evaluation of a wide variety of applicative languages and provide an overview of the architecture of a machine on which it may be implemented.}, keywords = {Parallel Graph Reduction}, } @inproceedings{DKL*88, author = {Darlington, John and Kohshnevisan, Hessam and McLoughlin, Lee and Perry, Nigel and Pull, Hellen and Sephton, Keith and While, Lyndon}, title = {{An Introduction to the Flagship Programming Environment}}, booktitle = {CONPAR 88}, address = {Manchester, UK, September 12--16}, year = {1988}, pages = {75--86}, volume = {A}, publisher = {British Computer Society}, abstract = {The Flagship programming environment provides support for the development, implementation and modification of programs. Its novelty is that it is designed to support programming in a pure functional language and to give mechanical assistance to the process of deriving programs from high level specifications via correctness preserving transformations. The target for these programs is both the Flagship Parallel Machine and conventional parallel machines.}, keywords = {Program Transformation}, } @inproceedings{DRW89, author = {Darlington, J. and Reeve, M. and Wright, S.}, title = {{Programming Parallel Computer Systems using Functional Languages and Program Transformation}}, booktitle = {Parallel Processing '89}, address = {Leiden, The Netherlands}, year = {1989}, abstract = { One of the major problems associated with parallel computer systems is the economical production of software that fully exploits the hardware. Functional languages facilitate a formal methodology for the development of programs for parallel machines. This paper provides an overview of one program development methodology developed at imperial College, and presents a case study that illustrates its operation and quantifies its benefits using results of experiments carried out on the ALICE parallel machine.}, keywords = {Parrallel Graph Reduction}, } @manual{Dimi88, author = {Dimitrovsky, Isaac}, title = {{ZLISP 0.85 Reference Manual}}, organization = {Ultracomputer Research Laboratory}, address = {NewYork}, year = {1988}, month = {March}, abstract = {ZLISP is a portable parallel LISP system for shared memory MIMD supercomputers. It contains several extensions to LISP to support the writing of parallel programs. In the current version, all parallelism is explicit although this may change in the future.}, keywords = {Lisp, Futures}, } @techreport{EkAr87, author = {Ekanadham, Kattamuri and Arvind}, title = {{SIMPLE: Part 1 --- An Exercise in Future Scientific Programming}}, type = {Computation Structures Group Memo}, number = {273}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {July}, year = {1987}, abstract = {Ideally, a high-level language should provide a way of writing abstractions which are as close to the problem domain as possible, as well as facilitate efficient implementations of these abstractions lest a user try to ``get underneath'' the abstractions. With the advent of parallel machines, a language such as FORTRAN fails on both counts. Functional and other declarative languages allegedly offer relief on both counts. The use of higher order functions, including the free use of curried forms, can dramatically raise the level of programming. In addition, such languages often have straight-forward operational semantics which admits tremendous opportunities for parallel execution. Programs in declarative languages, thus eliminate the problem of ``detecting parallelism''. In this report we have examined the first part of this claim by writing an application known as the SIMPLE code in a language called Id Nouveau. The issues of parallelism will be examined in a companion report, Part II, to be published later. We have presented a high-level description of the algorithms used in the hydrodynamics problem, called SIMPLE. Corresponding Id Noveau program fragments are derived from the mathematical descriptions. The emphasis is on the style of programming, and not on algorithmic cleverness. The resulting program has 550 lines and runs successfully in ID World, which is a compiler for Id Noveau together with an abstract simulation facility for a dataflow machine. This report presumes the knowledge of Id Noveau, functional programming and I-structures.}, keywords = {Scientific Programming, Dataflow, Id, I-Structures}, } @techreport{FeMi89, author = {Feeley, Marc and Miller, James S.}, title = {{A Parallel Virtual Machine for Efficient Scheme Compilation}}, institution = {Brandeis University}, address = {Waltham, Massachusetts}, year = {1989}, abstract = {Programs compiled by Gambit, our Scheme compiler, achieve performance as much as twice that of the fastest available Scheme compilers. Gambit is easily ported, while retaining its high performance, through the use of a simple virtual machine (PVM). PVM allows a wide variety of machine-independent optimizations and it supports parallel computation based on the {\tt future} construct. PVM conveys high-level information bidirectionally between the machine-independent front end of the compiler and the machine-dependent back end, making it easy to implement a number of common back end optimizations that are difficult to achieve for other virtual machines. PVM is similar to many real computer architectures and has an option to efficiently gather dynamic measurements of virtual machine usage. These measurements can be used in performance prediction for ports to other architectures as well as design decisions related to proposed optimizations and object representations.}, keywords = {Lisp, Futures, Virtual Machine}, } @article{FrWi78, author = {Friedman, Daniel P. and Wise, David S.}, title = {{Aspects of Applicative Programming for Parallel Processing}}, journal = i3ec, volume = {27}, number = {4}, month = {April}, year = {1978}, pages = {289--296}, abstract = {Early results of a project on compiling stylized recursion into stackless iterative code are reviewed as they apply to a target environment with multiprocessing. Parallelism is possible in executing the compiled image of argument evaluation (collateral argument evaluation in Algol 68), of data structure construction when suspensions are used, and of functional combinations. The last facility provides generally, concise expression for all operations performed in Lisp by mapping functions and in APL by typed operators; there are other uses as well.}, keywords = {Combinators}, } @inproceedings{GaLe87, author = {Gaudiot, J. L. and Lee, L. T.}, title = {{Multiprocessor Systems Programming in a High-Level Data-Flow Language}}, booktitle = {{PARLE '87, Parallel Architectures and Languages Europe, Volume 1: Parallel Architectures}}, editor = {de Bakker, J. W. and Nijman, A. J. and Treleaven, P. C.}, address = {Eindhoven, The Netherlands, June 15--19}, year = {1987}, pages = {134--151}, series = {Lecture Notes in Computer Science}, volume = {258}, publisher = {Springer, Berlin}, abstract = {The data-flow model of computation is an attractive methodology for multiprocessor programming for it offers the potential for unlimited parallelism detection at no programmer's expense. It is here applied to a distributed architecture based on a commercially available microprocessor (the Inmos Transputer). In this project, we have integrated the high-level data driven principles of scheduling within the Transputer architecture so as to provide high programmability of our multicomputer system. A completet programming environment which translates a complex data-flow program graph into occam has been developed and is presented in this paper. We here describe in detail the mapping from the SISAL high-level constructs into the low-level mechanisms of the Transputer (synchronization, structure representation, etc.). The partitioning issues (granularity of the graph) are presented and several solutions based upon both data-flow analysis (communication costs) and program syntax (program structure) are proposed and have been implemented in our programming environment. Finally, we present and analyze graph allocation and optimization schemes to improve the performance of the resulting occam program.}, keywords = {SISAL, Dataflow}, } @inproceedings{Geor89, author = {George, Lal} , title = {{An Abstract Machine for Parallel Graph Reduction}}, booktitle = {{FPCA '89, Functional Programming Languages and Computer Architecture}}, address = {London, UK, September 11--13}, year = {1989}, pages = {214--229}, publisher = {ACM Press, New York}, abstract = {An abstract machine for parallel graph reduction on a shared memory multiprocessor is described. This is intended primarily for normal order ({\em lazy\/}) evaluation of functional programs. It is absolutely essential in such a design to adapt an efficient sequential model since during execution under limited resources available, performance will be reduced in the limit to that of the sequential engine. Parallel evaluation of normal order functional languages performed naively can result in poor overall performance despite the avilability of sufficient processing elements and parallelism in the application. Needless context switching, task migration and continuation building may occur when a sequential thread of control would have sufficed. Furthermore, the compiler using static information cannot be fully aware of the availability of resources and their optimal utilization at any moment in run time. Indeed this may vary between runs which further aggravates the job of the compiler writer in generating optimal and compact code for programs. The benefits derived from this model are: 1) it is based on the G-machine so that execution under limited resources will default to a performance close to that of the G-machine; 2) the additional instructions needed to control the complexities of paralllel evaluation are extremely simple, almost trivializing the job of the compiler writer; 3) attempts are made where possible to avoid context switching and task migration by retaining a sequential thread of control (made more clear in the paper), and 4) the method has demonstrated good overall performance on a shared memory multiprocessor.}, keywords = {Parallel Graph Reduction, G-Machine}, } @inproceedings{GHKP88, author = {Glauert, J. R. W. and Hammond, K. and Kennaway, J. R. and Papadopoulos, G. A.}, title = {{Using Dactl to Implement Declarative Languages}}, booktitle = {CONPAR 88}, address = {Manchester, UK, September 12--16}, year = {1988}, pages = {87--94}, volume = {A}, publisher = {British Computer Society}, abstract = {Dactl is a language of graph rewriting, intended as an intermediate language and compiler target language for highly parallel machines. It is based on a form of graph rewriting which can be used to implement functional, logic, and imperative languages.}, keywords = {Intermediate Language, Compilation}, } @article{GlDe87, author = {Gl{\"u}ck, Robert and Demuth, Christian}, title = {{OC-FP: An Applicative Language Combination with Occam and the Algebra of Processes}}, journal = {Microprocessing and Microprogramming}, volume = {21}, year = {1987}, pages = {549--558}, publisher = {North-Holland}, abstract = {A functional language based on the FP-model is presented. The language combines occam processes and is used as a command level language for occam systems. It is well suited as a graphical language for concurrent programming. An algebra, based on the functional properties of the language, makes process and communication structures accessible to (semi-)automatical transformations.}, keywords = {FP, Occam, Process Algebra}, } @article{GoGa89, author = {Goldman, Ron and Gabriel, Richard P.}, title = {{Qlisp: Parallel Processing in Lisp}}, journal = {IEEE Software}, year = {1989}, pages = {51--59}, month = {July}, abstract = {There is a serious need for a powerful understandable multiprocessing language. Qlisp is an extension to Common Lisp designed for parallel symbolic computation.}, keywords = {Lisp, Futures, Parallel Let}, } @inproceedings{GoGa88, author = {Goldman, Ron and Gabriel, Richard P.}, title = {{Qlisp: Experience and New Directions}}, booktitle = {{PPEALS 1988, Parallel Programming: Experience with Applications, Languages and Systems}}, address = {New Haven, Conneticut, July 19--21}, year = {1988}, series = {SIGPLAN Notices}, number = {23(9)}, month = {September}, publisher = {ACM Press, New York}, pages = {111--123}, abstract = {Qlisp, a dialect of Common Lisp, has been proposed as a multiprocessing programming language which is suitable for studying the styles of parallel programming at the medium-grain level. An initial version of Qlisp has benn implemented on a multiprocessor and a number of experiments with it conducted. This paper describes the implementation, reports on some of the experiments, and presents some new constructs that ae suggested from programming experience with Qlisp.}, keywords = {Lisp, Futures, Parallel Let}, } @inproceedings{GoHu86, author = {Goldberg, Benjamin and Hudak, Paul}, title = {{Alfalfa: Distributed Graph Reduction on a Hypercube Multiprocessor}}, booktitle = {{Graph Reduction, Proceedings of a Workshop}}, address = {Santa Fe, New Mexico, September 29 -- October 1}, year = {1987}, volume = {279}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, editor = {Fasel, Joseph H. and Keller, Robert M.}, pages = {94--113}, abstract = {Alfala is an implementation of a functional language on the Intel iPSC multiprocessor. It is based on a heterogeneous abstract machine model consisting of both graph reduction and stack oriented execution. Alfala consists of two major components, a compiler and a run-time system. The source language, Alfl, contains no constructs that allow the programmer to specify parallelism or synchronization and thus it is the task of the compiler to detect the exploitable parallelism in a program. The runtime system supports dynamic scheduling, interprocessor communication, and storage management. A number of statistics gathered during execution are presented.}, keywords = {Compilation, Serial Combinators, Scheduling}, } @phdthesis{Gold88, author = {Goldberg, Benjamin F.}, title = {{Multiprocessor Execution of Functional Programs}}, type = {Research Report YALEU/DCS/RR-618}, school = {Department of Computer Science}, address = {Yale University}, month = {April}, year = {1988}, abstract = {Functional languages have recently gained attention as vehicles for programming in a concise and elegant manner. In addition, it has been suggested that functional programming provides a natural methodology for programming {\em multiprocessor\/} computers. This dissertation demonstrates that multiprocessor execution of functional programs is feasible, and results in a significant reduction of their execution times. Two implementations of the functional language ALFL were built on commercially available multiprocessors. {\em Alfala\/} is an implementation on the Intel iPSC hypercube multiprocessor, and {\em Buckwheat\/} is an implementation on the Encore Multimax shared-memory multiprocessor. Each implementation includes a compiler that performs {\em automatic decomposition\/} of ALFL programs. The compiler is responsible for detecting the inherent parallelism in a program, and decomposing the program into a collection of tasks, called {\em serial combinators\/}, that can be executed in parallel. One of the primary goals of the compiler is to generate serial combinators exhibiting the coarsest granularity possible without sacrificing useful parallelism. This dissertation describes the algorithms used by the compiler to analyze, decompose, and optimize functional programs. The abstract model supported by Alfala and Buckwheat is called {\em heterogeneous graph reduction\/}, which is a hybrid of graph reduction and conventional stack-oriented execution. This model supports parallelism, lazy evaluation and higher order functions while at the same time making efficient use of the processors in the system. The Alfala and Buckwheat run-time systems support dynamic load balancing, interprocessor communication (if required), and storage management. A large number of experiments were performed on Alfala and Buckwheat for a variety of programs. The results of these experiments, as well as the conclusions drawn from them, are presented.}, keywords = {Parallelization, Serial Combinators, Graph Reduction}, } @techreport{HaBo89, author = {Harmer, Terence J. and Boyle, James M.}, title = {{An Optimization to Increase Parallelism in Parallel Pure Lisp Implementations}}, institution = {Argonne National Laboratory}, address = {Argonne, Illinois}, year = {1989}, abstract = {In this paper we consider an optimization that may be applied to the Lisp list constructor {\em cons\/} when parallel evaluation of function arguments is possible. We present experimental results comparing implementations of a typical function with and without this optimization.}, keywords = {Lisp, Lists}, } @article{Hals85, author = {{Halstead, Jr.}, Robert H.}, title = {{Multilisp: A Language for Concurrent Symbolic Computation}}, journal = toplas, volume = {7}, number = {4}, month = {October}, year = {1985}, pages = {501--538}, abstract = {Multilisp is a version of the Lisp dialect Scheme extended with constructs for parallel execution. Like Scheme, Multilisp is oriented toward symbolic computation. Unlike some parallel programming languages, Multilisp incorporates constructs for causing side effects and for explicitly introducing parallelism. The potential complexity of dealing with side effects in a parallel context is mitigated by the nature of the parallelism constructs and by support for abstract data types: a recommended Multilisp programming style is presented which, if followed, should lead to highly parallel, easily understandable programs. Multilisp is being implemented on the 32 processor {\em Concert\/} multiprocessor; however, it is ultimately intended for use on larger multiprocessors. The current implementation, called {\em Concert Multilisp\/}, is complete enough to run the Multilisp compiler itself and has been run on Concert prototypes including up to eight processors. Concert Multilisp uses novel techniques for task scheduling and garbage collection. The task scheduler helps control excessive resource utilization by means of an unfair scheduling policy; the garbage collector uses a multiprocessor algorithm based on the incremental garbage collector of Baker.}, keywords = {Lisp, Futures}, } @article{Hals86, author = {{Halstead, Jr.}, Robert H.}, title = {{Parallel Symbolic Computing}}, journal = i3ec, month = {August}, year = {1986}, volume = {19}, number = {8}, pages = {35--43}, abstract = {Programs differ from one another in many dimensions. In one such dimension, programs can be laid out along a spectrum with predominantly symbolic programs at one end and predominantly numerical programs at the other. The difference between numerical and symbolic programs suggest different approaches to parallel processing. This article explores the problem and opportunities of parallel symbolic computing and describes the language Multilisp, used at M.I.T. for experiments in parallel symbolic programming.}, keywords = {Multilisp, Symbolic Computation} } @techreport{Harr86, author = {Harrison III, W. Ludwell}, title = {{Compiling Lisp for Evaluation on a Tightly Coupled Multiprocessor}}, type = {CSRD Report}, number = {565}, month = {March}, year = {1986}, institution = {Center of Supercomputing Research and Development}, address = {University of Illinois, Urbana, IL}, abstract = {The problem of compiling Lisp for efficient evaluation on a large, tightly coupled, shared memory multiprocessor is investigated. A representation for s-expressions which facilitates parallel evaluation is proposed, along with a sequence of transformations, to be applied to the functions comprising a Lisp program, which reveal and exploit parallelism.}, keywords = {Lisp, Compilation, Parallelization}, } @techreport{Harr89, author = {Harrison III, W. Ludwell}, title = {{The Interprocedural Analysis and Automatic Parallelization of Scheme Programs}}, number = {CSRD No. 860, UILU-ENG-89-8003}, month = {February}, year = {1989}, institution = {Center of Supercomputing Research and Development}, address = {University of Illinois, Urbana, IL}, abstract = {Lisp and its descendants are among the most important and widely used of programming languages. At the same time, parallelism in the architecture of computer systems is becoming commonplace. There is a pressing need to extend the technology of automatic parallelization that has become available to Fortran programmers of parallel machines, to the realm of Lisp programs and symbolic computing. In this thesis we present a comprehensive approach to the compilation of Scheme programs for shared-memory multiprocessors. Our strategy has two principal components: {\em interprocedural analysis\/} and {\em program restructuring\/}. We introduce {\em procedure strings\/} and {\em stack configurations\/} as a framework in which to reason about interprocedural side-effects and object lifetimes, and develop a system of interprocedural analysis, using abstract interpretation, that is used in the dependence analysis and memory management of Scheme programs. We introduce the transformations of {\em exit-loop translation\/} and {\em recursion splitting\/} to treat the control structures of iteration and recursion that arise commonly in Scheme programs. We propose an alternative representation for s-expressions that facilitates the parallel creation and access of lists. We have implemented these ideas in a parallelizing Scheme compiler and run-time system, and we complement the theory of our work with ``snapshots'' of programs during the restructuring process, and some preliminary performance results of the execution of object codes produced by the compiler.}, keywords = {Lisp, Parallelization, Abstract Interpretation}, } @techreport{HaPe91, author = {Hammond, Kevin and {Peyton Jones}, Simon}, title = {{Profiling scheduling strategies on the GRIP parallel reducer}}, institution = {Department of Computing Science}, address = {University of Glasgow, UK}, month = {August}, year = {1991}, abstract = {It is widely claimed that functional languages are particularly suitable for programming parallel computers. A claimed advantage is that the programmer is not burdened with details of task creation, placement, scheduling, and synchronization, these decisions being taken by the system instead. Leaving aside the question of whether a pure functional language is expressive enough to encompass all the parallel algorithms we might wish to program, there remains the question of how effectively the compiler and run-time system map the program onto a real parallel system, a task usually carried out mostly by the programmer. This is the question we address in this paper. We first introduce the system architecture of GRIP, a shared-memory parallel machine supporting an implementation of the functional language {\sc Haskell}. GRIP executes functional programs in parallel using compiled supercombinator graph reduction, a form of declarative rule system. We then describe several strategies for run-time resource control which we have tried, presenting comprehensive measurements for their effectiveness. We are particularly concerned with strategies controlling task creation, in order to improve task granularity and minimize communication overheads. This is, so far as we know, one of the first attempts to make a systematic study of task-control strategies in a high-performance parallel functional-language system. GRIP's high absolute performance render these results credible for real applications.}, keywords = {Process Scheduling, Load Balancing}, } } @article{HBP86, author = {Hankin, Chris L. and Burn, Geoffrey, L. and Peyton Jones, Simon L.}, title = {{A Safe Approach to Parallel Combinator Reduction (Extended Abstract)}}, journal = {Theoretical Computer Science}, volume = {56}, number = {1}, pages = {17--35}, year = {1988}, note = {Also: Extended Abstract in ESOP 86, European Symposium on Programming, Saarbr{\"u}cken, Germany, March, 1986, pages 99--110, Robinet, B. and Wilhelm, R. (editors), volume 213 of Lecture Nots in Computer Science, Springer, Berlin}, abstract = {In this paper we present the results of two pieces of work which, when combined, allow us to go from a program text in a functional language to a parallel implementation of that program. We present techniques for discovering sources of parallelism in a program at compile time, and then show how this parallelism is naturally mapped into a parallel combinator set that we will define. To discover sources of parallelism in a program, we use abstract interpretation. Abstract interpretation is a compile-time technique which is used to gain information about a program that may then be used to optimize the execution of the program. A particular ue of abstract interpretation is the strictness analysis of functional programs. In a language that has lazy semantics, the main potential for parallelism arises in the evaluation of operands of strict operators. A function is strict in an argument if its value is undefined whenever the argument is undefined. If we can use strictness analysis to detect which arguments a function is strict in, we then know that these arguments can be safely evaluated in parallel because this will not affect the lazy semantics. Having identified the sources of parallelism at compile-time it is necessary to communicate these to the run-time system. In the second part of the paper we use an extended set of combinators, including a pair of parallel combinators that achieve this purpose.}, keywords = {Strictness Analysis, Combinators, Director Strings}, } @article{Huda86, author = {Hudak, Paul}, title = {{Para-Functional Programming}}, journal = i3ec, month = {August}, year = {1986}, volume = {19}, number = {8}, pages = {60--70}, abstract = {In this article I introduce {\em para-functional programming\/}, a methodology for programming multiprocessor computing systems. It is based on a functional programming model augmented with features that allow programs to be mapped to specific multiprocessor topologies. The most significant aspect of the methodology is that it treats the multiprocessor as a single autonomous computer onto which a program is mapped, rather than as a group of independent processors that carry out complex communication and require complex synchronization. In more conventional approaches to parallel programming, the latter method of treatment is often manifested as processes that cooperate by message-passing. However, such notions are absent in para-functional programming; indeed, a single language and evaluation model can be used from problem inception, to prototypes targeted for uniprocessors, and ultimately to realization on a parallel machine.}, keywords = {Para-Functional Programming, Annotations, Process Mapping}, } @inproceedings{Huda86a, author = {Hudak, Paul}, title = {{Arrays, Non-Determinism, Side-Effects, and Parallelism: A Functional Perspective (Extended Abstract)}}, booktitle = {Graph Reduction, Proceedings of a Workshop}, address = {Santa Fe, New Mexico, September 29 -- October 1}, year = {1986}, volume = {279}, series = {Lecture Notes in Computer Science}, editor = {Fasel, Joseph H. and Keller, Robert M.}, publisher = {Springer, Berlin}, pages = {312--327}, abstract = {Purely functional incremental updates to arrays, executed in a non-deterministic manner, are shown to achieve the same effect (in both efficiency and functionability) as parallel assignment to imperative arrays. The strategy depends only on the ability of a compiler to recognize that incremental updates to functional arrays can be done destructively (an optimization often called ``copy-avoidance'' of ``single-threaded arrays''.) Special ``pseudo-functional syntax'' is introduced that captures typical array-update patterns. This syntax falls somewhere between the purely functional and (im)purely imperative, and makes the inferencing problem fairly easy. If nothing else, the work represents an interesting intellectual exercise in the relationship between non-determinism, side-effects, and parallelism, and tends to blur the traditionally clear-cut distinction between functional and imperative languages.}, keywords = {Non-Determinism, Arrays}, } @inproceedings{HuGo85, author = {Hudak, Paul and Goldberg, Benjamin}, title = {{Serial Combinators: Optimal Grains of Parallelism}}, booktitle = {{Functional Programming Languages and Computer Architecture}}, address = {Nancy, France, September 16--19}, year = {1985}, pages = {382--399}, series = {Lecture Notes in Computer Science}, editor = {Jouannaud, Jean-Pierre}, volume = {201}, publisher = {Springer, Berlin}, abstract = {A method is described for translating a high-level functional program into combinators suitable for execution on multiprocessors with no shared memory. It is argued that the granularity of the standard set of fixed combinators is too fine, whereas the granularity of user-defined functions is too coarse. The notion of a {\em serial combinator\/} is introduced that in some sense has optimal granularity, and that takes into account pragmatic issues such as the complexity of expressions and communication costs between processors. The technique improves on the standard notion of {\em super-combinators\/} by making them larger to retain locality and improve efficiency, and by ensuring that they have no concurrent substructure that could result in lost parallelism. Simulation results demonstrate improved performance on both sequential and parallel computing models.}, keywords = {Parallelization, Serial Combinators}, } @article{HuGo85a, author = {Hudak, Paul and Goldberg, Benjamin}, title = {{Distributed Execution of Functional Programs Using Serial Combinators}}, journal = i3ec, volume = {34}, number = {10}, month = {October}, year = {1985}, pages = {881--891}, abstract = {A general strategy for automatically decomposing and dynamically distributing a functional program is discussed, suitable for parallel execution on multiprocessor architectures with no shared memory. The strategy borrows ideas from data flow and reduction machine research on one hand, and from conventional compiler technology for sequential machines on the other. One of the more troublesome issues in such a system is choosing the right granularity for the parallel tasks. As a solution we describe a program transformation technique based on {\em serial combinators\/} that offers in some sense just the ``right'' granularity for this style of computing, and that can be ``fine-tuned'' for particular multiprocessor architectures. We show via simulation the success of our approach.}, keywords = {Compilation, Serial Combinators, Load Balancing}, } @techreport{HuSu88, author = {Hudak, Paul and Sundaresh, Raman S.}, title = {{On the Expressiveness of Purely Functional I/O Systems}}, institution = {Department of Computer Science}, address = {Yale University}, type = {Research Report}, number = {YALEU/DCS/RR-665}, month = {December}, year = {1988}, abstract = {Functional programming languages have traditionally lacked complete, flexible, and yet referentially transparent I/O mechanisms. Previous proposals for I/O have used either the notion of {\em lazy streams\/} or {\em continuations\/} to model interaction with the external world. We discuss and generalize these models and introduce a third, which we call the {\em systems\/} model, to perform I/O. The expressiveness of the styles are compared by means of an example. We then give a series of surprisingly simple translations between the three models, demonstrating that they are not as different as their programming styles suggest, and implying that the styles could be mixed within a single program. The need to express non-deterministic behavior in a functional language is well recognized. So is the problem of doing so without destroying referential transparency. We survey past approaches to this problem, and suggest a solution in the context of the I/O models described. The I/O system of the purely functional language {\sc Haskell} is presented. The system includes a rich set of operations, and distinguishes between {\em file\/} and {\em channel\/} I/O. The approach to non-determinism is also presented. A useful aspect of the design is that it includes a rigorous specification of the behavior of the operating system, thus precisely fixing the semantics of the various I/O operations. The {\sc Haskell} I/O system is capable of supporting may other paradigms of concurrent computation in a natural way. We demonstrate this through the emulation of Actors, UNITY, CSP, CCS and Linda.}, keywords = {Streams, Continuations, Non-determinism}, } @techreport{Iann88, author = {Iannucci, Robert Alan}, title = {{A Dataflow/von Neumann Hybrid Architecture}}, number = {MIT/LCS/TR-418}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {May}, year = {1988}, abstract = {Dataflow architectures offer the ability to trade program-level parallelism in order to overcome machine-level latencies in accessing memory and in communicating with other processors. Dataflow further offers a uniform synchronization paradigm, representing one end of spectrum wherein the unit of scheduling is a single instruction. At the opposite extreme are the von Neumann architectures which schedule on a task, or process, basis. As a basis for scalable, general purpose multiprocessors, traditional von Neumann architectures are unsuitable due to their inability to tolerate latency and to provide means for fine-grained synchronization. This report examines the spectrum by proposing a new architecture which is a {\em hybrid\/} of dataflow and von Neumann organizations. The analysis attempts to discover those features of the dataflow architecture, lacking in a von Neumann machine, which are essential for tolerating latency and synchronization costs. These features are captured in the concept of a {\em parallel machine language\/} which can be grafted on top of an otherwise traditional von Neumann base. In such an architecture, the units of scheduling, called {\em scheduling quanta\/}, are bound at compile time rather than at instruction set design time. The parallel machine language supports this notion via a large synchronization anme space. It is shown that the combination of dataflow-style explicit synchronization and von Neumann-style implicit synchronization in the same instruction set results in an architectural synergism. Using an instruction set which is strictly less powerful than that of the MIT Tagged-Token Dataflow Architecture (TTDA), the hybrid architecture can exploit the same kinds of parallelism than the TTDA. Given that a compiler can generate scheduling quanta of two or three instructions, the hybrid architecture will execute approximately the same number of instructions as the TTDA. Larger quanta can result in the hybrid actually executing {\em fewer\/} instructions than the TTDA, demonstrating the power of passing state implicitly between program-counter sequenced instructions.}, keywords = {Dataflow, Hybrid, Compilation} } @inproceedings{Jaga86, author = {Jagannathan, Suresh}, title = {{A Model of Data Backup and Recovery in a Computer System for Functional Programming}}, booktitle = {{Formal Description of Programming Concepts III}}, address = {Ebberup, Denmark, August 25--28}, year = {1986}, editor = {Wirsing, Martin}, publisher = {North-Holland, Amsterdam}, abstract = {We present a formal operational model of an abstract data flow interpreter for an application programming language. This model serves as a precise specification of an idealized implementation for the VIM computer system, an experimental system under development at MIT intended to examine the efficient implementation of functional languages using data flow principles. The correctness of any implementation of VIM can be verified by showing that it preserves all the invariants specified by this abstract model. To demonstrate this technique, we present an extension of this idealized system which contains backup and recovery facilities to guarantee the security of all online data against loss or corruption as a result of hardware malfunction. To show that the augmented system correctly preserves the system state, we use as simple inductive proof technique that establishes the equivalence of the behavior of the two systems. Our specification language is a simple applicative language augmented with operations to perform first-order existential and universal quantification.}, keywords = {Operating System, Streams, Operational Model}, } @techreport{JHM*85, author = {Jorrand, Ph. and Hufflen, J-M. and Marty, A. and Marty, J-Ch. and Schnoebelen, Ph.}, title = {{FP2 --- The Language and its Formal Definition}}, type = {Research Report}, number = {LIFIA 26}, institution = {Laboratoire d' Informatique Fondamentale et d' Intelligence Artificielle}, address = {IMAG Informatique et Mathematiques Appliquees de Grenoble, France}, month = {May}, year = {1985}, abstract = {The purpose of this report is to provide both an informal introduction to the main concepts of FP2 and a formal definition of its syntax and semantics. {\em It must be noted that both the language FP2 and its formal definition are still subject to change.\/} In particular, it is expected that feedback from implementation and early use of FP2 will be the source of modifications to the language.}, keywords = {FP2, Process Networks}, } @techreport{Jorr84, author = {Jorrand, Philippe}, title = {{FP2: Functional Parallel Programming Based on Term Substitution}}, type = {Research Report}, number = {LIFIA 15}, institution = {Laboratoire d' Informatique Fondamentale et d' Intelligence Artificielle}, address = {IMAG Informatique et Mathematiques Appliquees de Grenoble, France}, month = {November}, year = {1984}, abstract = {The purpose of this paper is to present FP2 (Functional Parallel Programming), a language where communication, functions and parallelism are consistent notions: its design is simple and its formal semantics are rigorously defined. This is made possibleby a systematic use of terms and term substitutions for representing the basic notions of the language.}, keywords = {FP2, Process Networks}, } @inproceedings{John85, author = {Johnson, Thomas}, title = {{Lambda Lifting: Transforming Programs to Recursive Equations}}, booktitle = {{Functional Programming Languages and Computer Architecture}}, address = {Nancy, France, September 16--19}, year = {1985}, pages = {190--203}, series = {Lecture Notes in Computer Science}, editor = {Jouannaud, Jean-Pierre}, volume = {201}, publisher = {Springer, Berlin}, abstract = {Lambda lifting is a technique for transforming a functional program with local function definitions, possibly with free variables in the function definitions, into a program consisting only of global function (combinator) definitions which will be used as rewrite rules. Different ways of doing lambda lifting are presented, as well as reasons for rejecting or selecting the method used in our Lazy ML compiler. A functional program implementing the chosen algorithm is given.}, keywords = {Lambda Lifting, Program Transformation}, } @article{JoSi89, author = {Jones, S. B. and Sinclair, A. F.}, title = {{Functional Programming and Operating Systems}}, journal = compj, volume = {32}, number = {2}, pages = {162--174}, month = {April}, year = {1989}, abstract = {There is a large class of programming problems to which, at first sight, functional programming does not seem suited: interactive programs and programs which must access external resources such as file stores or communication systems. We take operating systems as the archetype of this class of problems. We show how the lazy evaluation of functional programs, in particular lazily evaluated infinite lists, or streams, can be exploited to write interactive programs. This extends quite naturally to a software design paradigm comprising processes and networks, to the control of peripheral devices, and hence to the design of operating systems. We present a design for a simple, single user, multiprogramming operating system. This demonstrates workable solutions to operating system structuring, to low level device interfacing, to high level application program interfacing, and to resource management. We examine the paradigm illustrated, with reference to alternative approaches which have been proposed.}, keywords = {Non-Determinism, Streams, Process Networks, Operating Systems}, } @techreport{Joy90a, author = {Joy, M. S.}, title = {{Parallel Evaluation of Functional Programs --- A Bibliography}}, institution = {Department of Computer Science}, address = {University of Warwick}, type = {Functional Language Implementation Project Document}, number = {18}, month = {April}, year = {1990}, abstract = {A bibliography of documents relating to the execution of functional programs in a parallel environment is presented together with abstracts of (some of) the documents.}, keywords = {Bibliography}, } @inproceedings{KHM89, author = {Kranz, David A. and {Halstead, Jr.}, Robert H. and Mohr, Eric}, title = {{Mul-T: A High-Performance Parallel Lisp}}, booktitle = {{SIGPLAN '89 Conference on Programming Language Design and Implementation}}, address = {Portland, Oregon, June 21--23}, year = {1989}, publisher = {ACM Press, New York}, series = {SIGPLAN Notices}, number = {24(7)}, month = {July}, pages = {81--90}, abstract = {Mul-T is a parallel Lisp system, based on Multilisp's {\tt future} construct, that has been developed to run on an Encore Multimax processor. Mul-T is an extended version of the Yale T system and uses the T system's ORBIT compiler to achieve ``production quality'' performance on stock hardware --- about 100 times faster than Multilisp. Mul-T shows that futures can be implemented cheaply enough to be useful in a production-quality system. Mul-T is fully operational, including a user interface that supports managing groups of parallel tasks.}, keywords = {Lisp, Futures}, } @article{Kieb86, author = {Kieburtz, Richard B.}, title = {{When Chasing your Tail Saves Time}}, journal = {Information Processing Letters}, volume = {23}, number = {6}, month = {December}, year = {1986}, pages = {321--324}, abstract = {Several investigators have observed experimentally that, in evaluating functional language by graph reduction, the time required for evaluation depends rather strongly on whether or not cyclic data structures are used to represent recursively defined expressions. The result is surprising, and the purpose of this short article is to explain it.}, keywords = {Circular Programs}, } @incollection{Kuch89, author = {Kuchen, Herbert}, title = {{Applikative Datenstrukturen in der parallelen abstrakten Maschine PAM (Applicative Data Structures in the Parallel Abstract Machine PAM) (in German)}}, booktitle = {{Funktionale und Logische Programmierung --- Sprachen, Methoden, Implementationen}}, editor = {Dosch, Walter}, publisher = {Report 214, University Augsburg, Germany}, month = {December}, year = {1989}, abtract = {It is explained which requirements data structures for a functional programming language must fulfill. The applicative data structures list, sequence and distributed applicative array are compared according to these demands. For some example programs the runtimes and speedups are given that were measured for a parallel prototype implementation on a multitransputer system.}, keywords = {Distributed Data Structures}, } @inproceedings{KuLo89, author = {Kuchen, Herbert and Loogen, Rita}, title = {{Parallele Implementierung einer funktionalen Programmiersprache auf einem Transputer-Mehrprozessor-System (Parallel Implementation of a Functional Programming Language on a Multitransputer-System) (in German)}}, booktitle = {{1. Transputer-Anwender-Treffen}}, series = {Informatik-Fachberichte}, publisher = {Springer, Berlin}, year = {1989}, abstract = {In this paper we describe the parallel implementation of a functional programming language on a multitransputer system. First a functional program is translated into a system of special function definitions called serial combinators. Herewith the implicit parallelism to be utilized is made explicit. The system of serial combinators is transformed into code for a parallel abstract machine based on graph reduction. Furthermore some data structures of the functional language are introduced and compared. For this purpose, the runtimes and speedups measured for some example programs are given.}, keywords = {Parallel Graph Reduction, Serial Combinators, Evaluation Transformers, Distributed Data Structures}, } @inproceedings{LaHi88, author = {Larus, James R. and Hilfinger, Paul N.}, title = {{Restructuring Lisp Programs for Concurrent Execution}}, booktitle = {PPEALS 1988, Parallel Programming: Experience with Applications, Languages and Systems}, address = {New Have, Connecticut, July 19--21}, year = {1988}, publisher = {ACM Press, New York}, pages = {100--110}, series = {SIGPLAN Notices}, volume = {23(9)}, abstract = {This paper describes the technique that the program transformation system {\sc Curare} uses to restructure Lisp programs for concurrent execution in Multiprocessor Lisp systems and discusses the problems inherent in producing concurrent programs in a flexible and dynamic programming language such as Lisp. {\sc Curare}'s overall organization is similar to other program restructuring systems: it detects potential conflicts between statements in a program, then transforms the program to improve its concurrent performance, and finally inserts synchronization to ensure the program's concurrent behavior. However, the language and programs that {\sc Curare} transforms are very different from the FORTRAN programs that are the traditional targets of program restructuring and so {\sc Curare} requires new algorithms and approaches, which are described in this paper.}, keywords = {Lisp, Parallelization}, } @inproceedings{Lee88, author = {Lee, M. K. O.}, title = {{A Parallel Computational Model for Functional Languages and Its Formal Specification in CSP}}, booktitle = {CONPAR 88}, address = {Manchester, UK, September 12--16}, year = {1988}, pages = {9--16}, volume = {C}, publisher = {British Computer Society}, abstract = {This paper presents a parallel and distributed computational model for functional languages based on the notion of demand/data driven reduction of graphical combinators. A graphical combinator language is used as the canonical language form for the computational model. The behavior of the model is formally described using Hoare's CSP constructive specification of a detailed and practical parallel computational model of this kind. CSP is found to be a suitable and valuable tool for conveying complex design ideas within a parallel and distributed framework.}, keywords = {Semantics, CSP}, } @article{LiHu86, author = {Li, Kai and Hudak, Paul}, title = {{A New List Compaction Method}}, journal = {Software --- Practice and Experience}, volume = {16}, number = {2}, pages = {145--163}, month = {February}, year = {1986}, abstract = {List compaction, or so-called `cdr-coding', can greatly reduce the storage needs of list processing languages. However, existing methods do not perform well when several lists are being constructed simultaneously from the same heap, since the non-contiguous nature of the cells being allocated eliminates the opportunity for compaction. This situation arises not only in true parallel systems sharing a common memory, and sequential systems supporting multiple processes, but also quite often in purely sequential systems, where it is not uncommon to build several different lists simultaneously within a single loop. In this paper, a new list compaction method is presented that performs well during both sequential and `parallel' list generation. The method is essentially a generalization of cdr-coding, in which lists are represented explicitly as linked vectors rather than implicitly as compacted memory. In addition, an encoding scheme is used that is simple or simpler than all known encodings, and destructive operations are supported with no greater overhead than existing schemes. Performance figures in a simulated environment suggest that the strategy consistently performs better than conventional cdr-coding, with essential the same complexity.}, keywords = {List compaction}, } @inproceedings{LKID89, author = {Loogen, Rita and Kuchen, Herbert and Indermark, Klaus and Damm, Werner}, title = {{Distributed Implementation of Programmed Graph Reduction}}, booktitle = {{PARLE '89, Parallel Architectures and Languages Europe, Volume I: Parallel Architectures}}, address = {Eindhoven, The Netherlands, June 12--16}, year = {1989}, editor = {Odijk, E. and Rem, M. and Syre, J.-C.}, series = {Lecture Notes in Computer Science}, volume = {365}, publisher = {Springer, Berlin}, page = {136--157}, abstract = {Programmed graph reduction has been shown to be an efficient implementation technique for lazy functional languages on sequential machines. Considering programmed graph reduction as a generalization of conventional environment-based implementations where the activation records are allocated in a graph instead of on a stack it becomes very easy to use this technique for the execution of functional programs in a parallel machine in distributed memory. We describe in this paper the realization of programmed graph reduction in PAM --- a parallel abstract machine with distributed memory. Results of our implementation of PAM on an {\sc Occam}-Transputersystem are given.}, keywords = {Programmed Graph Reduction, Serial Combinators, Evaluation Transformers}, } @techreport{Loog87, author ={Loogen, Rita}, title = {{Design of a Parallel Programmable Graph Reduction Machine With Distributed Memory}}, type = {Aachener Informatik-Berichte}, number = {87-11}, institution = {RWTH Aachen, Fachgruppe Informatik}, address = {Aachen, Germany}, year = {1987}, abstract = {This paper describes the systematic design of a parallel programmable graph reduction machine with distributed memory. The abstract architecture that is presented is not just one of many proposals in this area, but highlights the essential features of such machines. Parallel program execution will naturally cause additional organizational overhead (message handling, task distribution, workload balancing) that will certainly slow down the reduction process if only one processor is available in each node of the multicomputer system. To avoid such loss in efficiency each processor node of our abstract architecture consists of two processing units --- the reduction unit and the input/output unit --- which work in parallel exchanging messages in a shared local memory. The reduction unit performs the program evaluation by programmed graph reduction, while the input/output unit keeps organizational work that is important for the parallel program execution, but can be done independently from reduction process, away from the reduction unit. In addition to the gain in efficiency this organization of the abstract architecture allows to discuss the issues of the graph reduction process, the parallelization of this process and the structure of the communication network separately.}, keywords = {Programmed Graph Reduction}, } @inproceedings{MaHa87, author = {Martin, Chris and Hankin, Chris}, title = {{Finding Fixed Points in Finite Lattices}}, booktitle = {{Functional Programming Languages and Computer Architecture}}, editor = {Kahn, Gilles}, year = {1987}, address= {Portland, Oregon, USA, September 14--16}, volume = {274}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, pages = {426--445}, abstract = {Recently there has been much interest in the abstract interpretation of declarative languages. Abstract interpretation is a semantics-based approach to program analysis that uses compile time evaluation of programs using simplified value domains. This gives information about the run-time properties of programs and provides the basis for significant performance improvements. A particular example of abstract interpretation is strictness analysis which allows the detection of parameters in which a function is strict; these parameters may be passed by value without compromising the termination properties of the program. The central, most complex task of an abstract interpreter is finding the fixpoints of recursive functions in the abstract value space. An elegant algorithm, the frontiers algorithm, has been proposed by Simon Peyton-Jones and Chris Clack that performs very well for the strictness analysis of first-order functions. In this paper we extend their algorithm and show how it can be applied to higher-order functions over arbitrary fine lattices. This raises the possibility of using the algorithm as the basis for more general abstract interpretation tools. We describe the algorithm in a modular way that is conducive to proofs of correctness and termination properties.}, keywords = {Abstract Interpretation, Fixed Points, Frontiers Algorithm}, } @inproceedings{Mara91, author = {Maranget, Luc}, title = {{GAML: a Parallel Implementation of Lazy ML}}, booktitle = {{FPCA '91, Functional Programming Languages and Computer Architectures}}, address = {Harvard, Massachusetts, USA}, year = {1991}, series = {Lecture Notes in Computer Science}, volume = {523}, editor = {Hughes, John}, publisher = {Springer, Berlin}, pages = {102--123}, abstract = {We present a new parallel implementation of Lazy ML. Our scheme is a direct extension of the G-machine-based implementation of Lazy ML. Parallelism is introduced by {\em fork\/} annotations inserted by the programmer. We discuss the interference of such user annotations with strictness annotations generated by our compiler. The system has been implemented on a Sequent Balance computer. We also address the main practical issues involved, including stack and heap management.}, keywords = {Parallel Graph Reductoin, G-Machine, Annotations}, } @techreport{Merr91, author = {Merrall, Simon}, title = {{TPL --- An Implementation of Paralation Lisp in EuLisp}}, institution = {School of Mathematical Sciences}, address = {University of Bath}, year = {1991}, abstract = {The department has recently acquired a Maspar MP-1, a massively parallel SIMD computer which has 1024 processors scalable to 16384 processors. We have been looking for an appropriate model of SIMD computation to allow us to use the MasPar with EuLisp alongside our existing MIMD models. In this paper we describe the Paralation Model and an experimental serial implementation of Paralation Lisp written in EuLisp.}, keywords = {Lisp, SIMD}, } @techreport{Merr91a, author = {Merrall, Simon}, title = {{Plurals --- A SIMD extension to Eulisp}}, institution = {School of Mathematical Sciences}, address = {University of Bath}, year = {1991}, abstract = {We describe in this report a massively parallel SIMD extension to EuLisp our local parallel dialect of Lisp, called Plurals. The extensions allow the programmers to allocate Lisp objects on a data parallel processor array, in our case a MasPar MP-10111. The physical size of the processor array is hidden from the programmer who can request different sized sets of virtual processing elements. This gives a mechanism for working with datasets with fewer elements than the processor array whilst leaving the remaining PEs free for other datasets. This has removed us one level from the traditional global execution model of data parallel processing.}, keywords = {Lisp, SIMD}, } @article{MiMo90, author = {Milewski, Jaroslaw and Mori, Imrich}, title = {{Efficient Loop Handling in a Stream-Oriented Unraveling Dataflow Interpreter}}, journal = {Computers and Artificial Intelligence}, volume = {9}, number = {6}, year = {1990}, pages = {545--570}, abstract = {We propose a detailed implementation of an unraveling dataflow interpreter of the Id programming language, based on table encoding of activation names. We concentrate on loop programming construct and on label-manipulating operators of the underlying dataflow model. Moreover, we show how to represent and process the stream, being the basic data structure in classical Id. Since streams are processed by loops, the algorithmic attractiveness of the former meets in our model the efficient implementation of the latter.}, keywords = {Dataflow, Id, Streams}, } @inproceedings{Moor82, author = {Moor, Ian W.}, title = {{An Applicative Compiler for a Parallel Machine}}, booktitle = {{ACM SIGPLAN Symposium on Compiler Construction}}, address = {Boston, Massachusetts}, series = {SIGPLAN Notices}, number = {17(6)}, month = {June}, year = {1982}, pages = {284--293}, publisher = {ACM Press, New York}, note = {Also: Research Report Doc 83/6, Department of Computing, Imperial College, London, March 1983}, abstract = {A compiler for the applicative language HOPE is described. The compiler is itself written in HOPE and generates a machine independent compiler target language, suitable for execution on the parallel reduction machine ALICE. The advantages of writing a compiler in a very high level applicative language ae discussed, and the use of program transformation and other techniques to turn the initial ``runnable specification'' into a more efficient (if less clear) program are outlined. Extensions to the HOPE language and the compiler which can exploit the parallelism and various execution mode of ALICE are described.}, keywords = {Graph Reduction, ALICE, HOPE}, } @techreport{MoTo92, author = {Morrisett, J. Gregory and Tolmach, Andrew}, title = {{A Portable Multiprocessor Interface for Standard ML of New Jersey}}, number = {CMU-CS-92-155}, institution = {School of Computer Science}, address = {Carnegie Mellon University, Pittsburgh, PA}, month = {June}, year = {1992}, abstract = {We have designed a portable interface between shared-memory multiprocessors and Standard ML of New Jersey. The interface is based on the conventional kernel thread model and provides facilities that can be used to implement user-level thread packages. The interface supports experimentation with different thread scheduling policies and synchronization constructs. It has been ported to three different multiprocessors and used to construct a general purpose, user-level thread package. In this paper, we discuss the interface and its implementation and performance, with emphasis on the Silicon Graphics 4D/380S multiprocessor.}, keywords = {ML, Threads}, } @techreport{MSA*85, author = {McGraw, James and Skedzielewski, Stephen and Allan, Stephen and Oldehoeft, Rod and Glauert, John and Kirkham, Chris and Noyce, Bill and Thomas, Robert}, title = {{SISAL --- Streams and Iteration in a Single Assignment Language, Language Reference Manual, Version 1.2}}, number = {M-146}, institution = {Lawrence Livermore National Laboratory}, address = {University of California}, month = {March}, year = {1985}, abstract = {SISAL (Streams and Iteration in a Single-Assignment Language) is a functional data-flow language intended for use on a variety of sequential, vector, multi-, and data-flow processors. SISAL is designed to express algorithms fore execution on computers capable of highly concurrent operation. More specifically, the application area to be supported is numerical computation which strains the limits of high performance machines, and the primary targets for translation of SISAL programs are dataflow data-driven machines. Nevertheless, it has been our intention that the language not have idiosyncrasies reflecting the particular nature of the application area or target machine. It should be reasonable for SISAL to evolve into a general purpose language appropriate for writing programs to run on future general parallel computers.}, keywords = {SISAL, Dataflow}, } @techreport{Nikh87a, author = {Nikhil, Rishiyur S. }, title = {{Id World Reference Manual (for Lisp Machines)}}, type = {Computation Structures Group Memo}, number = {280}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, Massachusetts}, month = {December}, year = {1987}, abstract = {Id World is a powerful, integrated programming environment produced by the Computation Structures Group at MIT's Laboratory for Computer Science for the experimenter who wishes to prepare and execute parallel programs and study their behavior, as quickly as possible. Id World provides a powerful and convenient means to prototype parallel programs rapidly and to study these questions. Programs are written in the parallel programming language Id Nouveau and executed on GITA, an emulation of the MIT Tagged-Token Dataflow Architecture. GITA is heavily instrumented to measure many interesting aspects of parallelism.}, keywords = {Dataflow, Id, Performance Monitoring, Prototyping}, } @techreport{Nikh87b, author = {Nikhil, Rishiyur S. }, title = {{Id Nouveau, Reference Manual, Part I: Syntax}}, institution = {Computation Structures Group, Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, Massachusetts}, month = {April}, year = {1987}, abstract = {This document is Part I of two reference manuals for Id Noveau, the language used to program the MIT Tagged-Token Dataflow Machine. This part describes the syntax of the language. Id Noveau is a functional programming language augmented with a parallel data-structuring mechanism called {\em I-structures\/}.}, keywords = {Dataflow, Id Noveau}, } @techreport{Nikh87c, author = {Nikhil, Rishiyur S. }, title = {{Id Nouveau, Reference Manual, Part II: Operational Semantics}}, institution = {Computation Structures Group, Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, Massachusetts}, month = {April}, year = {1987}, abstract = {This document is Part I of two reference manuals for Id Noveau, the language used to program the MIT Tagged-Token Dataflow Machine. This part describes the operational semantics of the language. Id Noveau is a functional programming language augmented with a parallel data-structuring mechanism called {\em I-structures\/}.}, keywords = {Dataflow, Id Noveau, Operational Semantics}, } @techreport{Nikh88, author = {Nikhil, Rishiyur S. }, title = {{ID (Version 88.0) Reference Manual}}, type = {Computation Structures Group Memo}, number = {284}, institution = {Laboratory for Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {March}, year = {1988}, abstract = {Id is a parallel programming language designed by members of the Computation Structures Group of MIT/LCS. It is used for programming dataflow and other parallel machines. Id is a functional language augmented with a parallel data-structuring mechanism called {\em I-structures\/}.}, keywords = {Dataflow, Id}, } @inproceedings{PaCu90, author = {Papadopoulos, Gregory M. and Culler, David E.}, title = {{Monsoon: An Explicit Token-Store Architecture}}, booktitle = {{17th International Symposium on Computer Architecture}}, address = {Seattle, Washington, May 28--31}, year = {1990}, pages = {82--91}, number = {18(2)}, series = {ACM SIGARCH Computer Architecture News}, month = {June}, abstract = {Dataflow architectures tolerate long unpredictable communication delays and support generation and coordination of parallel activities directly in hardware, rather than assuming that program mappings will cause these issues to disappear. However, the proposed mechanisms are complex and introduce new mapping complications. This paper presents a greatly simplified approach to dataflow execution, called the {\em explicit token store\/} (ETS) architecture, and its current realization in {\em Monsoon\/}. The essence of dynamic dataflow execution is captured by a simple transition on state bits associated with storage local to a processor. Low-level storage management is performed by the compiler in assigning nodes to slots in an {\em activation frame\/}, rather than dynamically in hardware. The processor is simple, highly pipelined, and quite general. It may be viewed as a generalization of a fairly primitive von Neumann architecture. Although the addresssing capability is restrictive, there is exactly one instruction executed for each action on the dataflow graph. Thus, the machine oriented ETS model provides new understanding of the merits and the real cost of direct execution of dataflow graphs.}, keywords = {Dataflow, Compilation, Abstract Machine}, } @phdthesis{Papa88, author = {Papadopoulos, Gregory Michael}, title = {{Implementation of a General Purpose Dataflow Multiprocessor}}, school = {Department of Electrical Engineering and Computer Science}, address = {Massachusetts Institute of Technology, Cambridge, MA}, month = {August}, year = {1988}, abstract = {General purpose multiprocessors have largely failed to meet expectations for programmability and performance. We blame the lack of usable parallel programming languages and systems on the underlying processor architecture. Machines built out of conventional sequential processors simply do not support the synchronization demands of parallel execution, so the programmer focuses upon the dangerous and arduous task of discovering a minimum set of synchronization points without introducing nondeterminism. We argue that processors must be fundamentally changed to execute a parallel machine language, in which parallel activities are coordinated as efficiently as instructions are scheduled. Dataflow architectures address this challenge by radically reformulating the basic specification of a machine program. These machines {\em directly execute\/} dataflow graphs, which specify only the essential prerequisites for the execution of an instruction --- the availability of operands. Unfortunately, dataflow machines, including the M.I.T. Tagged Token Dataflow Architecture (TTDA), have labored under a number of implementation burdens, notably the apparent need for a fully associative operand matching store which discovers when instructions are able to execute. We introduce and develop a novel dataflow architecture, the Explicit Token Store (ETS), which directly executes tagged-token dataflow graphs while correcting a number of inherent inefficiencies of previous dataflow machines. In the ETS model, operand matching is performed at compiler-designated offsets within an activation frame. We show that the ETS is compatible with the TTDA by giving translations from TTDA machine graphs to ETS machine graphs. Finally, we describe an implementation of an ETS dataflow multiprocessor, called Monsoon, now under construction.}, keywords = {Dataflow, Compilation, Id}, } @inproceedings{PaMe91, author = {Padget, Julian and Merrall, Simon}, title = {{Bridging the MIMD--SIMD Gap}}, booktitle = {{Workshop on Abstract Machine Models for Parallel Computation}}, year = {1991}, abstract = {This paper concerns some programming models for SIMD architectures to support symbolic processing. We report some initial experience with the MasPar array processor and the development of an implementation of EuLisp, extended by the paralation data structure as the first stages in this exercise. In particular, it seems that the paralation structure, although originally conceived for array architectures (viz.\ the Connection Machine), could be applied equally well to MIMD process management. We describe the notion of the active paralation as a mechanism to bridge the gap between SIMD and MIMD architectures.}, keywords = {Lisp, SIMD, MIMD}, } @techreport{Perr88a, author = {Perry, Nigel}, title = {{I/O and Inter-language Calling for Functional Languages}}, institution = {Department of Computing}, address = {Imperial College, London, UK}, year = {1988}, abstract = {This paper discusses the provision of I/O and inter-language calling facilities in pure functional languages. In the introduction we first describe the scope of ``I/O'' by specifying a number of broad operation categories. We then describe why the ``purity'' of functional languages poses problems in providing ``safe'' I/O --- compared to the same task in an imperative language. The first section is a survey existing I/O proposals for functional languages which fall into two groups; environment passing where the file system, I/O devices etc., are passed around as either an explicit or implicit abstract data structure by the program. Secondly stream based systems, where all I/O is channelled over ``streams'' which either carry basic values or more complex data types representing operation request to the O/S. In the second section we define a new system, termed {\em result continuations\/}, which provides a complete and safe I/O system. We show how all the operation categories are handled by this system and prove that the system is safe. The final section looks at providing inter-languages calling between functional and imperative languages. We show how, by using result continuations, an interface may be provided which does not compromise the referential transparency of the functional language.}, keywords = {Operating Systems, Non-determinism, Result Continuations}, } @techreport{Perr88b, author = {Perry, Nigel}, title = {{Towards a Functional Operating System}}, institution = {Department of Computing}, address = {Imperial College, London, UK}, year = {1988}, abstract = {In this short note we discuss an operating system design based around our previously proposed {\em result continuations\/} thus avoiding the problems of stream I/O. To handle non-determinism a mall preemptive process scheduler is defined which is written in assembler and {\em not\/} a functional language, thus avoiding introducing a non-referentially transparent construct into the language. This two tiered approach is not unusual for an operating system written in a high-level language, c.f. Unix.}, keywords = {Operating Systems, Non-Determinism, Result Continuations}, } @inproceedings{Perr92, author = {Perry, Nigel}, title = {{Towards a Concurrent Object/Process Oriented Functional Language (Short Version)}}, booktitle = {15th Australian Computer Science Conference}, volume = {14(1)}, series = {Australian Computer Science Communications}, month = {January}, year = {1992}, abstract = {In this paper we first define the programming methodology continuation passing style (CSP), which originates from tail functions and upon which result continuations are based, and then give an introduction to functional language I/O and inter-language working. We then show how the result continuation mechanism can be extended to support coroutine programming, and then use this to define remote function call (RPC) and object style systems. Next we show how concurrent functional programming can be supported using the result continuation mechanism, this work stems from our sketch of a functional operating system described in Perry. In conclusion we describe how this paper lays the groundwork for the development of concurrent functional and object/process-oriented programming paradigms and support non-determinism. A semantics for result continuations, functional coroutines, remote function calls, and concurrent functional programming is being developed. The semantics are defined using a version of the $\pi$-calculus and enable concurrent functional programs to be reasoned about. This reasoning can either be at two levels; using standard functional notation ($\lambda$-calculus) reasoning for the individual processes and $\pi$-calculus style reasoning for the concurrent communication, treating each process as a ``black box''; or at a single level by implementing the $\lambda$-calculus in the $\pi$-calculus.}, keywords = {Continuation Passing, Non-Determinism, Concurrent Objects, Parallel Semantics}, } @inproceedings{PeSa89, author = {Peyton Jones, Simon L. and Salkild, Jon}, title = {{The Spineless Tagless G-machine}}, booktitle = {{FPCA '89, Functional Programming Languages and Computer Architecture}}, address = {London, UK, September 11--13}, year = {1989}, publisher = {ACM Press, New York}, abstract = {The Spineless Tagless G-machine is an abstract machine based on graph reduction, designed as a target for compilers for non-strict functional languages. As its name implies, it is a development of earlier work, especially the G-Machine and TIM. It has a number of unusual features: the abstract machine code is rather high-level than is common, allowing better code generation; the representation of the graph eliminates most interpretive overheads; vectored returns from data structures give fast case-analysis; and the machine is readily extended for a parallel implementation.}, keywords = {Parallel Graph Reduction, G-Machine}, } @article{Peyt89, author = {Peyton Jones, Simon L.}, title = {{Parallel Implementations of Functional Programming Languages}}, journal = {The Computer Journal}, volume = {32}, number = {2}, pages = {175-186}, month = {April}, year = {1989}, abstract = {One of the most attractive features of functional programming languages is their suitability for programming parallel computers. This paper is devoted to discussion of such a claim. Firstly, parallel functional programming is discussed from the programmer's point of view. Secondly, since most parallel functional language implementations are based on the concept of graph reduction, the issues raised by graph reduction are discussed. Finally, the paper concludes with a case study of a particular parallel graph reduction machine and a survey of other parallel architectures.}, keywords = {Parallel Graph Reduction, Para-Functional Programming, Dataflow} } @inproceedings{Peyt91, author = {{Peyton Jones}, Simon L.}, title = {{The Spineless Tagless G-machine: Second Attempt}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {147--192}, abstract = {The Spineless Tagless G-Machine is an abstract machine designed to support functional languages. This presentation of the machine falls into two parts. Firstly, we present the {\em STG language\/}, an austere but recognizably-functional language, which as well as a {\em denotational\/} meaning has a well-defined {\em operational\/} semantics. The STG language is the ``abstract machine code'' for the Spineless Tagless G-machine, but it is sufficiently abstract that it can readily be compiled into G-machine G-code or TIM code instead. Secondly, we discuss the mapping of the STG language onto stock hardware. The success of an abstract machine model depends largely on how efficient this mapping can be made, though this topic is often relegated to a short section. Instead, we give a detailed discussion of the design issues and the choices we have made. Our principal target is the C language, treating the C compiler as a portable assembler.}, keywords = {Graph Reduction, Abstract Machine}, } @book{PeLe92, author = {{Peyton Jones}, Simon L. and Lester, David R.}, title = {{Implementing Functional Languages: a tutorial}}, publisher = {Prentice Hall}, year = {1992}, abstract = {This book gives a practical approach to understanding implementations of non-strict functional languages using lazy graph reduction. The book is intended to be a source of practical labwork material, to help make functional-language implementations ``come alive'', by helping the reader to develop, modify and experiment with some non-trivial compilers. The unusual aspect of the book is that it is meant to be {\em executed\/} as well as {\em read\/}. Rather than merely presenting an abstract description of each implementation technique, we present the code for a complete working prototype of each major method, and then work through a series of improvements to it. All of the code is available in machine-readable form. A section of the book is devoted to the development of an abstract machine and a compiler for a parallel G-machine.}, keywords = {Parallel Graph Reduction}, } @techreport{Repp91, author = {Reppy, John H.}, title = {{Concurrent Programming with Events -- The Concurrent ML Manual}}, type = {Reference Manual}, number = {Version 0.9.6}, institution = {Department of Computer Science}, address = {Cornell University, Ithaca, NY}, month = {October}, year = {1991}, abstract = {Concurrent ML (CML) is a system for concurrent programming in Standard ML (SML). A CML program consists of a set of {\em threads\/} (or light-weight processes). A thread is the sequential evaluation of a ML expression. It does not have to be a terminating computation; in fact, infinitely looping threads are often useful. The evaluation of a thread may involve communication with other threads, which is done by sending a message on a channel. Message passing is synchronous and forms the basis of communication and synchronization in CML. This model is extended by {\em first-class synchronous operations\/}, which provide a mechanism for building new synchronization and communication abstractions.}, keywords = {ML, Threads}, } @inproceedings{Repp91a, author = {Reppy, J. H.}, title = {{CML: A Higher-Order Concurrent Language}}, booktitle = {SIGPLAN '91 Conference on Programming Language Design and Implementation}, address = {Toronto, Ontario, Canada, June 26--28}, year = {1991}, series = {SIGPLAN Notices}, volume = {26(6)}, pages = {294--305}, publisher = {ACM Press, New York}, abstract = {Concurrent ML (CML) is a high-level, high-performance language for concurrent programming. It is an extension of Standard ML (SML) and is implemented on top of Standard ML of New Jersey (SML/NJ). CML is a practical language and is being used to build real systems. It demonstrates that we need not sacrifice high-level notation in order to have a good performance.}, keywords = {ML, Threads}, } @techreport{Rich84a, author = {{Richards Jr.}, Hamilton}, title = {{An Applicative Programming Languages Bibliography}}, institution = {Burroughs Corporation}, address = {Austin Research Center, Austin, Texas}, month = {October}, year = {1984}, keywords = {Bibliography}, } @inproceedings{RuSa87, author = {Ruggerio, Carlos A. and Sargeant, John}, title = {{Control of Parallelism in the Manchester Dataflow Machine}}, booktitle = {FPCA '87, Functional Programming Language and Computer Architecture}, address = {Portland, Oregon, September 14--16}, year = {1987}, editor = {Kahn, Gilles}, volume = {274}, series = {Lecture Notes in Computer Science}, publisher = {Springer, Berlin}, pages = {1--15}, abstract = {Fine-grain parallel machines, such as tagged-token dataflow machines, allow very high degrees of program parallelism to be exploited for many applications. In fact, so much parallelism can be generated that it is necessary to {\em control parallelism\/} in order to bound storage use. This paper reviews software mechanisms for parallelism control, which rely on merely planting extra code to control execution order. Such methods are found to be inadequate, so a fundamental architectural mechanism known as throttle is considered necessary. Various attempts to design a throttle for the Manchester Dataflow Machine are described. The eventual solution, a coarse-grain, process-based throttle, is explained, and simulation results are presented which demonstrate its effectiveness.}, keywords = {Parallelism Control}, } @inproceedings{SaHe86a, author = {Sarkar, Vivek and Hennessy, John}, title = {{Partitioning Parallel Programs for Macro-Dataflow}}, booktitle = {{1986 ACM Symposium on Lisp and Functional Programming}}, address = {Cambridge, Massachusetts, August 4--6}, year = {1986}, publisher = {ACM Press, New York}, pages = {202--211}, abstract = {Partitioning techniques are necessary to execute functional programs at a coarse granularity. Fine granularity execution is inefficient on general purpose multiprocessors. There is a trade-off between parallelism and the overhead of exploiting parallelism. We present a compile-time partitioning approach to achieve this trade-of