% % (Compiled by) 1993 % % Wolfgang Schreiner % Research Institute for Symbolic Computation (RISC-Linz) % Johannes Kepler University, A-4040 Linz, Austria % % PLEASE DO NOT REMOVE THIS NOTICE. % @inproceedings{Acht91, author = {Achten, Peter}, title = {{Annotations for Load Distribution}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {247--264}, abstract = {Parallel programming is the art of gaining vast computational speed-up by finding a well (or optimal) load distribution of a program onto some network of processors. Optimal load distribution of programparts on a network of processors is an undecidable problem. Therefore supplying the programmer with annotations for directing the actual load distribution of a certain program still seems to be a necessary method of increasing the programs performance. The talk discusses what a good set of primitives to describe load distributions in Concurrent Clean might look like. Some of the primitives have been implemented on the PABC simulator, on which a number of parallelized programs have been tested.}, keywords = {Para-Functional Programming, Annotations, Process Mapping}, } @inproceedings{AFB91, author = {Aharoni, Gad and Farber, Yaron and Barak, Amnon}, title = {{A Strategy for the Run-Time Mangagement of Fine-Grained Parallelism}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {227--246}, abstract = {We present a dynamic, adaptive mechanism for granularity control of the parallel execution of functional programs. This mechanism utilizes the inherent parallelism of functional programs, yet it endeavors to limit excessive parallelism. It is based on the ability to expose, at run time, the shape of the underlying execution structure of the program, and then to allocate the tasks to the processing elements accordingly. High-density structures are distributed quickly among many processing elements, whereas low density ones have a tendency to be confined to few processors. Continuous sampling of the execution structure allows self-tuning of the mechanism, when the underlying structure changes gradually. The advantage of the presented mechanism is its ability to handle programs whose execution structure cannot be determined in advance. We present performance figures that show the effectiveness of the approach for a wide range of computation structures, including divide-and-conquer programs, and linear recursive algorithms.}, keywords = {Load Balancing, Granularity Control}, } @inproceedings{AHPT91, author = {Akerholt, G. and Hammond, K. and Peyton Jones, S. and Trinder, Ph.}, title = {{A Parallel Functional Database for GRIP}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {7--30}, abstract = {GRIP is a shared memory multiprocessor designed for efficient parallel evaluation of functional languages using compiled graph reduction. In this paper, we consider the feasibility of implementing a data base manager on GRIP, and present results obtained from a pilot implementation. A database implemented in a pure functional language must be modified {\em non-destructively\/}, i.e.\ the original database must be preserved and a new copy constructed. The naive implementation provides evidence for the feasibility of a pure functional database in the form of modest real-time speed-ups, and acceptable real-time performance. The functional database is also used to investigate the GRIP architecture, compared with an idealized machine. The particular features investigated are the thread-creation costs and caching of GRIP's distributed memory.}, keywords = {Databases}, } @inproceedings{Bara91, author = {Baraki, Gebreselassie}, title = {{A Note on Abstract Interpretation of Polymorphic Functions}}, booktitle = {{Functional Programming Languages and Computer Architectures}}, address = {Harvard, Massachusetts, USA}, year = {1991}, series = {Lecture Notes in Computer Science}, volume = {523}, publisher = {Springer, Berlin}, editor = {Hughes, John}, pages = {367--378}, abstract = {Strictness analysis of monomorphic functional programs by abstract interpretation has been extensively studied. Abramsky's work describes the extension to polymorphic functions. However, if different instances of a polymorphic function are used in a program, we have to re-analyze to compute abstract functions at each instance used. In the first-order case, Hughes showed how to compute the needed abstract functions without re-analysis. In this paper we develop a method of computing safe approximations to abstract functions of higher-order functions. The approximations are computed from abstract functions of simple instances of polymorphic functions. In the first-order case, the method gives exact values.}, keywords = {Abstract Interpretation}, } @inproceedings{BeKe92, author = {Bennett, Andrew J. and Kelly, Paul H. J.}, title = {{Simulation of Multicache Parallel Graph Reduction}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {Parallel Graph Reduction is a simple formal model for parallel program execution which uses the shared-memory abstraction for all communication and synchronization between processors. Shared memory is used under a strict access regime with single assignment and blocking reads. In this paper we present the design and implementation of an efficient and accurate multiprocessor simulation scheme suitable to work with a parallel graph reducer and use the simulator to study the performance and pattern of access of a suite of benchmark programs. Threads are never migrated to another processor by the parallel graph reducer, and the effect of this scheduling policy is investigated}, keywords = {Parallel Graph Reduction, Simulation}, } @inproceedings{BjHo89, author = {Bjerner, Bron and Holmstr{\"o}m, S{\"o}ren}, title = {{A Compositional Approach to Time Analysis of First Order Functional Programs}}, booktitle = {FPCA '89 Functional Programming Languages and Computer Architecture}, address = {London, UK, September 11--13}, year = {1989}, pages = {157--165}, publisher = {ACM Press, New York}, abstract = {We present a method for computing the number of steps needed to compute a lazy first order functional program {\tt e} (to an approximation of its value). The method itself is described as a lazy functional program. Moreover, it is {\em compositional\/}, because it is defined by recursion on the structure of {\tt e}. In other words, the number of steps needed to compute {\tt e} is described in terms of the number of steps the parts of {\tt e} need. (This is in contrast with the obvious operational method to define an interpreter and count the number of steps that it needs.) The equations that define the analyzing program can also be used when reasoning about time complexity of lazy functional programs. Two simple examples are given at the end of the paper.}, keywords = {Time Analysis}, } @inproceedings{Brat92, author = {Bratvold, Tore A.}, title = {{Determining Useful Parallelism in Higher Order Functions}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {Programs written in explicitly parallel languages are often composed of pieces of task specific code structured and interwoven with a limited number of well known parallel patterns. While such programs may indeed be very efficient in terms of execution speed, they are usually very hard both to write and reason about, and portability is low. The approach of expressing parallel patterns as templates or higher order functions has received attention lately, both from a theoretical point of view and in practical implementations. The paper presents an outline of the compilation stages in a proposed system based on this approach in combination with functional programming. The presence of useful parallelism is discussed for three higher order functions implemented as processor farms, and an example is presented to illustrate the approach.}, keywords = {Higher Order Functions, Skeletons}, } @article{Broy92, author = {Broy, Manfred and Dendorfer, Claus}, title = {{Modelling Operating System Structures by Timed Stream Processing Functions}}, journal = {Journal of Functional Programming}, volume = {2}, number = {1}, pages = {1--21}, month = {January}, year = {1992}, abstract = {Some extensions of the basic formalism of stream processing functions are useful to specify complex structures such as operating systems. In this paper we give the foundations of higher order stream processing functions. These are functions which send and accept not only messages representing atomic data, but also complex elements such as functions. Some special notations are introduced for the specification and manipulation of such functions. A representation of time is outlined, which enables us to model time dependent behavior. Finally, we demonstrate how characteristic operating system structures can be modelled by timed higher order stream processing functions.}, keywords = {Operating System, Streams}, } @techreport{BuMe92, author = {Burn, Geoffrey and Le Metayer, Daniel}, title = {{Cps-Translation and the Correctness of Optimising Compilers}}, number = {DoC92/20}, institution = {Department of Computing}, address = {Imperial College, London, UK}, year = {1992}, abstract = {We show that compiler optimizations based on strictness analysis can be expressed formally in the functional framework using continuations. This formal presentation has two benefits: it allows us to give a rigorous correctness proof of the optimized compiler; and it exposes the various optimizations made possible by strictness analysis. These benefits are especially significant in the presence of partially evaluated data structures.}, keywords = {Strictness Analysis, Optimization, Correctness Proof}, } @inproceedings{Burt89, author = {Burton, F.\ Warren}, title = {{Indeterminate Behavior with Determinate Semantics in Parallel Programs}}, booktitle = {FPCA '89 Functional Programming Languages and Computer Architecture}, address = {London, UK, September 11--13}, year = {1989}, pages = {340--346}, publisher = {ACM Press, New York}, abstract = {A parallel program may be indeterminate so that it can adapt its behavior to the number of processors available, or at least so that low level timing issues are removed from the program. Indeterminate programs are hard to write, understand, modify or verify. They are impossible to debug, since they may not behave the same from one run to the next. We propose a new construct, a polymorphic abstract data type called an {\em improving value\/}, with operations that have indeterminate behavior but simple determinate semantics. These operations allow the type of indeterminate behavior required by many parallel algorithms. We define improving values in the context of a functional programming language, but the technique can be used in procedural languages as well.}, keywords = {Non-determinism}, } @article{Burt91, author = {Burton, Warren F.}, title = {{Encapsulating Non-Determinacy in an Abstract Data Type with Determinate Semantics}}, journal = {Journal of Functional Programming}, volume = {1}, number = {1}, pages = {3--20}, month = {January}, year = {1991}, abstract = {A parallel program may be indeterminate so that it can adapt its behavior to the number of processors available. Indeterminate programs are hard to write, understand, modify or verify. They are impossible to debug, since they may not behave the same from one run to the next. We propose a new construct, a polymorphic abstract data type called an {\em improving value\/}, with operations that have indeterminate behavior but simple determinate semantics. These operations allow the type of indeterminate behavior required by many parallel algorithms. We define improving values in the context of a functional programming language, but the technique can be used in procedural languages as well.}, keywords = {Non-determinism}, } @inproceedings{CSS*91, author = {Culler, David E. and Sah, Anurag and Schauser, Klaus Erik and von Eicken, Thorsten and Wawrzynek, John}, title = {{Fine-grain Parallelism with Minimal Hardware Support: A Compiler-Controlled Threaded Abstract Machine}}, booktitle = {Fourth International Conference on Architectural Support for Programming Languages and Operating Systems}, address = {Santa Clara, California, April 8--11}, year = {1991}, pages = {164--175}, volume = {26(4)}, series = {SIGPLAN Notices}, abstract = {In this paper, we present a relatively primitive execution model for fine-grain parallelism, in which all synchronization, scheduling, and storage management is explicit and under compiler control. This is defined by a threaded abstract machine (TAM) with a multilevel scheduling hierarchy. Considerable temporal locality of logically related threads is demonstrated, providing an avenue for effective register use under quasi-dynamic scheduling. A prototype TAM instruction set, TL0, has been developed along with a translator to a variety of existing sequential and parallel machines. Compilation of Id, an extended functional language requiring fine-grain synchronization, under this model yields performance approaching that of conventional languages on current uniprocessors. Measurements suggest that the net cost of synchronization on conventional multiprocessors can be reduced to within a small factor of that on machines with elaborate hardware support, such as proposed dataflow architectures. This brings into question whether tolerance to latency and inexpensive synchronization require specific hardware support or merely an appropriate compilation strategy and program representation.}, keywords = {Multi-Threaded Machines, Dataflow, Compilation}, } @inproceedings{DFH*91, author = {Darlington, J. and Field, A. J. and Harrison, P. G. and Harper, D. and Jouret, G. K. and Kelly, P. J~. and Sephton, K. M. and Sharp, D. W.}, title = {{Structured Parallel Functional Programming}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {31--52}, abstract = {We present a methodology, based on functional program transformation, aimed at facilitating the development of applications on a variety of diverse parallel machines. The novelty of the method lies in employing a fixed repertoire of parallel program forms, or {\em skeletons\/}, expressed as higher-order functions for which efficient implementations existon particular parallel machines. Program transformation techniques are then used to convert general functional programs into compositions of known skeletons. We give an overview of the problems of programming parallel machines, highlighting the problem in both the implicit and explicit approach, present a taxonomyof parallel machines, discuss the role and advantage of using skeletons, give definitions of particular skeletons, discuss semantic issues, detail the transformations employed and the implementation route used and give examples of the technique in action. The methodology is successful in allowing a broad variety of parallel machines, from SIMD to MIMD, to be programmed efficiently in a uniform manner that still allows portability across different machine types.}, keywords = {Parallel Programming Skeletons, Program Transformation}, } @inproceedings{Dorr92, author = {D{\"o}rr, Heiko}, title = {{Monitoring with Graph-Grammars as Formal Operational Models}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {This paper presents a monitoring method which reduces the costs of event-tracing. To support the monitoring of a system-run, a model is used to manage the system's information which has been collected since the beginning of the run. By this means the collection of redundant event-trace information is avoided. The background and usage of model-based monitoring is discussed, too. Graph-grammars with grouped rules are proposed and defined as an operational formalism to model the behavior of a system. They are especially suitable for the description of distributed systems whose functionality can be characterized by structural changes. In graph-grammar models, relevant functional parts become directly addressable. Therefore, system analysis becomes easier. The formal model of a parallel graph reduction machine is introduced in order to illustrate the monitoring method.}, keywords = {Monitoring, Parallel Graph Reduction}, } @inproceedings{GHV92, author = {Glas, J. C. and Hofman, R. F. H. and Vree, W. G.}, title = {{Parallelization of Branch-and-Bound Algorithms in a Functional Programming Environment}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {The referential transparency of functional languages imposes restraints on the communication between parallel tasks. This seriously hampers the implementation of parallel branch-and-bound algorithms, because the parallel tasks cannot update a shared bound asynchronously. Maintaining local bounds only causes enormous performance loss, because parallel branch-and-bound algorithms exploit speculative parallelism. Therefore, the parallel tasks have to exchange their bounds synchronously, but this lowers the average processor utilization. Three parallel functional implementations of a generally applicable branch-and-bound algorithm have been developed to investigate this trade-off. Experiments have been performed with the cargo loading problem. Two functional equivalents of imperative branch-and-bound approaches are outperformed by a dedicated parallel functional branch-and-bound implementation. A new speculative parallel annotation is proposed to tackle the trade-off between speculative transformation loss and average processor utilization on system level.}, keywords = {Branch-and-Bound, Speculative Parallelism}, } @inproceedings{Glau92, author = {Glauert, John}, title = {{Parallel Implementation of Functional Languages Using Small Processes}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {We report work in progress on the implementation of languages that integrate concurrent and functional programming styles. A translation scheme is presented for mapping such languages into process networks using a simple notation based on communication between named processes. The translation of functional code is sequential, but parallelism arises from the use of concurrency constructs in the source language. Early implementation experiments have used graph rewriting techniques, although work on a more direct implementation is in progress.}, keywords = {Process Networks, Para-Functional Programming, Parallel Graph Reduction}, } @inproceedings{Gron92, author = {van Groningen, John H. G.}, title = {{Some Implementation Aspects of Concurrent Clean on Distributed Memory Architectures}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {We have implemented a code generator and a runtime system that can be used to simulate parallel execution of Concurrent Clean programs on a single Macintosh computer. This code generator generates machine code and is an extension of the (sequential) ABC code generator for the MC680x0 and SPARC processors. This implementation is discussed briefly. The largest part of the paper describes two aspects of the implementation in detail. The first one is how to let a process wait until a node is overwritten with its head normal form. Several possible solutions are described. The other one is a description of an efficient graph copying algorithm. This graph copying algorithm can be used to send graphs to remote processors using a more compact representation. Finally, some results are presented.}, keywords = {Parallel Graph Reduction}, } @inproceedings{HGW91, author = {Hartel, Pieter and Glaser, Hugh and Wild, John}, title = {{On the Benefits of Different Analyses in the Compilation of a Lazy Functional Language}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {123--145}, abstract = {Implementations of lazy functional languages have not as yet approached the speed of execution provided by imperative, or even non-lazy functional languages. It has been suggested that the answer to this problem lies with powerful analyses, based on abstract interpretation, such as strictness analyses that can deal with arbitrary data structures and higher-order functions. In this paper, we present a break down of the benefits of a number of analyses, each performed at increasing levels of sophistication, and analyzed for a set of medium-sized functional programs. We conclude that the benefits of important analyses are limited.}, keywords = {Static Analysis}, } @techreport{Hill93, author = {Hill, Jonathan M. D.}, title = {{Data Parallel Haskell: Mixing Old and New Glue}}, institution = {Queen Mary and Westfield College}, address = {University of London, UK}, year = {1993}, abstract = {John Hughes argues that the ability to decompose a problem into parts, depends upon the ability to `glue' those parts together. A functional programming language such ass Haskell provides the user with two powerful kinds of glue: higher order functions and lazy evaluation. Although many data parallel extensions to functional languages exist --- Connection Machine Lisp, Paralation Lisp, Parallel EuLisp, and Tuple, they all lack the special glue of a lazy language. This paper describes two data parallel extensions to the lazy functional language Haskell: {\sc pod}s and {\sc pod} comprehensions. The enable parallel algorithms to be expressed by decomposing a problem into smaller sequential parts, which are then glued together in parallel. We contrast an implementation of a text searching algorithm and an LL(1) parer, and show that the parallelism within both can be expressed in terms of the same higher order parallel {\tt map} and {\tt scan} functions. In the first example, we decompose the problem into a sub-string algorithm, and a parallel line joining algorithm. We glue these two subproblems together by using lazy evaluation and {\sc pod} comprehensions --- the old and new glue seem to work well.}, keywords = {Data Parallelism}, } @inproceedings{HKL91, author = {Huang, Shell-Ying and Kelly, Paul and Liu, Junxian}, title = {{Pilot Implementation of Abstract, Declarative Process Placement}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {197--214}, abstract = {To get high performance on a distributed-memory multicomputer some explicit control is needed, at present and for the foreseeable future. This paper describes work aimed at harnessing the power of the functional notation in exercising such control. We have developed a declarative annotation scheme which allows explicit control over process placement and communications. The language, called Caliban, is being implemented on a configurable, loosely-coupled commercial multicomputer and a pilot version is described here.}, keywords = {Para-Functional Programming, Annotations}, } @inproceedings{HoBu92, author = {Howe, Denis B. and Burn, Geoffrey, L.}, title = {{Experiments with Strict STG Code}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {Lazy evaluation has many desirable features for the functional programmer but imposes certain overheads on the execution mechanism. Much work has gone into making lazy evaluation efficient. One of the most promising projects in this area is GHC, Glasgow University's Haskell compiler based on the Spineless Tagless G-Machine. Strictness analysis tells us where we can turn lazy evaluation into strict evaluation without changing the results of the program. Much theoretical work has gone into making strictness analysis more powerful and efficient enough for practical use. Less work has focussed on using the results of strictness analysis to generate better code and it is this that we consider here. We have taken the results of one kind of strictness analysis, known as evaluation transformers, and used them to transform the lazy code produced by GHC to make it more strict. We describe the execution of lazy and strict code by the STG Machine and explain why the strict code is not always faster. Finally, we suggest alternative code generation strategies and program transformations which might improve the strict code.}, keywords = {Strictness Analysis, Evaluation Transformers, Code Generation}, } @inproceedings{HoLo92, author = {Hogen, Guido and Loogen, Rita}, title = {{PASTEL --- A Parallel Stack-based Implementation of Eager Functional Programs with Lazy Data Structures}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {In this paper we present an alternative realization of the runtime structures which tends to support parallel and sequential evaluations as well. Instead of representing the cactus stack as a linked tree of function blocks, we allocate function blocks, within each processor element of the parallel system, on a single {\em meshed stack\/}, which is organized as a contiguous array, but shared by several processes. The graph is only used for the representation of parallel processes, structured data, closures and suspensions. Evaluation is controlled by the stack. During the sequential evaluation of processes the meshed stack is used as in conventional implementations. Function blocks aer allocated on top of it, when function calls are executed, and removed, when function calls terminate.}, keywords = {Parallel Graph Reduction, Stacks}, } @techreport{HoLo92a, author = {Hogen, Guido and Loogen, Rita}, title = {{A New Stack Technique for the Management of Runtime Structures in Distributed Implementation}}, institution = {Lehrstuhl f{\"u}r Informatik II}, address = {RWTH Aachen, Germany}, year = {1992}, abstract = {A new technique for the management of runtime structures in distributed implementations is presented. Instead of storing the runtime stacks of parallel processes as linked lists of function blocks in a heap structure, the local stacks of several parallel processes, which are executed on the same processor element, are stored in an interleaved manner on a single physical stack, called the {\em meshed stack\/}. The technique originated from the stack organization in the well known Warren Abstract Machine (WAM) for logic languages, where the runtime information of several alternative computation branches is stored on a single stack. In principle, the meshed stack technique is independent of the implemented language. We will explain it for the parallel implementation of functional language.s The new technique supports sequential and parallel evaluations equally well.}, keywords = {Parallel Graph Reduction, Stacks}, } @techreport{HuAn88, author = {Hudak, Paul and Anderson, Steve}, title = {{Haskell Solutions to the Language Session Problems at the 1988 Salishan High-Speed Computing Conference}}, type = {Research Report}, number = {YALEU/DCS/RR-627}, month = {May}, year = {1988}, institution = {Department of Computer Science}, address = {Yale University}, abstract = {This paper is intended to give the reader some familiarity with Haskell by giving solutions to the four language session problems presented at the 19888 Salishan Conference on High-Speed Computing.}, keywords = {Functional Programming}, } @article{HuHa91, author = {Hunt, Sebastian and Hankin, Chris}, title = {{Fixed Points and Frontiers: A New Perspective}}, journal = {Journal of Functional Programming}, volume = {1}, number = {1}, pages = {91--120}, month = {January}, year = {1991}, abstract = {Abstract interpretation is the collective name for a family of semantics-based techniques for compile-time analysis of programs. One of the most costly operations in automating such analyses is the computation of fixed points. The frontiers algorithm is an elegant method, invented by Chris Clack and Simon Peyton Jones, which addresses this issue. In this article we present a new approach to the frontiers algorithm based on the insight that frontiers represent upper and lower subsets of a function's argument domain. This insight leads to a new formulation of the frontiers algorithm for higher-order functions, which is considerably more concise than previous versions. We gon on to argue that for many functions, especially in the higher-order case, finding fixed points is an intractable problem unless the sizes of the abstract domains are reduced. We show how the semantic machinery of abstract interpretation allows us to place upper and lower bounds on the values of fixed points in large lattices by working within smaller ones.}, keywords = {Abstract Interpretation, Fixed Points, Frontiers}, } @inproceedings{Hunt89, author = {Hunt, Sebastian}, title = {{Frontiers and Open Sets in Abstract Interpretation}}, booktitle = {FPCA '89 Functional Programming Languages and Computer Architecture}, address = {London, UK, September 11--13}, year = {1989}, pages = {1--11}, publisher = {ACM Press, New York}, abstract = {Abstract interpretation is the name applied to a range of techniques which use non-standard semantics to identify properties of computer programs. The central task in applying these techniques is to find fixed points of recursive definitions. To fulfill this task efficiently, a compact representation of function values is needed. Clack and Peyton Jones showed how sets of incomparable points from a function's argument domain, known as {\em frontiers\/}, could meet this need for function spaces of the form $[2^n \rightarrow 2]$. They also developed the {\em frontiers algorithm\/}, a method of establishing the frontier representation of a function. Martin and Hankin extended the method to cope with higher-order functions over more general finite lattices. In this paper we present a new approach to the frontiers algorithm based on the insight that frontiers represent open and closed subsets of a function's argument domain. This insight leads to a new formulation of the frontiers algorithm for higher-order functions over a rich family of finite lattices. This formulation is considerably more concise than previous versions. We go on to argue that for many functions, especially in the higher-order case, finding fixed points is an intractable problem unless the sizes of the abstract domains are reduced. We show how the semantic machinery of abstract interpretation allows us to place upper and lower bounds on the values of fixed points in large lattices by working within smaller ones.}, keywords = {Abstract Interpretation, Fixed Points, Frontiers}, } @inproceedings{HwRu92, author = {Hwang, Suntae and Rushall, David}, title = {{The $\nu$-STG machine: A Parallelized Spineless Tagless Graph Reduction Machine in a Distributed Memory Architecture (Draft Version)}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {This paper describes the $\nu$-STG machine a parallelized Spineless Tagless Graph Reduction (STG) machine in a distributed memory architecture. In the $\nu$-STG machine, a stack for each task is distributed by allocating a {\em context frame\/} for each {\em tail-call sequence\/} on the {\em heap\/}. Two sparking mechanisms, {\em BSpark\/} and {\em ISpark\/}, are supported to introduce parallelism, which are similar to those in the HDG machine. The {\em BSpark\/} cannot be ignored and should be synchronized with the parent task to prevent it being blocked and resumed too frequently, while {\em ISpark\/} may be ignored. Even though a variable is sparked by {\em BSpark\/} or {\em ISpark\/}, it may be evaluated {\em in-line\/} by the parent task. Creating a new task to evaluate a {\em boxed\/} variable according to the spark annotation is delayed until it is really needed. A message passing mechanism is used to distribute jobs and synchronize them in a machine. A {\em lazy task creation mechanism\/} is supported to exploit paallelism in {\em unboxed\/} arithmetic expression by {\em boxing up\/} on demand. A simple printing mechanism is added to print outputs clearly.}, keywords = {Parallel Graph Reduction, G-Machine}, } @inproceedings{Jens91, author = {Jensen, Thomas P.}, title = {{Strictness Analysis in Logical Form}}, booktitle = {{Functional Programming Languages and Computer Architectures}}, address = {Harvard, Massachusetts, USA}, year = {1991}, series = {Lecture Notes in Computer Science}, volume = {523}, publisher = {Springer, Berlin}, editor = {Hughes, John}, pages = {352--366}, abstract = {This paper presents a framework for comparing two strictness analysis techniques: Abstract interpretation and non-standard type inference. The comparison is based on the representation of a lattice by its ideals. A formal system for deducing inclusions between ideals of a lattice is presented and proved sound and complete. Viewing the ideals as {\em strictness properties\/} we use the formal system to define a program logic for deducing strictness properties of expressions in a typed lambda calculus. This {\em strictness logic\/} is shown to be sound and complete with respect to the abstract interpretation, which establishes the main result that strictness analysis by type-inference and by abstract interpretation are equally powerful techniques.}, keywords = {Strictness Analysis, Abstract Interpretation, Type Inference}, } @inproceedings{Jour91, author = {Jouret, Guido K.}, title = {{A Foundation for Declarative Data Parallelism}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {311--330}, abstract = {Data-parallelism is inherent in the independent activity of individual data elements where this activity consists of either communication or computation. Single-Instruction, Multiple Datastream (SIMD) architectures have been designed to exploit this form of parallelism but despite the impressive performance of such architectures, language development for data-parallel architectures is an under-developed area. The main difficulties in language-design for the exploitation of data-parallelism is the traditional reliance on non-monolithic language constructs encouraged by von-Neumann architectures; the uniform and globally-accessible machine state and the resulting absence of any notions of locality and communication. Consideration of these issues and their relevance on language design leads to the development of a declarative language for data-parallel programming based on the functional style.}, keywords = {Data-Parallelism, SIMD, Para-Functional Programming}, } @inproceedings{Kess91, author = {Kesseler, Marco}, title = {{Implementing the PABC Machine on Transputers}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {409--421}, abstract = {In this paper an efficient implementation of the PABC machine on transputers is presented. The PABC machine is an abstract machine that is used to implement Concurrent Clean, an experimental lazy functional language. For a single transputer this means an `unbounded' graph store has to be implemented and a set of reducers, each having a couple of stacks. In a network of transputers each reducer should be able to send graphs to other transputers, start new reducers on these remote graphs and to get reduced graphs back. At the moment the PABC machine runs on a single transputer only. Efficient and reliable implementation on a network of transputers has started.}, keywords = {Term Graph Reduction, Abstract Machine}, } @inproceedings{KiWa92, author = {King, David J. and Wadler, Philip}, title = {{Combining Monads}}, booktitle = {Functional Programming, Glasgow}, address = {Ayr, Scotland}, editor = {Launchbury, J. and Sansom, P. M.}, publisher = {Springer, Berlin}, series = {Workshops in Computing}, year = {1992}, abstract = {Monads provide a way of structuring functional programs. Most real applications require a combination of primitive monads. Here we describe how some monads may be combined with others to yield a {\em combined monad\/}.}, keywords = {Monads, State}, } @inproceedings{KuGl92, author = {Kuchen, Herbert and Gladitz, Katia}, title = {{Implementing Bags on a Shared Memory MIMD-Machine}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {Multisets (also called bags) are an interesting data structure for parallely implemented functional programming languages, since they do not force an unneeded restriction of the data flow and allow to exploit as much parallelism as possible. Most operations on multisets can be understood as special cases of the so-called Gamma scheme. In the present paper, we investigate efficient implementations of several instances of this Gamma scheme on MIMD-machines with shared memory.}, keywords = {Multisets, Bags, Non-Determinism}, } @inproceedings{Lang91, author = {Langendoen, Koen}, title = {{WYBERT: Parallel Reduction on Shared Memory}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {219--223}, abstract = {The Dutch Parallel Reduction Machine project has shown that it is possible to implement parallel reduction based on a coarse-grain model. Within the framework of this project a prototype machine has been constructed: the Amsterdam Parallel Experimental Reduction Machine (APERM). Experimental results indicate that the architecture can be improved by connecting several processors to one graph memory. The design of APERM2 is based on this idea and consists of an extensible number of processor clusters (shared-memory multiprocessors) interconnected by a communication network. The distribution of parallel tasks over the clusters will be done by the same coarse-grain reduction model as adopted in APERM (divide\&conquer), while parallel reduction inside a cluster is the research topic of WYBERT. The objective of WYBERT is to study the opportunities to exploit shared memory, and its associated caches, for parallel graph reduction of divide\&conquer applications.}, keywords = {Graph Reduction, Shared Memory, Runtime System}, } @article{Meta88, author = {Le Metayer, Daniel}, title = {{ACE: An Automatic Complexity Evaluator.}}, journal = toplas, volume = {10}, number = {2}, pages = {248--266}, month = {April}, year = {1988}, abstract = {There has been a great deal of research done on the evaluation of the complexity of particular algorithms; little effort, however, has been devoted to the mechanization of this evaluation. The ACE (Automatic Complexity Evaluator) system is able to analyze reasonably large programs, like sorting programs, in a fully mechanically way. A time-complexity function is derived from the initial functional program. This function is transformed into its nonrecursive equivalent according to McCarthy's recursion induction principle, using a predefined library of recursive definitions. As the complexity is not a decidable property, this transformation will not be possible in all cases. The richer the predefined library is, the more likely the system is to succeed. The operations performed by ACE are described and the use ofthe system is illustrated with the analysis of a sorting algorithm. Related works and further improvements are discussed in the conclusion.}, keywords = {Granularity Analysis}, } @inproceedings{Lest89, author = {Lester, David R.}, title = {{Stacklessness: Compiling Recursion for a Distributed Architecture}}, booktitle = {FPCA '89, Functional Programming Languages and Computer Architecture}, address = {London, UK, September 11--13}, year = {1989}, pages = {116--128}, publisher = {ACM Press, New York}, abstract = {Compiling general programming languages to run efficiently on a distributed architecture is hard. One of the problems that confronts the potential implementor, is how to store the stack. This is normally used in two ways: firstly it is used to hold the arguments and temporary variables of a function or procedure, and secondly it is used to hold a previous state on entry to a new function or procedure. One way to represent such a stack is as a contiguous array. An alternative is to hold the arguments and temporary variables in a stack frame, with a back-link to the previous stack frame. In a parallel machine each task will require a separate stack. We implement each of these stacks as a linked list of stack frames, each of which resides in a garbage collected heap. Using the stacklessness analysis, a node which requires evaluation can be created with a stack frame large enough to evaluate all tail recursive calls that may occur in the reduction sequence. It is therefore unnecessary to provide an extension mechanism which enlarges stack frames. The stacklessness analysis involves giving a non-standard semantics to a typed functional language. The technique may be applied to any resource with a stack-like pattern of consumption. The paper includes a proof that an approximation to the fixpoint of a combinator in the abstract interpretation is computable. The \LaTeX\ source of the paper is also an executable Orwell program to find this approximation. The problem is particularly difficult when higher order functions are used. The stacklessness analysis is able to deal with this case. The analysis is used in the HDG-machine, a compiler and abstract machine for a Transputer based implementation of distributed graph reduction.}, keywords = {Parallel Graph Reduction, Compilation, Tail-Recursion}, } @article{Maas92, author = {Maa{\ss}en, Andreas}, title = {{Parallel Programming with Data Structures and Higher Order Functions}}, journal = {Science of Computer Programming}, volume = {18}, year = {1992}, pages = {1--38}, abstract = {Linear lists, which are the standard data structure in functional programming languages, have proved to be useful for many applications. Yet for several reasons it seems that there is a need for additional applicative data structures: Firstly, functional programming languages are well suited for parallel execution, because programs written in these languages cannot have side effects, and expressions can be evaluated independently. But parallel execution of functional programs is often hampered by the linear structure of lists, which supports only one element at a time algorithms. Secondly, lists lack the expressive power that we need in general purpose languages. For example, there is a need for array-like data structures. Thirdly, many functions on lists have to be defined recursively. No matter how efficiently recursion may be implemented, it will always fall short of the efficiency that can be obtained with imperative implementations. We propose a choice of data structures and corresponding higher order functions that enable programmers to write concise and (almost) recursion-free functional programs which have a higher degree of potential parallelism. Possible implementation techniques are discussed, and the major complexity result is that, under parallel evaluation, many functions on our data structures need only logarithmic time, whereas using lists results in linear time complexity.}, keywords = {Distributed Data Structures}, } @inproceedings{Mart92, author = {Martins, Wellington Santos}, title = {{Parallel Implementations of Functional Languages}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {With interest in parallel implementations of functional languages as a whole, we first discuss some issues related to parallel execution of functional programs, and then review some key proposals considering three different approaches, namely Parallel Lisp Systems, Dataflow Systems and Reduction Systems}, keywords = {Parallel Lisp, Dataflow, Parallel Graph Reduction}, } @inproceedings{Mint92, author = {Mintchev, Sava}, title = {{Using Strictness Information in the STG-Machine}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {The paper presents an attempt at exploiting strictness information for parallel evaluation of functional programs. A simple evaluation model, which uses strictness in a limited way, is suggested. It has been applied in a parallel version of the STG-machine; special attention has been paid to avoiding the creation of useless tasks. Some results from the simulation of the parallel STG-machine are provided, throwing light on the amount and granularity of parallelism.}, keywords = {Strictness Analysis, Parallel Graph Reduction}, } @inproceedings{MiVr91, author = {Milikowski, R. and Vree, W. G.}, title = {{The G-Line --- A Distributed Processor for Graph Reduction}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {265--282}, abstract = {The G-line is a horizontally coded graph reducer. In lazy functional languages much time is used to manage the graph. A method has been developed to perform construction of a subgraph in a single parallel access to the graph memory. With a simulation, using infinite hardware, we have shown that the architecture performs well. We argue that a realistic implementation is possible.}, keywords = {Parallel Graph Reduction, Abstract Machine}, } @inproceedings{NPS91, author = {N{\"o}cker, Eric and Plasmeijer, Rinus and Smetsers, Sjaak}, title = {{The Parallel ABC Machine}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {351--381}, abstract = {In this paper we will introduce the parallel ABC machine. The parallel ABC machine is an abstract parallel machine used for creating and describing implementations of Concurrent Clean, a parallel functional language based on the principles of term graph rewriting. Basically, the parallel ABC machine represents a loosely coupled processor architecture, but abstracts from specific implementation issues. We will show how such a machine can be defined, and how code for it can be generated. As for the sequential ABC machine, the description will be given in a functional framework.}, keywords = {Term Graph Rewriting, Abstract Machine}, } @inproceedings{Osth91, author = {Ostheimer, Gerrrald}, title = {{Parallel Functional Computation on STAR:DUST}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {393--407}, abstract = {STAR:DUST ('St. Andrews RISC: Dataflow Using Sequential Threads') is a processor design optimized for efficient execution of sequential threads while supporting plug-and-play construction of large multiprocessor systems. Besides satisfying the major RISC criteria (small instruction set, simple instruction format, load/store principle pipelining), STAR:DUST employs a dataflow approach to communication and parallelism. We will describe the architecture and propose a runtime model for parallel functional computation based on STAR:DUST's dataflow primitives.}, keywords = {Dataflow, Sequential Threads, Hybrid Architecture}, } @inproceedings{Rose89, author = {Rosendahl, Mads}, title = {{Automatic Complexity Analysis}}, booktitle = {FPCA '89 Functional Programming Languages and Computer Architecture}, address = {London, UK, September 11--13}, year = {1989}, pages = {144-156}, publisher = {ACM Press, New York}, abstract = {One way to analyze programs is to derive expressions for their computational behavior. A time bound function (or worst-case complexity) gives an upper bound for the computation time as a function of the size of the input. We describe a system to derive such time bounds automatically using abstract interpretation. The semantics-based setting makes it possible to prove the correctness of the time-bound function. The system can analyze programs in a first-order subset of Lisp and we show how the system also can be used to analyze programs in other languages.}, keywords = {Complexity Analysis, Abstract Interpretation}, } @inproceedings{SaWa91, author = {Sargeant, John and Watson, Ian}, title = {{Some Experiments in Controlling the Dynamic Behavior of Parallel Functional Programs}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {103--122}, abstract = {In our quest for the ``Holy Grail'' of efficient implicit parallelism, we have opted for a conventional architecture with physically distributed but virtually shared memory (the EDS machine), and a Large Grain Graph Rewriting computational model. On programming languages, we hedge our bets, but the work described here uses a (strict) functional language. This set of choices eliminates many of the problems and brings those which remain into sharp focus. ``All'' we need to do is to achieve efficient dynamic load balancing, and high granularity, and good data structure locality. We have a compiler from Hope+ to the EDS machine (simulator), and this way used to investigate the properties of a ``real'' parallel program, a relational database implementation. This exercise showed the inadequacy of static analysis alone for detecting ``good'' tasks to do in parallel. Attempts to overcome this by programmer annotations had limited success. Furthermore, experiments with a much simpler program showed alarming variations in performance due to apparently innocuous differences in the basic low-level mechanisms for spawning parallel tasks. We are investigating an approach whereby the program is first compiled with monitoring code inserted, and run (using smallish data) to produce statistics which are fed back into the compiler which then produces optimized parallel code. This work is embryonic, but the method seems to have considerable advantages over static analysis or hand annotation, and also provides extra performance information for the programmer.}, keywords = {Load Balancing, Static analysis, Annotations, Monitoring}, } @inproceedings{Sche91, author = {Schepers, J{\"o}rg}, title = {{Using Functional Languages for Process Specifications}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {89--102}, abstract = {The GRAPH-system, a tool for the specification, analysis and execution of communicating functional processes, is presented. Processes are specified in GRAPH at two separate specification levels: At the functional level the input/output behavior of processes is specified by means of pgorgrams of a functional language. At the process level these processes constitute the atomic components of hierarchically structured process systems, wherein process communication is realized via streams. Different process types are available, supporting the design of dynamically expanding process structures as well as of non-deterministic processes, such as semaphores. Process systems are externally represented as graphs which are variants of Petri-nets, composed from processes and streams. The Petri-net semantics allows to verify statically certain invariance properties which guarantee a stable overall system behavior. Structural deadlocks can also be detected by a static analysis. The GRAPH-system provides a syntax-oriented editor for the graphical specification of process systems. Furthermore, it includes several control and analysis facilities and supports the stepwise execution of processes under interactive control.}, keywords = {Process networks, streams, Petri nets, deadlocks}, } @inproceedings{Sewa92, author = {Seward, Julian}, title = {{Polymorphic Strictness Analysis using Frontiers (Draft Version)}}, booktitle = {4th International Workshop on the Parallel Implementation of Functional Languages}, address = {Aachen, Germany, September 28--30}, year = {1992}, editor = {Kuchen, Herbert and Loogen, Rita}, abstract = {This paper shows how to implement sensible polymorphic strictness analysis using the Frontiers algorithm. A central notion is to only ever analyze each function once, at its simplest polymorphic isntance. Subsequent non-base uses of functions are dealt with by generalizing their simplest instance analyzes. This generalisation is done using an algorithm developed by Baraki, based on embedding-closure pairs. Compared with an alternative approach of expanding the program out into a collection of monomorphic instances, this technique is hundreds of times faster for realistic programs. There are some approximations involved, but these do not seem to have a detrimental effect on the overall result. The overall effect of this technology is to considerably expand the range of programs for which the Frontiers algorithm gives useful results reasonably quickly.}, keywords = {Strictness Analysis, Abstract Interpretation, Fixed Points}, } @inproceedings{SHD91, author = {Sharp, David and Harrison, Peter and Darlington, John}, title = {{A Synthesis of a Dynamic Message-Passing Algorithm for Quicksort}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {53--74}, abstract = {We present a methodology for systematically synthesizing algorithms for {\em message passing parallel machines\/} and illustrate the technique by the formal derivation of a message passing sorting algorithm, based on Quicksort, that is, we claim, the fastest parallel quicksort algorithm having a complexity of $O(\log n * \log n)$ for a machine with $n$ processors sorting an $n$ element list. The algorithm is thus highly suitable for massively parallel machines such as the Thinking Machines Connection Machine.}, keywords = {Prrogram Transformation}, } @inproceedings{Tick88, author = {Tick, E.}, title = {{Compile-Time Granularity Analysis for Parallel Logic Programming Languages}}, booktitle = {International Conference on Fifth Generation Computer Systems}, address = {Tokyo, Japan, November 28 -- December 2}, year = {1988}, pages = {994--1000}, publisher = {ICOT, Tokyo}, abstract = {The paper describes a simple compiler analysis method for determining the ``weight'' of procedures in parallel logic programming languages. Using Flat Guarded Horn Clauses (FGHC) as an example, the analysis algorithm is described. Consideration of weights has been incorporated in the scheduler of a real-parallel FGHC emulator running on the Sequent Symmetry multiprocessor. Alternative demand-distribution methods are discussed, including {\em oldest-first\/} and {\em heaviest-first\/} distributions. Performance measurements, collected from a group of non-trivial benchmarks on eight processors, show that the new schemes do {\em not\/} perform significantly faster than conventional distribution methods. This result is attributed to a combination of factors overshadowing the benefits of the new method: high system overheads, the low cost of spawning a goal on a shared memory multiprocessor, and the increase in synchronization caused by the new methods. Directions of further research are discussed, indicating where further speedup can be attained.}, keywords = {Granularity Analysis}, } @inproceedings{Wadl92, author = {Wadler, Philip}, title = {{The Essence of Functional Programming}}, booktitle = {19th Annual Symposium on Principles of Programming Languages}, addresss = {Santa Fe, New Mexico}, month = {January}, year = {1992}, abstract = {This paper explores the use of monads to structure functional programs. No prior knowledge of monads or even category theory is required. Monads increase the ease with which programs may be modified. They can mimic the effect of impure features such as exceptions, state, and continuations; and also provide effects not easily achieved with such features. The types of a program reflect which effects occur. The first section is an extended example of the use of monads. A simple interpreter is modified to support various extra features: error messages, state, output, and non-deterministic choice. The second section describes the relation between monads and continuation-passing style. The third section sketches how monads are used in a compiler for Haskell that is written in Haskell.}, keywords = {Monads, State}, } @inproceedings{Waug91, author = {Waugh, Kevin G.}, title = {{Parallel Imperative Programs from Functional Prototypes}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {75--88}, abstract = {We have been considering the automatic translation of sequential SML prototypes of vision algorithms into efficient parallel Occam programs. We use function instrumentation of the prototype to obtain typical and pathological execution details and static analysis allows us to consider argument sizes and flow. From these results an abstract form of the functional program is used to estimate partition size and hence granularity. The transformations which apply to the functional prototype can also to be applied to the abstract form of the program. In this way we can use the transformed abstracted program to determine the effect of particular transformations on execution of the prototype.}, keywords = {Program Transformation, Weight Analysis}, } @inproceedings{Zimm91, author = {Zimmer, Ralf}, title = {{Reflections, the Curch-Rosser Property, and ``Real'' Applications in Functional Systems}}, booktitle = {3rd International Workshop on the Parallel Implementation of Functional Languages}, address = {Southampton, UK, June 5--7}, year = {1991}, editor = {Glaser, Hugh and Hartel, Pieter}, publisher = {Technical Report CSTR 91-07, University of Southampton}, pages = {331--349}, abstract = {Modern functional systems are often claimed to be well suited for programming and using multiprocessor machines. Unfortunately, most of them do not feature the Church-Rosser property. This paper is to show that for the correct construction of parallel programs and for easy reasoning about their correctness the Church-Rosser property is indispensable. A conservative extension of functional systems using a reflection mechanism is proposed for thereduction system $\pi$-RED. The resulting reduction system $\rho$-RED has the Church-Rosser $\pi$-RED property. Necessary conditions for reflexive extensions of functional systems are identified. With certain restrictions, the reflection mechanism can also be employed for the integration of different (even non-functional) subsystems, e.g.\ parsers, numerical packages, and computer algebra systems. Functional programs which have imperative subsystems included by the reflection mechanism can be executed in $\rho$-RED non-sequentially without causing side-effects due to these subsystems.}, keywords = {Reflections, Church-Rosser}, }