%
% (Compiled by) 1993
%
%   Wolfgang Schreiner <Wolfgang.Schreiner at risc.uni-linz.ac.at>
%   Research Institute for Symbolic Computation (RISC-Linz)
%   Johannes Kepler University, A-4040 Linz, Austria
%
% PLEASE DO NOT REMOVE THIS NOTICE.
%

@inproceedings{Acht91,
author = {Achten, Peter},
title = {{Annotations for Load Distribution}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {247--264},
abstract = {Parallel programming is the art of gaining vast computational
speed-up by finding a well (or optimal) load distribution of a program onto
some network of processors. Optimal load distribution of programparts on a
network of processors is an undecidable problem. Therefore supplying the
programmer with annotations for directing the actual load distribution of a
certain program still seems to be a necessary method of increasing the
programs performance. The talk discusses what a good set of primitives to
describe load distributions in Concurrent Clean might look like. Some of the
primitives have been implemented on the PABC simulator, on which a number of
parallelized programs have been tested.},
keywords = {Para-Functional Programming, Annotations, Process Mapping},
}

@inproceedings{AFB91,
author = {Aharoni, Gad and Farber, Yaron and Barak, Amnon},
title = {{A Strategy for the Run-Time Mangagement of Fine-Grained
Parallelism}}, 
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {227--246},
abstract = {We present a dynamic, adaptive mechanism for granularity control
of the parallel execution of functional programs. This mechanism utilizes the
inherent parallelism of functional programs, yet it endeavors to limit
excessive parallelism. It is based on the ability to expose, at run time, the
shape of the underlying execution structure of the program, and then to
allocate the tasks to the processing elements accordingly. High-density
structures are distributed quickly among many processing elements, whereas low
density ones have a tendency to be confined to few processors. Continuous
sampling of the execution structure allows self-tuning of the mechanism, when
the underlying structure changes gradually. The advantage of the presented
mechanism is its ability to handle programs whose execution structure cannot
be determined in advance. We present performance figures that show the
effectiveness of the approach for a wide range of computation structures,
including divide-and-conquer programs, and linear recursive algorithms.},
keywords = {Load Balancing, Granularity Control},
}

@inproceedings{AHPT91,
author = {Akerholt, G. and Hammond, K. and Peyton Jones, S. and Trinder, Ph.},
title = {{A Parallel Functional Database for GRIP}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {7--30},
abstract = {GRIP is a shared memory multiprocessor designed for efficient
parallel evaluation of functional languages using compiled graph reduction. In
this paper, we consider the feasibility of implementing a data base manager on
GRIP, and present results obtained from a pilot implementation. A database
implemented in a pure functional language must be modified {\em
non-destructively\/}, i.e.\ the original database must be preserved and a new
copy constructed. The naive implementation provides evidence for the
feasibility of a pure functional database in the form of modest real-time
speed-ups, and acceptable real-time performance. The functional database is
also used to investigate the GRIP architecture, compared with an idealized
machine. The particular features investigated are the thread-creation costs
and caching of GRIP's distributed memory.},
keywords = {Databases},
}

@inproceedings{Bara91,
author = {Baraki, Gebreselassie},
title = {{A Note on Abstract Interpretation of Polymorphic Functions}},
booktitle = {{Functional Programming Languages and Computer Architectures}},
address = {Harvard, Massachusetts, USA},
year = {1991},
series = {Lecture Notes in Computer Science},
volume = {523},
publisher = {Springer, Berlin},
editor = {Hughes, John},
pages = {367--378},
abstract = {Strictness analysis of monomorphic functional programs by abstract
interpretation has been extensively studied. Abramsky's work describes the
extension to polymorphic functions. However, if different instances of a
polymorphic function are used in a program, we have to re-analyze to compute
abstract functions at each instance used. In the first-order case, Hughes
showed how to compute the needed abstract functions without re-analysis. In
this paper we develop a method of computing safe approximations to abstract
functions of higher-order functions. The approximations are computed from
abstract functions of simple instances of polymorphic functions. In the
first-order case, the method gives exact values.},
keywords = {Abstract Interpretation},
}

@inproceedings{BeKe92,
author = {Bennett, Andrew J. and Kelly, Paul H. J.},
title = {{Simulation of Multicache Parallel Graph Reduction}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {Parallel Graph Reduction is a simple formal model for parallel
program execution which uses the shared-memory abstraction for all
communication and synchronization between processors. Shared memory is used
under a strict access regime with single assignment and blocking reads. In
this paper we present the design and implementation of an efficient and
accurate multiprocessor simulation scheme suitable to work with a parallel
graph reducer and use the simulator to study the performance and pattern of
access of a suite of benchmark programs. Threads are never migrated to another
processor by the parallel graph reducer, and the effect of this scheduling
policy is investigated},
keywords = {Parallel Graph Reduction, Simulation},
}

@inproceedings{BjHo89,
author = {Bjerner, Bron and Holmstr{\"o}m, S{\"o}ren},
title = {{A Compositional Approach to Time Analysis of First Order Functional
Programs}},
booktitle = {FPCA '89 Functional Programming Languages and
Computer Architecture},
address = {London, UK, September 11--13},
year = {1989},
pages = {157--165},
publisher = {ACM Press, New York},
abstract = {We present a method for computing the number of steps needed to
compute a lazy first order functional program {\tt e} (to an approximation of
its value). The method itself is described as a lazy functional program.
Moreover, it is {\em compositional\/}, because it is defined by recursion on
the structure of {\tt e}. In other words, the number of steps needed to
compute {\tt e} is described in terms of the number of steps the parts of
{\tt e} need. (This is in contrast with the obvious operational method to
define an interpreter and count the number of steps that it needs.) The
equations that define the analyzing program can also be used when reasoning
about time complexity of lazy functional programs. Two simple examples are
given at the end of the paper.},
keywords = {Time Analysis},
}

@inproceedings{Brat92,
author = {Bratvold, Tore A.},
title = {{Determining Useful Parallelism in Higher Order Functions}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {Programs written in explicitly parallel languages are often
composed of pieces of task specific code structured and interwoven with a
limited number of well known parallel patterns. While such programs may indeed
be very efficient in terms of execution speed, they are usually very hard both
to write and reason about, and portability is low. The approach of expressing
parallel patterns as templates or higher order functions has received
attention lately, both from a theoretical point of view and in practical
implementations. The paper presents an outline of the compilation stages in a
proposed system based on this approach in combination with functional
programming. The presence of useful parallelism is discussed for three higher
order functions implemented as processor farms, and an example is presented to
illustrate the approach.},
keywords = {Higher Order Functions, Skeletons},
}

@article{Broy92,
author = {Broy, Manfred and Dendorfer, Claus},
title = {{Modelling Operating System Structures by Timed Stream Processing
Functions}}, 
journal = {Journal of Functional Programming},
volume = {2},
number = {1},
pages = {1--21},
month = {January},
year = {1992},
abstract = {Some extensions of the basic formalism of stream processing
functions are useful to specify complex structures such as operating systems.
In this paper we give the foundations of higher order stream processing
functions. These are functions which send and accept not only messages
representing atomic data, but also complex elements such as functions. Some
special notations are introduced for the specification and manipulation of
such functions. A representation of time is outlined, which enables us to
model time dependent behavior. Finally, we demonstrate how characteristic
operating system structures can be modelled by timed higher order stream
processing functions.},
keywords = {Operating System, Streams},
}

@techreport{BuMe92,
author = {Burn, Geoffrey and Le Metayer, Daniel},
title = {{Cps-Translation and the Correctness of Optimising Compilers}},
number = {DoC92/20},
institution = {Department of Computing},
address = {Imperial College, London, UK},
year = {1992},
abstract = {We show that compiler optimizations based on strictness analysis
can be expressed formally in the functional framework using continuations.
This formal presentation has two benefits: it allows us to give a rigorous
correctness proof of the optimized compiler; and it exposes the various
optimizations made possible by strictness analysis. These benefits are
especially significant in the presence of partially evaluated data
structures.}, 
keywords = {Strictness Analysis, Optimization, Correctness Proof},
}

@inproceedings{Burt89,
author = {Burton, F.\ Warren},
title = {{Indeterminate Behavior with Determinate Semantics in Parallel
Programs}},
booktitle = {FPCA '89 Functional Programming Languages and
Computer Architecture},
address = {London, UK, September 11--13},
year = {1989},
pages = {340--346},
publisher = {ACM Press, New York},
abstract = {A parallel program may be indeterminate so that it can adapt its
behavior to the number of processors available, or at least so that low level
timing issues are removed from the program. Indeterminate programs are hard
to write, understand, modify or verify. They are impossible to debug, since
they may not behave the same from one run to the next. We propose a new
construct, a polymorphic abstract data type called an {\em improving
value\/}, with operations that have indeterminate behavior but simple
determinate semantics. These operations allow the type of indeterminate
behavior required by many parallel algorithms. We define improving values in
the context of a functional programming language, but the technique can be
used in procedural languages as well.},
keywords = {Non-determinism},
}

@article{Burt91,
author = {Burton, Warren F.},
title = {{Encapsulating Non-Determinacy in an Abstract Data Type with
Determinate Semantics}},
journal = {Journal of Functional Programming},
volume = {1},
number = {1},
pages = {3--20},
month = {January},
year = {1991},
abstract = {A parallel program may be indeterminate so that it can adapt its
behavior to the number of processors available.  Indeterminate programs are
hard to write, understand, modify or verify. They are impossible to debug,
since they may not behave the same from one run to the next. We propose a new
construct, a polymorphic abstract data type called an {\em improving
value\/}, with operations that have indeterminate behavior but simple
determinate semantics. These operations allow the type of indeterminate
behavior required by many parallel algorithms. We define improving values in
the context of a functional programming language, but the technique can be
used in procedural languages as well.},
keywords = {Non-determinism},
}

@inproceedings{CSS*91,
author = {Culler, David E. and Sah, Anurag and Schauser, Klaus Erik and
von Eicken, Thorsten and Wawrzynek, John},
title = {{Fine-grain Parallelism with Minimal Hardware Support: A
Compiler-Controlled Threaded Abstract Machine}},
booktitle = {Fourth International Conference on Architectural Support for
Programming Languages and Operating Systems},
address = {Santa Clara, California, April 8--11},
year = {1991},
pages = {164--175},
volume = {26(4)},
series = {SIGPLAN Notices},
abstract = {In this paper, we present a relatively primitive execution model
for fine-grain parallelism, in which all synchronization, scheduling, and
storage management is explicit and under compiler control. This is defined by
a threaded abstract machine (TAM) with a multilevel scheduling hierarchy.
Considerable temporal locality of logically related threads is demonstrated,
providing an avenue for effective register use under quasi-dynamic scheduling.
A prototype TAM instruction set, TL0, has been developed along with a
translator to a variety of existing sequential and parallel machines.
Compilation of Id, an extended functional language requiring fine-grain
synchronization, under this model yields performance approaching that of
conventional languages on current uniprocessors. Measurements suggest that the
net cost of synchronization on conventional multiprocessors can be reduced to
within a small factor of that on machines with elaborate hardware support,
such as proposed dataflow architectures. This brings into question whether
tolerance to latency and inexpensive synchronization require specific hardware
support or merely an appropriate compilation strategy and program
representation.},
keywords = {Multi-Threaded Machines, Dataflow, Compilation},
}

@inproceedings{DFH*91,
author = {Darlington, J. and Field, A. J. and Harrison, P. G. and Harper, D.
and Jouret, G. K. and Kelly, P. J~. and Sephton, K. M. and Sharp, D. W.},
title = {{Structured Parallel Functional Programming}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {31--52},
abstract = {We present a methodology, based on functional program
transformation, aimed at facilitating the development of applications on a
variety of diverse parallel machines. The novelty of the method lies in
employing a fixed repertoire of parallel program forms, or {\em skeletons\/},
expressed as higher-order functions for which efficient implementations
existon particular parallel machines. Program transformation techniques are
then used to convert general functional programs into compositions of known
skeletons. We give an overview of the problems of programming parallel
machines, highlighting the problem in both the implicit and explicit approach,
present a taxonomyof parallel machines, discuss the role and advantage of
using skeletons, give definitions of particular skeletons, discuss semantic
issues, detail the transformations employed and the implementation route used
and give examples of the technique in action. The methodology is successful in
allowing a broad variety of parallel machines, from SIMD to MIMD, to be
programmed efficiently in a uniform manner that still allows portability
across different machine types.},
keywords = {Parallel Programming Skeletons, Program Transformation},
}

@inproceedings{Dorr92,
author = {D{\"o}rr, Heiko},
title = {{Monitoring with Graph-Grammars as Formal Operational Models}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {This paper presents a monitoring method which reduces the costs of
event-tracing. To support the monitoring of a system-run, a model is used to
manage the system's information which has been collected since the beginning
of the run. By this means the collection of redundant event-trace information
is avoided. The background and usage of model-based monitoring is discussed,
too. Graph-grammars with grouped rules are proposed and defined as an
operational formalism to model the behavior of a system. They are especially
suitable for the description of distributed systems whose functionality can be
characterized by structural changes. In graph-grammar models, relevant
functional parts become directly addressable. Therefore, system analysis
becomes easier. The formal model of a parallel graph reduction machine is
introduced in order to illustrate the monitoring method.},
keywords = {Monitoring, Parallel Graph Reduction},
}

@inproceedings{GHV92,
author = {Glas, J. C. and Hofman, R. F. H. and Vree, W. G.},
title = {{Parallelization of Branch-and-Bound Algorithms in a Functional
Programming Environment}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {The referential transparency of functional languages imposes
restraints on the communication between parallel tasks. This seriously hampers
the implementation of parallel branch-and-bound algorithms, because the
parallel tasks cannot update a shared bound asynchronously. Maintaining local
bounds only causes enormous performance loss, because parallel
branch-and-bound algorithms exploit speculative parallelism. Therefore, the
parallel tasks have to exchange their bounds synchronously, but this lowers
the average processor utilization. Three parallel functional implementations
of a generally applicable branch-and-bound algorithm have been developed to
investigate this trade-off. Experiments have been performed with the cargo
loading problem. Two functional equivalents of imperative branch-and-bound
approaches are outperformed by a dedicated parallel functional
branch-and-bound implementation. A new speculative parallel annotation is
proposed to tackle the trade-off between speculative transformation loss and
average processor utilization on system level.},
keywords = {Branch-and-Bound, Speculative Parallelism},
}

@inproceedings{Glau92,
author = {Glauert, John},
title = {{Parallel Implementation of Functional Languages Using Small
Processes}}, 
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {We report work in progress on the implementation of languages that
integrate concurrent and functional programming styles. A translation scheme
is presented for mapping such languages into process networks using a simple
notation based on communication between named processes. The translation of
functional code is sequential, but parallelism arises from the use of
concurrency constructs in the source language. Early implementation
experiments have used graph rewriting techniques, although work on a more
direct implementation is in progress.},
keywords = {Process Networks, Para-Functional Programming, Parallel Graph
Reduction}, 
}

@inproceedings{Gron92,
author = {van Groningen, John H. G.},
title = {{Some Implementation Aspects of Concurrent Clean on Distributed
Memory Architectures}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {We have implemented a code generator and a runtime system that can
be used to simulate parallel execution of Concurrent Clean programs on a
single Macintosh computer. This code generator generates machine code and is
an extension of the (sequential) ABC code generator for the MC680x0 and SPARC
processors. This implementation is discussed briefly. The largest part of the
paper describes two aspects of the implementation in detail. The first one is
how to let a process wait until a node is overwritten with its head normal
form. Several possible solutions are described. The other one is a description
of an efficient graph copying algorithm. This graph copying algorithm can be
used to send graphs to remote processors using a more compact representation.
Finally, some results are presented.},
keywords = {Parallel Graph Reduction},
}

@inproceedings{HGW91,
author = {Hartel, Pieter and Glaser, Hugh and Wild, John},
title = {{On the Benefits of Different Analyses in the Compilation of a Lazy
Functional Language}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {123--145},
abstract = {Implementations of lazy functional languages have not as yet
approached the speed of execution provided by imperative, or even non-lazy
functional languages. It has been suggested that the answer to this problem
lies with powerful analyses, based on abstract interpretation, such as
strictness analyses that can deal with arbitrary data structures and
higher-order functions. In this paper, we present a break down of the benefits
of a number of analyses, each performed at increasing levels of
sophistication, and analyzed for a set of medium-sized functional programs. We
conclude that the benefits of important analyses are limited.},
keywords = {Static Analysis},
}

@techreport{Hill93,
author = {Hill, Jonathan M. D.},
title = {{Data Parallel Haskell: Mixing Old and New Glue}},
institution = {Queen Mary and Westfield College},
address = {University of London, UK},
year = {1993},
abstract = {John Hughes argues that the ability to decompose a problem into
parts, depends upon the ability to `glue' those parts together. A functional
programming language such ass Haskell provides the user with two powerful
kinds of glue: higher order functions and lazy evaluation. Although many data
parallel extensions to functional languages exist --- Connection Machine Lisp,
Paralation Lisp, Parallel EuLisp, and Tuple, they all lack the special glue of
a lazy language. This paper describes two data parallel extensions to the lazy
functional language Haskell: {\sc pod}s and {\sc pod} comprehensions. The
enable parallel algorithms to be expressed by decomposing a problem into
smaller sequential parts, which are then glued together in parallel. We
contrast an implementation of a text searching algorithm and an LL(1) parer,
and show that the parallelism within both can be expressed in terms of the
same higher order parallel {\tt map} and {\tt scan} functions. In the first
example, we decompose the problem into a sub-string algorithm, and a parallel
line joining algorithm. We glue these two subproblems together by using lazy
evaluation and {\sc pod} comprehensions --- the old and new glue seem to work
well.}, 
keywords = {Data Parallelism},
}

@inproceedings{HKL91,
author = {Huang, Shell-Ying and Kelly, Paul and Liu, Junxian},
title = {{Pilot Implementation of Abstract, Declarative Process Placement}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {197--214},
abstract = {To get high performance on a distributed-memory multicomputer some
explicit control is needed, at present and for the foreseeable future. This
paper describes work aimed at harnessing the power of the functional notation
in exercising such control. We have developed a declarative annotation scheme
which allows explicit control over process placement and communications. The
language, called Caliban, is being implemented on a configurable,
loosely-coupled commercial multicomputer and a pilot version is described
here.},
keywords = {Para-Functional Programming, Annotations},
}

@inproceedings{HoBu92,
author = {Howe, Denis B. and Burn, Geoffrey, L.},
title = {{Experiments with Strict STG Code}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {Lazy evaluation has many desirable features for the functional
programmer but imposes certain overheads on the execution mechanism. Much work
has gone into making lazy evaluation efficient. One of the most promising
projects in this area is GHC, Glasgow University's Haskell compiler based on
the Spineless Tagless G-Machine. Strictness analysis tells us where we can
turn lazy evaluation into strict evaluation without changing the results of
the program. Much theoretical work has gone into making strictness analysis
more powerful and efficient enough for practical use. Less work has focussed
on using the results of strictness analysis to generate better code and it is
this that we consider here. We have taken the results of one kind of
strictness analysis, known as evaluation transformers, and used them to
transform the lazy code produced by GHC to make it more strict. We describe
the execution of lazy and strict code by the STG Machine and explain why the
strict code is not always faster. Finally, we suggest alternative code
generation strategies and program transformations which might improve the
strict code.}, 
keywords = {Strictness Analysis, Evaluation Transformers, Code Generation},
}

@inproceedings{HoLo92,
author = {Hogen, Guido and Loogen, Rita},
title = {{PASTEL --- A Parallel Stack-based Implementation of Eager Functional
Programs with Lazy Data Structures}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {In this paper we present an alternative realization of the runtime
structures which tends to support parallel and sequential evaluations as well.
Instead of representing the cactus stack as a linked tree of function blocks,
we allocate function blocks, within each processor element of the parallel
system, on a single {\em meshed stack\/}, which is organized as a contiguous
array, but shared by several processes. The graph is only used for the
representation of parallel processes, structured data, closures and
suspensions. Evaluation is controlled by the stack. During the sequential
evaluation of processes the meshed stack is used as in conventional
implementations. Function blocks aer allocated on top of it, when function
calls are executed, and removed, when function calls terminate.},
keywords = {Parallel Graph Reduction, Stacks},
}

@techreport{HoLo92a,
author = {Hogen, Guido and Loogen, Rita},
title = {{A New Stack Technique for the Management of Runtime Structures in
Distributed Implementation}},
institution = {Lehrstuhl f{\"u}r Informatik II},
address = {RWTH Aachen, Germany},
year = {1992},
abstract = {A new technique for the management of runtime structures in
distributed implementations is presented. Instead of storing the runtime
stacks of parallel processes as linked lists of function blocks in a heap
structure, the local stacks of several parallel processes, which are executed
on the same processor element, are stored in an interleaved manner on a single
physical stack, called the {\em meshed stack\/}. The technique originated from
the stack organization in the well known Warren Abstract Machine (WAM) for
logic languages, where the runtime information of several alternative
computation branches is stored on a single stack. In principle, the meshed
stack technique is independent of the implemented language. We will explain it
for the parallel implementation of functional language.s The new technique
supports sequential and parallel evaluations equally well.},
keywords = {Parallel Graph Reduction, Stacks},
}

@techreport{HuAn88,
author = {Hudak, Paul and Anderson, Steve},
title = {{Haskell Solutions to the Language Session Problems at the 1988
Salishan High-Speed Computing Conference}},
type = {Research Report},
number = {YALEU/DCS/RR-627},
month = {May},
year = {1988},
institution = {Department of Computer Science},
address = {Yale University},
abstract = {This paper is intended to give the reader some familiarity with
Haskell by giving solutions to the four language session problems presented
at the 19888 Salishan Conference on High-Speed Computing.},
keywords = {Functional Programming},
}

@article{HuHa91,
author = {Hunt, Sebastian and Hankin, Chris},
title = {{Fixed Points and Frontiers: A New Perspective}},
journal = {Journal of Functional Programming},
volume = {1},
number = {1},
pages = {91--120},
month = {January},
year = {1991},
abstract = {Abstract interpretation is the collective name for a family of
semantics-based techniques for compile-time analysis of programs. One of the
most costly operations in automating such analyses is the computation of
fixed points. The frontiers algorithm is an elegant method, invented by Chris
Clack and Simon Peyton Jones, which addresses this issue. In this article we
present a new approach to the frontiers algorithm based on the insight that
frontiers represent upper and lower subsets of a function's argument domain.
This insight leads to a new formulation of the frontiers algorithm for
higher-order functions, which is considerably more concise than previous
versions. We gon on to argue that for many functions, especially in the
higher-order case, finding fixed points is an intractable problem unless the
sizes of the abstract domains are reduced. We show how the semantic machinery
of abstract interpretation allows us to place upper and lower bounds on the
values of fixed points in large lattices by working within smaller ones.},
keywords = {Abstract Interpretation, Fixed Points, Frontiers},
}

@inproceedings{Hunt89,
author = {Hunt, Sebastian},
title = {{Frontiers and Open Sets in Abstract Interpretation}},
booktitle = {FPCA '89 Functional Programming Languages and
Computer Architecture},
address = {London, UK, September 11--13},
year = {1989},
pages = {1--11},
publisher = {ACM Press, New York},
abstract = {Abstract interpretation is the name applied to a range of
techniques which use non-standard semantics to identify properties of
computer programs. The central task in applying these techniques is to find
fixed points of recursive definitions. To fulfill this task efficiently, a
compact representation of function values is needed. Clack and Peyton Jones
showed how sets of incomparable points from a function's argument domain,
known as {\em frontiers\/}, could meet this need for function spaces of the
form $[2^n \rightarrow 2]$. They also developed the {\em frontiers
algorithm\/}, a method of establishing the frontier representation of a
function. Martin and Hankin extended the method to cope with higher-order
functions over more general finite lattices. In this paper we present a new
approach to the frontiers algorithm based on the insight that frontiers
represent open and closed subsets of a function's argument domain. This
insight leads to a new formulation of the frontiers algorithm for
higher-order functions over a rich family of finite lattices. This
formulation is considerably more concise than previous versions. We go on to
argue that for many functions, especially in the higher-order case, finding
fixed points is an intractable problem unless the sizes of the abstract
domains are reduced. We show how the semantic machinery of abstract
interpretation allows us to place upper and lower bounds on the values of
fixed points in large lattices by working within smaller ones.},
keywords = {Abstract Interpretation, Fixed Points, Frontiers},
}

@inproceedings{HwRu92,
author = {Hwang, Suntae and Rushall, David},
title = {{The $\nu$-STG machine: A Parallelized Spineless Tagless Graph
Reduction Machine in a Distributed Memory Architecture (Draft Version)}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {This paper describes the $\nu$-STG machine a parallelized
Spineless Tagless Graph Reduction (STG) machine in a distributed memory
architecture. In the $\nu$-STG machine, a stack for each task is distributed
by allocating a {\em context frame\/} for each {\em tail-call sequence\/} on
the {\em heap\/}. Two sparking mechanisms, {\em BSpark\/} and {\em ISpark\/},
are supported to introduce parallelism, which are similar to those in the HDG
machine. The {\em BSpark\/} cannot be ignored and should be synchronized with
the parent task to prevent it being blocked and resumed too frequently, while
{\em ISpark\/} may be ignored. Even though a variable is sparked by {\em
BSpark\/} or {\em ISpark\/}, it may be evaluated {\em in-line\/} by the parent
task. Creating a new task to evaluate a {\em boxed\/} variable according to
the spark annotation is delayed until it is really needed. A message passing
mechanism is used to distribute jobs and synchronize them in a machine. A {\em
lazy task creation mechanism\/} is supported to exploit paallelism in {\em
unboxed\/} arithmetic expression by {\em boxing up\/} on demand. A simple
printing mechanism is added to print outputs clearly.},
keywords = {Parallel Graph Reduction, G-Machine},
}

@inproceedings{Jens91,
author = {Jensen, Thomas P.},
title = {{Strictness Analysis in Logical Form}},
booktitle = {{Functional Programming Languages and Computer Architectures}},
address = {Harvard, Massachusetts, USA},
year = {1991},
series = {Lecture Notes in Computer Science},
volume = {523},
publisher = {Springer, Berlin},
editor = {Hughes, John},
pages = {352--366},
abstract = {This paper presents a framework for comparing two strictness
analysis techniques: Abstract interpretation and non-standard type inference.
The comparison is based on the representation of a lattice by its ideals. A
formal system for deducing inclusions between ideals of a lattice is presented
and proved sound and complete. Viewing the ideals as {\em strictness
properties\/} we use the formal system to define a program logic for deducing
strictness properties of expressions in a typed lambda calculus. This {\em
strictness logic\/} is shown to be sound and complete with respect to the
abstract interpretation, which establishes the main result that strictness
analysis by type-inference and by abstract interpretation are equally powerful
techniques.},
keywords = {Strictness Analysis, Abstract Interpretation, Type Inference},
}

@inproceedings{Jour91,
author = {Jouret, Guido K.},
title = {{A Foundation for Declarative Data Parallelism}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {311--330},
abstract = {Data-parallelism is inherent in the independent activity of
individual data elements where this activity consists of either communication
or computation. Single-Instruction, Multiple Datastream (SIMD) architectures
have been designed to exploit this form of parallelism but despite the
impressive performance of such architectures, language development for
data-parallel architectures is an under-developed area. The main difficulties
in language-design for the exploitation of data-parallelism is the traditional
reliance on non-monolithic language constructs encouraged by von-Neumann
architectures; the uniform and globally-accessible machine state and the
resulting absence of any notions of locality and communication. Consideration
of these issues and their relevance on language design leads to the
development of a declarative language for data-parallel programming based on
the functional style.},
keywords = {Data-Parallelism, SIMD, Para-Functional Programming},
}

@inproceedings{Kess91,
author = {Kesseler, Marco},
title = {{Implementing the PABC Machine on Transputers}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {409--421},
abstract = {In this paper an efficient implementation of the PABC machine on
transputers is presented. The PABC machine is an abstract machine that is used
to implement Concurrent Clean, an experimental lazy functional language. For a
single transputer this means an `unbounded' graph store has to be
implemented and a set of reducers, each having a couple of stacks. In a
network of transputers each reducer should be able to send graphs to other
transputers, start new reducers on these remote graphs and to get reduced
graphs back. At the moment the PABC machine runs on a single transputer only.
Efficient and reliable implementation on a network of transputers has
started.},
keywords = {Term Graph Reduction, Abstract Machine},
}

@inproceedings{KiWa92,
author = {King, David J. and Wadler, Philip},
title = {{Combining Monads}},
booktitle = {Functional Programming, Glasgow},
address = {Ayr, Scotland},
editor = {Launchbury, J. and Sansom, P. M.},
publisher = {Springer, Berlin},
series = {Workshops in Computing},
year = {1992},
abstract = {Monads provide a way of structuring functional programs. Most real
applications require a combination of primitive monads. Here we describe how
some monads may be combined with others to yield a {\em combined monad\/}.},
keywords = {Monads, State},
}

@inproceedings{KuGl92,
author = {Kuchen, Herbert and Gladitz, Katia},
title = {{Implementing Bags on a Shared Memory MIMD-Machine}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {Multisets (also called bags) are an interesting data structure for
parallely implemented functional programming languages, since they do not
force an unneeded restriction of the data flow and allow to exploit as much
parallelism as possible. Most operations on multisets can be understood as
special cases of the so-called Gamma scheme. In the present paper, we
investigate efficient implementations of several instances of this Gamma
scheme on MIMD-machines with shared memory.},
keywords = {Multisets, Bags, Non-Determinism},
}

@inproceedings{Lang91,
author = {Langendoen, Koen},
title = {{WYBERT: Parallel Reduction on Shared Memory}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {219--223},
abstract = {The Dutch Parallel Reduction Machine project has shown that it is
possible to implement parallel reduction based on a coarse-grain model. Within
the framework of this project a prototype machine has been constructed: the
Amsterdam Parallel Experimental Reduction Machine (APERM). Experimental
results indicate that the architecture can be improved by connecting several
processors to one graph memory. The design of APERM2 is based on this idea and
consists of an extensible number of processor clusters (shared-memory
multiprocessors) interconnected by a communication network. The distribution
of parallel tasks over the clusters will be done by the same coarse-grain
reduction model as adopted in APERM (divide\&conquer), while parallel
reduction inside a cluster is the research topic of WYBERT. The objective of
WYBERT is to study the opportunities to exploit shared memory, and its
associated caches, for parallel graph reduction of divide\&conquer
applications.}, 
keywords = {Graph Reduction, Shared Memory, Runtime System},
}

@article{Meta88,
author = {Le Metayer, Daniel},
title = {{ACE: An Automatic Complexity Evaluator.}},
journal = toplas,
volume = {10},
number = {2},
pages = {248--266},
month = {April},
year = {1988},
abstract = {There has been a great deal of research done on the evaluation of
the complexity of particular algorithms; little effort, however, has been
devoted to the mechanization of this evaluation. The ACE (Automatic Complexity
Evaluator) system is able to analyze reasonably large programs, like sorting
programs, in a fully mechanically way. A time-complexity function is derived
from the initial functional program. This function is transformed into its
nonrecursive equivalent according to McCarthy's recursion induction
principle, using a predefined library of recursive definitions. As the
complexity is not a decidable property, this transformation will not be
possible in all cases. The richer the predefined library is, the more likely
the system is to succeed. The operations performed by ACE are described and
the use ofthe system is illustrated with the analysis of a sorting algorithm.
Related works and further improvements are discussed in the conclusion.},
keywords = {Granularity Analysis},
}

@inproceedings{Lest89,
author = {Lester, David R.},
title = {{Stacklessness: Compiling Recursion for a Distributed Architecture}},
booktitle = {FPCA '89, Functional Programming Languages and
Computer Architecture},
address = {London, UK, September 11--13},
year = {1989},
pages = {116--128},
publisher = {ACM Press, New York},
abstract = {Compiling general programming languages to run efficiently on a
distributed architecture is hard. One of the problems that confronts the
potential implementor, is how to store the stack. This is normally used in two
ways: firstly it is used to hold the arguments and temporary variables of a
function or procedure, and secondly it is used to hold a previous state on
entry to a new function or procedure. One way to represent such a stack is as
a contiguous array. An alternative is to hold the arguments and temporary
variables in a stack frame, with a back-link to the previous stack frame. In a
parallel machine each task will require a separate stack. We implement each
of these stacks as a linked list of stack frames, each of which resides in a
garbage collected heap. Using the stacklessness analysis, a node which
requires evaluation can be created with a stack frame large enough to evaluate
all tail recursive calls that may occur in the reduction sequence. It is
therefore unnecessary to provide an extension mechanism which enlarges stack
frames. The stacklessness analysis involves giving a non-standard semantics
to a typed functional language. The technique may be applied to any resource
with a stack-like pattern of consumption. The paper includes a proof that an
approximation to the fixpoint of a combinator in the abstract interpretation
is computable. The \LaTeX\ source of the paper is also an executable Orwell
program to find this approximation. The problem is particularly difficult when
higher order functions are used. The stacklessness analysis is able to deal
with this case. The analysis is used in the HDG-machine, a compiler and
abstract machine for a Transputer based implementation of distributed graph
reduction.},
keywords = {Parallel Graph Reduction, Compilation, Tail-Recursion},
}

@article{Maas92,
author = {Maa{\ss}en, Andreas},
title = {{Parallel Programming with Data Structures and Higher Order
Functions}}, 
journal = {Science of Computer Programming},
volume = {18},
year = {1992},
pages = {1--38},
abstract = {Linear lists, which are the standard data structure in functional
programming languages, have proved to be useful for many applications. Yet for
several reasons it seems that there is a need for additional applicative data
structures: Firstly, functional programming languages are well suited for
parallel execution, because programs written in these languages cannot have
side effects, and expressions can be evaluated independently. But parallel
execution of functional programs is often hampered by the linear structure of
lists, which supports only one element at a time algorithms. Secondly, lists
lack the expressive power that we need in general purpose languages. For
example, there is a need for array-like data structures. Thirdly, many
functions on lists have to be defined recursively. No matter how efficiently
recursion may be implemented, it will always fall short of the efficiency that
can be obtained with imperative implementations. We propose a choice of data
structures and corresponding higher order functions that enable programmers to
write concise and (almost) recursion-free functional programs which have a
higher degree of potential parallelism. Possible implementation techniques are
discussed, and the major complexity result is that, under parallel evaluation,
many functions on our data structures need only logarithmic time, whereas
using lists results in linear time complexity.},
keywords = {Distributed Data Structures},
}

@inproceedings{Mart92,
author = {Martins, Wellington Santos},
title = {{Parallel Implementations of Functional Languages}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {With interest in parallel implementations of functional languages
as a whole, we first discuss some issues related to parallel execution of
functional programs, and then review some key proposals considering three
different approaches, namely Parallel Lisp Systems, Dataflow Systems and
Reduction Systems},
keywords = {Parallel Lisp, Dataflow, Parallel Graph Reduction},
}

@inproceedings{Mint92,
author = {Mintchev, Sava},
title = {{Using Strictness Information in the STG-Machine}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {The paper presents an attempt at exploiting strictness information
for parallel evaluation of functional programs. A simple evaluation model,
which uses strictness in a limited way, is suggested. It has been applied in a
parallel version of the STG-machine; special attention has been paid to
avoiding the creation of useless tasks. Some results from the simulation of
the parallel STG-machine are provided, throwing light on the amount and
granularity of parallelism.},
keywords = {Strictness Analysis, Parallel Graph Reduction},
}

@inproceedings{MiVr91,
author = {Milikowski, R. and Vree, W. G.},
title = {{The G-Line --- A Distributed Processor for Graph Reduction}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {265--282},
abstract = {The G-line is a horizontally coded graph reducer. In lazy
functional languages much time is used to manage the graph. A method has been
developed to perform construction of a subgraph in a single parallel access to
the graph memory. With a simulation, using infinite hardware, we have shown
that the architecture performs well. We argue that a realistic implementation
is possible.},
keywords = {Parallel Graph Reduction, Abstract Machine},
}

@inproceedings{NPS91,
author = {N{\"o}cker, Eric and Plasmeijer, Rinus and Smetsers, Sjaak},
title = {{The Parallel ABC Machine}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {351--381},
abstract = {In this paper we will introduce the parallel ABC machine. The
parallel ABC machine is an abstract parallel machine used for creating and
describing implementations of Concurrent Clean, a parallel functional language
based on the principles of term graph rewriting. Basically, the parallel ABC
machine represents a loosely coupled processor architecture, but abstracts
from specific implementation issues. We will show how such a machine can be
defined, and how code for it can be generated. As for the sequential ABC
machine, the description will be given in a functional framework.},
keywords = {Term Graph Rewriting, Abstract Machine},
}

@inproceedings{Osth91,
author = {Ostheimer, Gerrrald},
title = {{Parallel Functional Computation on STAR:DUST}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {393--407},
abstract = {STAR:DUST ('St. Andrews RISC: Dataflow Using Sequential Threads')
is a processor design optimized for efficient execution of sequential threads
while supporting plug-and-play construction of large multiprocessor systems.
Besides satisfying the major RISC criteria (small instruction set, simple
instruction format, load/store principle pipelining), STAR:DUST employs a
dataflow approach to communication and parallelism. We will describe the
architecture and propose a runtime model for parallel functional computation
based on STAR:DUST's dataflow primitives.},
keywords = {Dataflow, Sequential Threads, Hybrid Architecture},
}

@inproceedings{Rose89,
author = {Rosendahl, Mads},
title = {{Automatic Complexity Analysis}},
booktitle = {FPCA '89 Functional Programming Languages and
Computer Architecture},
address = {London, UK, September 11--13},
year = {1989},
pages = {144-156},
publisher = {ACM Press, New York},
abstract = {One way to analyze programs is to derive expressions for their
computational behavior. A time bound function (or worst-case complexity)
gives an upper bound for the computation time as a function of the size of
the input. We describe a system to derive such time bounds automatically
using abstract interpretation. The semantics-based setting makes it possible
to prove the correctness of the time-bound function. The system can analyze
programs in a first-order subset of Lisp and we show how the system also can
be used to analyze programs in other languages.},
keywords = {Complexity Analysis, Abstract Interpretation},
}

@inproceedings{SaWa91,
author = {Sargeant, John and Watson, Ian},
title = {{Some Experiments in Controlling the Dynamic Behavior of Parallel
Functional Programs}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {103--122},
abstract = {In our quest for the ``Holy Grail'' of efficient implicit
parallelism, we have opted for a conventional architecture with physically
distributed but virtually shared memory (the EDS machine), and a Large Grain
Graph Rewriting computational model. On programming languages, we hedge our
bets, but the work described here uses a (strict) functional language. This
set of choices eliminates many of the problems and brings those which remain
into sharp focus. ``All'' we need to do is to achieve efficient dynamic load
balancing, and high granularity, and good data structure locality. We have a
compiler from Hope+ to the EDS machine (simulator), and this way used to
investigate the properties of a ``real'' parallel program, a relational
database implementation. This exercise showed the inadequacy of static
analysis alone for detecting ``good'' tasks to do in parallel. Attempts to
overcome this by programmer annotations had limited success. Furthermore,
experiments with a much simpler program showed alarming variations in
performance due to apparently innocuous differences in the basic low-level
mechanisms for spawning parallel tasks. We are investigating an approach
whereby the program is first compiled with monitoring code inserted, and run
(using smallish data) to produce statistics which are fed back into the
compiler which then produces optimized parallel code. This work is embryonic,
but the method seems to have considerable advantages over static analysis or
hand annotation, and also provides extra performance information for the
programmer.}, 
keywords = {Load Balancing, Static analysis, Annotations, Monitoring},
}

@inproceedings{Sche91,
author = {Schepers, J{\"o}rg},
title = {{Using Functional Languages for Process Specifications}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {89--102},
abstract = {The GRAPH-system, a tool for the specification, analysis and
execution of communicating functional processes, is presented. Processes are
specified in GRAPH at two separate specification levels: At the functional
level the input/output behavior of processes is specified by means of pgorgrams
of a functional language. At the process level these processes constitute the
atomic components of hierarchically structured process systems, wherein
process communication is realized via streams. Different process types are
available, supporting the design of dynamically expanding process structures
as well as of non-deterministic processes, such as semaphores. Process systems
are externally represented as graphs which are variants of Petri-nets,
composed from processes and streams. The Petri-net semantics allows to verify
statically certain invariance properties which guarantee a stable overall
system behavior. Structural deadlocks can also be detected by a static
analysis. The GRAPH-system provides a syntax-oriented editor for the graphical
specification of process systems. Furthermore, it includes several control and
analysis facilities and supports the stepwise execution of processes under
interactive control.},
keywords = {Process networks, streams, Petri nets, deadlocks},
}

@inproceedings{Sewa92,
author = {Seward, Julian},
title = {{Polymorphic Strictness Analysis using Frontiers (Draft Version)}},
booktitle = {4th International Workshop on the Parallel Implementation of
Functional Languages},
address = {Aachen, Germany, September 28--30},
year = {1992},
editor = {Kuchen, Herbert and Loogen, Rita},
abstract = {This paper shows how to implement sensible polymorphic strictness
analysis using the Frontiers algorithm. A central notion is to only ever
analyze each function once, at its simplest polymorphic isntance. Subsequent
non-base uses of functions are dealt with by generalizing their simplest
instance analyzes. This generalisation is done using an algorithm developed by
Baraki, based on embedding-closure pairs. Compared with an alternative
approach of expanding the program out into a collection of monomorphic
instances, this technique is hundreds of times faster for realistic programs.
There are some approximations involved, but these do not seem to have a
detrimental effect on the overall result. The overall effect of this
technology is to considerably expand the range of programs for which the
Frontiers algorithm gives useful results reasonably quickly.},
keywords = {Strictness Analysis, Abstract Interpretation, Fixed Points},
}

@inproceedings{SHD91,
author = {Sharp, David and Harrison, Peter and Darlington, John},
title = {{A Synthesis of a Dynamic Message-Passing Algorithm for Quicksort}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {53--74},
abstract = {We present a methodology for systematically synthesizing
algorithms for {\em message passing parallel machines\/} and illustrate the
technique by the formal derivation of a message passing sorting algorithm,
based on Quicksort, that is, we claim, the fastest parallel quicksort
algorithm having a complexity of $O(\log n * \log n)$ for a machine with $n$
processors sorting an $n$ element list. The algorithm is thus highly suitable
for massively parallel machines such as the Thinking Machines Connection
Machine.},
keywords = {Prrogram Transformation},
}

@inproceedings{Tick88,
author = {Tick, E.},
title = {{Compile-Time Granularity Analysis for Parallel Logic Programming
Languages}}, 
booktitle = {International Conference on Fifth Generation Computer Systems},
address = {Tokyo, Japan, November 28 -- December 2},
year = {1988},
pages = {994--1000},
publisher = {ICOT, Tokyo},
abstract = {The paper describes a simple compiler analysis method for
determining the ``weight'' of procedures in parallel logic programming
languages. Using Flat Guarded Horn Clauses (FGHC) as an example, the analysis
algorithm is described. Consideration of weights has been incorporated in the
scheduler of a real-parallel FGHC emulator running on the Sequent Symmetry
multiprocessor. Alternative demand-distribution methods are discussed,
including {\em oldest-first\/} and {\em heaviest-first\/} distributions.
Performance measurements, collected from a group of non-trivial benchmarks on
eight processors, show that the new schemes do {\em not\/} perform
significantly faster than conventional distribution methods. This result is
attributed to a combination of factors overshadowing the benefits of the new
method: high system overheads, the low cost of spawning a goal on a shared
memory multiprocessor, and the increase in synchronization caused by the new
methods. Directions of further research are discussed, indicating where
further speedup can be attained.},
keywords = {Granularity Analysis},
}

@inproceedings{Wadl92,
author = {Wadler, Philip},
title = {{The Essence of Functional Programming}},
booktitle = {19th Annual Symposium on Principles of Programming Languages},
addresss = {Santa Fe, New Mexico},
month = {January},
year = {1992},
abstract = {This paper explores the use of monads to structure functional
programs. No prior knowledge of monads or even category theory is required.
Monads increase the ease with which programs may be modified. They can mimic
the effect of impure features such as exceptions, state, and continuations;
and also provide effects not easily achieved with such features. The types of
a program reflect which effects occur. The first section is an extended
example of the use of monads. A simple interpreter is modified to support
various extra features: error messages, state, output, and non-deterministic
choice. The second section describes the relation between monads and
continuation-passing style. The third section sketches how monads are used in
a compiler for Haskell that is written in Haskell.},
keywords = {Monads, State},
}

@inproceedings{Waug91,
author = {Waugh, Kevin G.},
title = {{Parallel Imperative Programs from Functional Prototypes}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {75--88},
abstract = {We have been considering the automatic translation of sequential
SML prototypes of vision algorithms into efficient parallel Occam programs. We
use function instrumentation of the prototype to obtain typical and
pathological execution details and static analysis allows us to consider
argument sizes and flow. From these results an abstract form of the functional
program is used to estimate partition size and hence granularity. The
transformations which apply to the functional prototype can also to be applied
to the abstract form of the program. In this way we can use the transformed
abstracted program to determine the effect of particular transformations on
execution of the prototype.},
keywords = {Program Transformation, Weight Analysis},
}

@inproceedings{Zimm91,
author = {Zimmer, Ralf},
title = {{Reflections, the Curch-Rosser Property, and ``Real'' Applications in
Functional Systems}},
booktitle = {3rd International Workshop on the Parallel Implementation of
Functional Languages},
address = {Southampton, UK, June 5--7},
year = {1991},
editor = {Glaser, Hugh and Hartel, Pieter},
publisher = {Technical Report CSTR 91-07, University of Southampton},
pages = {331--349},
abstract = {Modern functional systems are often claimed to be well suited for
programming and using multiprocessor machines. Unfortunately, most of them do
not feature the Church-Rosser property. This paper is to show that for the
correct construction of parallel programs and for easy reasoning about their
correctness the Church-Rosser property is indispensable. A conservative
extension of functional systems using a reflection mechanism is proposed for
thereduction system $\pi$-RED. The resulting reduction system $\rho$-RED has
the Church-Rosser $\pi$-RED property. Necessary conditions for reflexive
extensions of functional systems are identified. With certain restrictions,
the reflection mechanism can also be employed for the integration of different
(even non-functional) subsystems, e.g.\ parsers, numerical packages, and
computer algebra systems. Functional programs which have imperative subsystems
included by the reflection mechanism can be executed in $\rho$-RED
non-sequentially without causing side-effects due to these subsystems.},
keywords = {Reflections, Church-Rosser},
}