Added parser combinator and lazy evaluation library.

cvs
Chris Double 2004-08-15 23:23:47 +00:00
parent cfdaa293c9
commit 0c3fa9d74c
6 changed files with 1128 additions and 0 deletions

View File

@ -0,0 +1,63 @@
! Copyright (C) 2004 Chris Double.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: lazy-examples
USE: lazy
USE: stack
USE: arithmetic
USE: lists
USE: combinators
USE: kernel
USE: logic
: lfrom ( n -- llist )
#! Return a lazy list of increasing numbers starting
#! from the initial value 'n'.
dup [ succ lfrom ] curry1 lcons ;
: lfrom-by ( n quot -- llist )
#! Return a lazy list of values starting from n, with
#! each successive value being the result of applying quot to
#! n.
dupd [ dup [ call ] dip lfrom-by ] curry2 lcons ;
: lnaturals 0 lfrom ;
: lpositves 1 lfrom ;
: levens 0 [ 2 + ] lfrom-by ;
: lodds 1 lfrom [ 2 mod 1 = ] lsubset ;
: lpowers-of-2 1 [ 2 * ] lfrom-by ;
: lones 1 [ ] lfrom-by ;
: lsquares lnaturals [ dup * ] lmap ;
: first-five-squares 5 lsquares ltake ;
: divisible-by? ( a b -- bool )
#! Return true if a is divisible by b
mod 0 = ;
: sieve ( llist - llist )
#! Given a lazy list of numbers, use the sieve of eratosthenes
#! algorithm to return a lazy list of primes.
luncons over [ divisible-by? not ] curry1 lsubset [ sieve ] curry1 lcons ;
: lprimes 2 lfrom sieve ;
: first-ten-primes 10 lprimes ltake ;

View File

@ -0,0 +1,172 @@
! Copyright (C) 2004 Chris Double.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: lazy
USE: lists
USE: stack
USE: arithmetic
USE: stdio
USE: prettyprint
USE: kernel
USE: combinators
USE: logic
: curry1 ( n quot -- quot )
#! Return a quotation that when called will initially
#! have 'n' pushed on the stack.
cons ;
: curry2 ( n1 n2 quot -- quot )
#! Return a quotation that when called will initially
#! have 'n1' and 'n2' pushed on the stack.
cons cons ;
: delay ( value -- promise )
#! Return a promise that when 'forced' returns the original value.
unit ;
: force ( promise -- value )
#! Return the value associated with the promise.
call ;
: lcons ( car promise -- lcons )
#! Return a lazy pair, where the cdr is a promise and must
#! be forced to return the value.
cons ;
: lunit ( a -- llist )
#! Construct a lazy list of one element.
[ ] delay lcons ;
: lcar ( lcons -- car )
#! Return the car of a lazy pair.
car ;
: lcdr ( lcons -- cdr )
#! Return the cdr of a lazy pair, implicitly forcing it.
cdr force ;
: lnth ( n llist -- value )
#! Return the nth item in a lazy list
swap [ lcdr ] times lcar ;
: luncons ( lcons -- car cdr )
#! Return the car and forced cdr of the lazy cons.
uncons force ;
: (ltake) ( n llist accum -- list )
>r >r pred dup 0 < [
drop r> drop r> nreverse
] [
r> luncons swap r> cons (ltake)
] ifte ;
: ltake ( n llist -- list )
#! Return a list containing the first n items from
#! the lazy list.
[ ] (ltake) ;
: lmap ( llist quot -- llist )
#! Return a lazy list containing the collected result of calling
#! quot on the original lazy list.
over [ ] = [
2drop [ ]
] [
[ luncons ] dip
dup swapd
[ lmap ] curry2
[ call ] dip
lcons
] ifte ;
: lsubset ( llist pred -- llist )
#! Return a lazy list containing only the items from the original
#! lazy list for which the predicate returns a value other than f.
over [ ] = [
2drop [ ]
] [
[ luncons ] dip
dup swapd
[ lsubset ] curry2
-rot dupd call [
swap lcons
] [
drop call
] ifte
] ifte ;
: lappend* ;
: (lappend*) ;
: lappend-list* ;
: lappend-item* ( llists list item -- llist )
-rot [ lappend-list* ] curry2 lcons ;
: lappend-list* ( llists list -- llist )
dup [
#! non-empty list
luncons swap lappend-item*
] [
#! empty list
drop lappend*
] ifte ;
: (lappend*) ( llists -- llist )
dup lcar [ ( llists )
#! Yes, the first item in the list is a valid llist
luncons swap lappend-list*
] [
#! The first item in the list is an empty list.
#! Resume passing the next list.
lcdr lappend*
] ifte ;
: lappend* ( llists -- llist )
#! Given a lazy list of lazy lists, return a lazy list that
#! works through all of the sub-lists in sequence.
dup [
(lappend*)
] [
#! Leave empty list on the stack
] ifte ;
: list>llist ( list -- llist )
#! Convert a list to a lazy list.
dup [
uncons [ list>llist ] curry1 lcons
] when ;
: lappend ( llist1 llist2 -- llist )
#! Concatenate two lazy lists such that they appear to be one big lazy list.
2list list>llist lappend* ;
: leach ( llist quot -- )
#! Call the quotation on each item in the lazy list.
#! Warning: If the list is infinite then this will
#! never return.
over [
>r luncons r> tuck >r >r call r> r> leach
] [
2drop
] ifte ;

View File

@ -0,0 +1,280 @@
<html>
<head>
<title>Lazy Evaluation</title>
<link rel="stylesheet" type="text/css" href="style.css">
</head>
<body>
<h1>Lazy Evaluation</h1>
<p>The 'lazy' vocabulary adds lazy lists to Factor. This provides the
ability to describe infinite structures, and to delay execution of
expressions until they are actually used.</p>
<p>Lazy lists, like normal lists, are composed of a head and tail. In
a lazy list the tail is something called a 'promise'. To convert a
'promise' into its actual value a word called 'force' is used. To
convert a value into a 'promise' the word to use is 'delay'.</p>
<p>Many of the lazy list words are named similar to the standard list
words but with an 'l' suffixed to it. Here are the commonly used
words and their equivalent list operation:</p>
<table border="1">
<tr><th>Lazy List</th><th>Normal List</th></tr>
<tr><td><a href="#lcons">lcons</a></td><td>cons</td></tr>
<tr><td><a href="#lunit">lunit</a></td><td>unit</td></tr>
<tr><td><a href="#lcar">lcar</a></td><td>car</td></tr>
<tr><td><a href="#lcdr">lcdr</a></td><td>cdr</td></tr>
<tr><td><a href="#lnth">lnth</a></td><td>nth</td></tr>
<tr><td><a href="#luncons">luncons</a></td><td>uncons</td></tr>
<tr><td><a href="#lmap">lmap</a></td><td>map</td></tr>
<tr><td><a href="#lsubset">lsubset</a></td><td>subset</td></tr>
<tr><td><a href="#leach">leach</a></td><td>each</td></tr>
<tr><td><a href="#lappend">lappend</a></td><td>append</td></tr>
</table>
<p>A few additional words specific to lazy lists are:</p>
<table border="1">
<tr><td><a href="#ltake">ltake</a></td><td>Returns a normal list containing a specified
number of items from the lazy list.</td></tr>
<tr><td><a href="#lappendstar">lappend*</a></td><td>Given a lazy list of lazy lists,
concatenate them together in a lazy manner, returning a single lazy
list.</td></tr>
<tr><td><a href="#list>llist">list>llist</a></td><td>Given a normal list, return a lazy list
that contains the same elements as the normal list.</td></tr>
</table>
<p>A couple of helper functions are also provided by the lazy
vocabulary.</p>
<table border="1">
<tr><td>curry1</td><td>Given a value and a quotation, returns a new
quotation that when called will have the value on the stack.</td></tr>
<tr><td>curry2</td><td>Given two values and a quotation, returns a new
quotation that when called will have the two values on the
stack.</td></tr>
</table>
<h2>Reference</h2>
<!-- lcons description -->
<a name="lcons">
<h3>lcons ( value promise -- lcons )</h3>
<p>Provides the same effect as 'cons' does for normal lists. It
creates a cons cell where the first element is the value given and the
second element is a promise.</p>
<a name="promise">
<p>A promise is either a value that has had 'force' called on it, or
a quotation that when 'call' is applied to it, returns the actual
value.</p>
<pre class="code">
( 1 ) 5 6 delay <a href="#lcons">lcons</a> dup .
=> [ 5 6 ]
( 2 ) dup <a href="#lcar">lcar</a> .
=> 5
( 3 ) dup <a href="#lcdr">lcdr</a> .
=> 6
</pre>
<!-- lunit description -->
<a name="lunit">
<h3>lunit ( value -- llist )</h3>
<p>Provides the same effect as 'unit' does for normal lists. It
creates a lazy list where the first element is the value given.</p>
<pre class="code">
( 1 ) 42 <a href="#lunit">lunit</a> dup .
=> [ 42 f ]
( 2 ) dup <a href="#lcar">lcar</a> .
=> 42
( 3 ) dup <a href="#lcdr">lcdr</a> .
=> f
( 4 ) [ . ] <a href="#leach">leach</a>
=> 42
</pre>
<!-- lcar description -->
<a name="lcar">
<h3>lcar ( lcons -- value )</h3>
<p>Provides the same effect as 'car' does for normal lists. It
returns the first element in a lazy cons cell.</p>
<pre class="code">
( 1 ) 42 <a href="#lunit">lunit</a> dup .
=> [ 42 f ]
( 2 ) <a href="#lcar">lcar</a> .
=> 42
</pre>
<!-- lcdr description -->
<a name="lcdr">
<h3>lcdr ( lcons -- value )</h3>
<p>Provides the same effect as 'cdr' does for normal lists. It
returns the second element in a lazy cons cell and forces it. This
causes that element to be evaluated immediately.</p>
<pre class="code">
( 1 ) 5 [ 5 6 + ] <a href="#lcons">lcons</a> dup .
=> [ 5 5 6 + ]
( 2 ) <a href="#lcdr">lcdr</a> .
=> 11
</pre>
<pre class="code">
( 1 ) 5 lfrom dup .
=> [ 5 5 succ lfrom ]
( 2 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
=> 6
( 3 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
=> 7
( 4 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
=> 8
</pre>
<!-- lnth description -->
<a name="lnth">
<h3>lnth ( n llist -- value )</h3>
<p>Provides the same effect as 'nth' does for normal lists. It
returns the nth value in the lazy list. It causes all the values up to
'n' to be evaluated.</p>
<pre class="code">
( 1 ) 1 lfrom
=> [ 1 1 succ lfrom ]
( 2 ) 5 swap <a href="#lnth">lnth</a> .
=> 6
</pre>
<!-- luncons description -->
<a name="luncons">
<h3>luncons ( lcons -- car cdr )</h3>
<p>Provides the same effect as 'uncons' does for normal lists. It
returns the car and cdr of the lazy list. Note that cdr is forced
resulting in it being evaluated.</p>
<pre class="code">
( 1 ) 5 [ 6 ] <a href="#lcons">lcons</a> dup .
=> [ 5 6 ]
( 2 ) <a href="#luncons">luncons</a> .s
=> { 5 6 }
</pre>
<!-- lmap description -->
<a name="lmap">
<h3>lmap ( llist quot -- llist )</h3>
<p>Provides the same effect as 'map' does for normal lists. It
lazily maps over a lazy list applying the quotation to each element.
A new lazy list is returned which contains the results of the
quotation.</p>
<p>When initially called <a href="#lmap">lmap</a> will only call quot on the first element
of the list. It then constructs a lazy list that performs the
next '<a href="#lmap">lmap</a>' operation on the next element when it is evaluated. This
allows mapping over infinite lists.</p>
<pre class="code">
( 1 ) 1 lfrom
=> < infinite list of incrementing numbers >
( 2 ) [ 2 * ] <a href="#lmap">lmap</a>
=> < infinite list of numbers incrementing by 2 >
( 3 ) 5 swap <a href="#ltake">ltake</a> .
=> [ 2 4 6 8 10 ]
</pre>
<!-- lsubset description -->
<a name="lsubset">
<h3>lsubset ( llist pred -- llist )</h3>
<p>Provides the same effect as 'subset' does for normal lists. It
lazily iterates over a lazy list applying the predicate quotation to each
element. If that quotation returns true, the element will be included
in the resulting lazy list. If it is false, the element will be skipped.
A new lazy list is returned which contains all elements where the
predicate returned true.</p>
<p>When initially called <a href="#lsubset">lsubset</a> will only call
the predicate quotation on the first element
of the list. It then constructs a lazy list that performs the
next '<a href="#lsubset">lsubset</a>' operation on the next element when it is evaluated. This
allows subsetting over infinite lists.</p>
<pre class="code">
( 1 ) 1 lfrom
=> < infinite list of incrementing numbers >
( 2 ) [ prime? ] <a href="#lsubset">lsubset</a>
=> < infinite list of prime numbers >
( 3 ) 5 swap <a href="#ltake">ltake</a> .
=> [ 2 3 5 7 11 ]
</pre>
<!-- leach description -->
<a name="leach">
<h3>leach ( llist quot -- )</h3>
<p>Provides the same effect as 'each' does for normal lists. It
lazily iterates over a lazy list applying the quotation to each
element. If this operation is applied to an infinite list it will
never return unless the quotation escapes out by calling a continuation.</p>
<pre class="code">
( 1 ) 1 lfrom
=> < infinite list of incrementing numbers >
( 2 ) [ 2 mod 1 = ] <a href="#lsubset">lsubset</a>
=> < infinite list of odd numbers >
( 3 ) [ . ] <a href="#leach">leach</a>
=> 1
3
5
7
... for ever ...
</pre>
<!-- ltake description -->
<a name="ltake">
<h3>ltake ( n llist -- list )</h3>
<p>Iterates over the lazy list 'n' times, appending each element to a
normal list. The normal list is returned. This provides a convenient
way of getting elements out of a lazy list.</p>
<pre class="code">
( 1 ) : ones 1 [ ones ] <a href="#lcons">lcons</a> ;
( 2 ) 5 ones <a href="#ltake">ltake</a>
=> [ 1 1 1 1 1 ]
</pre>
<!-- lappend description -->
<a name="lappend">
<h3>lappend ( llist1 llist2 -- llist )</h3>
<p>Lazily appends two lists together. The actual appending is done
lazily on iteration rather than immediately so it works very fast no
matter how large the list.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a> [ 4 5 6 ] <a href="#list>llist">list>llist</a> <a href="#lappend">lappend</a>
( 2 ) [ . ] <a href="#leach">leach</a>
=> 1
2
3
4
5
6
</pre>
<!-- lappend* description -->
<a name="lappendstar">
<h3>lappend* ( llists -- llist )</h3>
<p>Given a lazy list of lazy lists, concatenate them together in a
lazy fashion. The actual appending is done lazily on iteration rather
than immediately so it works very fast no matter how large the lists.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
( 2 ) [ 4 5 6 ] <a href="#list>llist">list>llist</a>
( 3 ) [ 7 8 9 ] <a href="#list>llist">list>llist</a>
( 4 ) 3list <a href="#list>llist">list>llist</a> <a href="#lappendstar">lappend*</a>
( 5 ) [ . ] <a href="#leach">leach</a>
=> 1
2
3
4
5
6
7
8
9
</pre>
<!-- list>llist description -->
<a name="list>llist">
<h3>list>llist ( list -- llist )</h3>
<p>Converts a normal list into a lazy list. This is done lazily so the
initial list is not iterated through immediately.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
( 2 ) [ . ] <a href="#leach">leach</a>
=> 1
2
3
</pre>
<p class="footer">
News and updates to this software can be obtained from the authors
weblog: <a href="http://radio.weblogs.com/0102385">Chris Double</a>.</p>
<p id="copyright">Copyright (c) 2004, Chris Double. All Rights Reserved.</p>
</body> </html>

View File

@ -0,0 +1,433 @@
! Copyright (C) 2004 Chris Double.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: parser-combinators
USE: lazy
USE: stack
USE: lists
USE: strings
USE: arithmetic
USE: logic
USE: kernel
USE: combinators
USE: parser
: phead ( object -- head )
#! Polymorphic head. Return the head item of the object.
#! For a string this is the first character.
#! For a list this is the car.
[
[ string? ] [ 0 swap str-nth ]
[ list? ] [ car ]
] cond ;
: ptail ( object -- tail )
#! Polymorphic tail. Return the tail of the object.
#! For a string this is everything but the first character.
#! For a list this is the cdr.
[
[ string? ] [ 1 str-tail ]
[ list? ] [ cdr ]
] cond ;
: pfirst ( object -- first )
#! Polymorphic first
phead ;
: psecond ( object -- second )
#! Polymorphic second
[
[ string? ] [ 1 swap str-nth ]
[ list? ] [ cdr car ]
] cond ;
: ph:t ( object -- head tail )
#! Return the head and tail of the object.
dup phead swap ptail ;
: pempty? ( object -- bool )
#! Polymorphic empty test.
[
[ string? ] [ "" = ]
[ list? ] [ not ]
] cond ;
: string-take ( n string -- string )
#! Return a string with the first 'n' characters
#! of the original string.
dup str-length pick < [
2drop ""
] [
swap str-head
] ifte ;
: (list-take) ( n list accum -- list )
>r >r pred dup 0 < [
drop r> drop r> nreverse
] [
r> uncons swap r> cons (list-take)
] ifte ;
: list-take ( n list -- list )
#! Return a list with the first 'n' characters
#! of the original list.
[ ] (list-take) ;
: ptake ( n object -- object )
#! Polymorphic take.
#! Return a collection of the first 'n'
#! characters from the original collection.
[
[ string? ] [ string-take ]
[ list? ] [ list-take ]
] cond ;
: string-drop ( n string -- string )
#! Return a string with the first 'n' characters
#! of the original string removed.
dup str-length pick < [
2drop ""
] [
swap str-tail
] ifte ;
: list-drop ( n list -- list )
#! Return a list with the first 'n' items
#! of the original list removed.
>r pred dup 0 < [
drop r>
] [
r> cdr list-drop
] ifte ;
: pdrop ( n object -- object )
#! Polymorphic drop.
#! Return a collection the same as 'object'
#! but with the first n items removed.
[
[ string? ] [ string-drop ]
[ list? ] [ list-drop ]
] cond ;
: ifte-head= ( string-or-list ch [ quot1 ] [ quot2 ] -- )
#! When the character 'ch' is equal to the head
#! of the string or list, run the quot1 otherwise run quot2.
[ swap phead = ] 2dip ifte ;
: symbol ( ch -- parser )
#! Return a parser that parses the given symbol.
[ ( inp ch -- result )
2dup [
swap ptail cons lunit
] [
2drop [ ]
] ifte-head=
] curry1 ;
: token ( string -- parser )
#! Return a parser that parses the given string.
[ ( inp string -- result )
2dup str-length swap ptake over = [
swap over str-length swap pdrop cons lunit
] [
2drop [ ]
] ifte
] curry1 ;
: satisfy ( p -- parser )
#! Return a parser that succeeds if the predicate 'p',
#! when passed the first character in the input, returns
#! true.
[ ( inp p -- result )
over pempty? [
2drop [ ]
] [
over phead swap call [
ph:t cons lunit
] [
drop [ ]
] ifte
] ifte
] curry1 ;
: satisfy2 ( p r -- parser )
#! Return a parser that succeeds if the predicate 'p',
#! when passed the first character in the input, returns
#! true. On success the word 'r' is called with the
#! successfully parser character on the stack. The result
#! of this is returned as the result of the parser.
[ ( inp p r -- result )
>r over phead swap call [
ph:t swap r> call swons lunit
] [
r> 2drop [ ]
] ifte
] curry2 ;
: epsilon ( -- parser )
#! A parser that parses the empty string.
[ ( inp -- result )
"" swap cons lunit
] ;
: succeed ( r -- parser )
#! A parser that always returns 'r' and consumes no input.
[ ( inp r -- result )
swap cons lunit
] curry1 ;
: fail ( -- parser )
#! A parser that always fails
[
drop [ ]
] ;
USE: prettyprint
USE: unparser
: ensure-list ( a -- [ a ] )
#! If 'a' is not a list, make it one.
dup list? [ unit ] unless ;
: ++ ( a b -- [ a b ] )
#! Join two items into a list.
>r ensure-list r> ensure-list append ;
: <&> ( p1 p2 -- parser )
#! Sequentially combine two parsers, returning a parser
#! that first calls p1, then p2 all remaining results from
#! p1.
[ ( inp p1 p2 -- result )
>r call r> [ ( [ x | xs ] p2 -- result )
>r uncons r> call swap [ ( [ x2 | xs2 ] x -- result )
>r uncons swap r> swap ++ swons
] curry1 lmap
] curry1 lmap lappend*
] curry2 ;
: <|> ( p1 p2 -- parser )
#! Choice operator for parsers. Return a parser that does
#! p1 or p2 depending on which will succeed.
[ ( inp p1 p2 -- result )
rot tuck swap call >r swap call r> lappend
] curry2 ;
: p-abc ( -- parser )
#! Test Parser. Parses the string "abc"
"a" token "b" token "c" token <&> <&> ;
: parse-skipwhite ( string -- string )
dup phead blank? [
ptail parse-skipwhite
] [
] ifte ;
: sp ( parser -- parser )
#! Return a parser that first skips all whitespace before
#! parsing.
[ ( inp parser -- result )
[ parse-skipwhite ] dip call
] curry1 ;
: just ( parser -- parser )
#! Return a parser that works exactly like the input parser
#! but guarantees that the rest string is empty.
[ ( inp parser -- result )
call [ ( [ x | xs ] -- )
cdr str-length 0 =
] lsubset
] curry1 ;
: <@ ( p f -- parser )
#! Given a parser p and a quotation f return a parser
#! that does the same as p but in addition applies f
#! to the resulting parse tree.
[ ( inp p f -- result )
>r call r> [ ( [ x | xs ] f -- [ fx | xs ] )
swap uncons [ swap over [ call ] [ drop ] ifte ] dip cons
] curry1 lmap
] curry2 ;
: p-1 ( -- parser )
"1" token "123" swap call lcar ;
: p-2 ( -- parser )
"1" token [ str>number ] <@ "123" swap call lcar ;
: some ( parser -- det-parser )
#! Given a parser, return a parser that only produces the
#! resulting parse tree of the first successful complete parse.
[ ( inp parser -- result )
just call lcar car
] curry1 ;
: delayed-parser ( [ parser ] -- parser )
[ ( inp [ parser ] -- result )
call call
] curry1 ;
: parens ;
: parens ( -- parser )
#! Parse nested parentheses
"(" token [ parens ] delayed-parser <&>
")" token <&> [ parens ] delayed-parser <&>
epsilon <|> ;
: nesting ( -- parser )
#! Count the maximum depth of nested parentheses.
"(" token [ nesting ] delayed-parser <&> ")" token <&>
[ nesting ] delayed-parser <&> [ .s drop "a" ] <@ epsilon <|> ;
: <& ( parser1 parser2 -- parser )
#! Same as <&> except only return the first item in the parse tree.
<&> [ pfirst ] <@ ;
: &> ( parser1 parser2 -- parser )
#! Same as <&> except only return the second item in the parse tree.
<&> [ psecond ] <@ ;
: lst ( [ x [ xs ] ] -- [x:xs] )
#! I need a good name for this word...
dup cdr [ uncons car cons ] when unit ;
: <*> ( parser -- parser )
#! Return a parser that accepts zero or more occurences of the original
#! parser.
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ [ ] succeed <|> ;
: <+> ( parser -- parser )
#! Return a parser that accepts one or more occurences of the original
#! parser.
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ ;
: <?> ( parser -- parser )
#! Return a parser where its construct is optional. It may or may not occur.
[ ] succeed <|> ;
: <first> ( parser -- parser )
#! Transform a parser into a parser that only returns the first success.
[
call dup [ lcar lunit ] when
] curry1 ;
: <!*> ( parser -- parser )
#! Version of <*> that only returns the first success.
<*> <first> ;
: <!+> ( parser -- parser )
#! Version of <+> that only returns the first success.
<+> <first> ;
: ab-test
"a" token <*> "b" token <&> "aaaaab" swap call [ . ] leach ;
: ab-test2
"a" token <*> "b" token <&> [ "a" "a" "a" "b" ] swap call [ . ] leach ;
: a "a" token "a" token <&> epsilon <|> ;
: b "b" token epsilon <|> ;
: c "c" token "c" token <&> ;
: d "d" token "d" token <&> ;
: count-a "a" token [ count-a ] delayed-parser &> "b" token <& [ 1 + ] <@ 0 succeed <|> ;
: tca "aaabbb" count-a call [ . ] leach ;
: parse-digit ( -- parser )
#! Return a parser for digits
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
: (reduce) ( start quot list -- value )
#! Call quot with start and the first value in the list.
#! quot is then called with the result of quot and the
#! next item in the list until the list is exhausted.
uncons >r swap dup dip r> dup [
(reduce)
] [
2drop
] ifte ;
: reduce ( list start quot -- value )
#! Call quot with start and the first value in the list.
#! quot is then called with the result of quot and the
#! next item in the list until the list is exhausted.
rot (reduce) ;
: natural ( -- parser )
#! a parser for natural numbers.
parse-digit <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
: natural2 ( -- parser )
#! a parser for natural numbers.
parse-digit <!+> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
: integer ( -- parser )
#! A parser that can parser possible negative numbers.
"-" token <?> [ drop -1 ] <@ natural2 <&> [ 1 [ * ] reduce ] <@ ;
: identifier ( -- parser )
#! Parse identifiers
[ letter? ] satisfy <+> [ car cat ] <@ ;
: identifier2 ( -- parser )
#! Parse identifiers
[ letter? ] satisfy <!+> [ car cat ] <@ ;
: ints ( -- parser )
integer "+" token [ drop [ [ + ] ] ] <@ <&>
integer <&> [ call swap call ] <@ ;
: url-quotable ( -- parser )
! [a-zA-Z0-9/_?] re-matches
[ letter? ] satisfy
[ LETTER? ] satisfy <|>
[ digit? ] satisfy <|>
CHAR: / symbol <|>
CHAR: _ symbol <|>
CHAR: ? symbol <|> just ;
: http-header ( -- parser )
[ CHAR: : = not ] satisfy <!+> [ car cat ] <@
": " token [ drop f ] <@ <&>
[ drop t ] satisfy <!+> [ car cat ] <@ <&> just ;
: parse-http-header ( string -- [ name value ] )
http-header call lcar car ;
: get-request ( -- parser )
"GET" token
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
: post-request ( -- parser )
"POST" token
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
: all-request ( -- parser )
"POST" token
[ 32 = not ] satisfy <!+> sp [ car cat ] <@ <&>
"HTTP/1.0" token sp <&> ;
: split-url ( -- parser )
"http://" token
[ CHAR: / = not ] satisfy <!*> [ car cat ] <@ <&>
"/" token <&>
[ drop t ] satisfy <!*> [ car cat ] <@ <&> ;

View File

@ -0,0 +1,152 @@
<html>
<head>
<title>Parser Combinators</title>
<link rel="stylesheet" type="text/css" href="style.css">
</head>
<body>
<h1>Parsers</h1>
<p>A parser is a word or quotation that, when called, processes
an input string on the stack, performs some parsing operation on
it, and returns a result indicating the success of the parsing
operation.</p>
<p>The result returned by a parser is known as a 'list of
successes'. It is a lazy list of standard Factor cons cells. Each cons
cell is a result of a parse. The car of the cell is the result of the
parse operation and the cdr of the cell is the remaining input left to
be parsed.</p>
<p>A list is used for the result as a parse operation can potentially
return many successful results. For example, a parser that parses one
or more digits will return more than one result for the input "123". A
successful parse could be "1", "12" or "123".</p>
<p>The list is lazy so if only one parse result is required the
remaining results won't actually be processed if they are not
requested. This improves efficiency.</p>
<p>The car of the result pair can be any value that the parser wishes
to return. It could be the successful portion of the input string
parsed, an abstract syntax tree representing the parsed input, or even
a quotation that should get called for later processing.</p>
<p>A Parser Combinator is a word that takes one or more parsers and
returns a parser that when called uses the original parsers in some
manner.</p>
<h1>Example Parsers</h1>
<p>The following are some very simple parsers that demonstrate how
general parsers work and the 'list of sucesses' that are returned as a
result.</p>
<pre class="code">
(1) : char-a ( inp -- result )
0 over str-nth CHAR: a = [
1 str-tail CHAR: a swons lunit
] [
drop f
] ifte ;
(2) "atest" char-a [ [ . ] leach ] when*
=> [ 97 | "test" ]
(3) "test" char-a [ [ . ] leach ] when*
=>
</pre>
<p>'char-a' is a parser that only accepts the character 'a' in the
input string. When passed an input string with a string with a leading
'a' then the 'list of successes' has 1 result value. The car of that
result value is the character 'a' successfully parsed, and the cdr is
the remaining input string. On failure of the parse an empty list is
returned.</p>
<p>The parser combinator library provides a combinator, <&>, that takes
two parsers off the stack and returns a parser that calls the original
two in sequence. An example of use would be calling 'char-a' twice,
which would then result in an input string expected with two 'a'
characters leading:</p>
<pre class="code">
(1) "aatest" [ char-a ] [ char-a ] <&> call
=> < list of successes >
(2) [ . ] leach
=> [ [ 97 97 ] | "test" ]
</pre>
<p>Creating parsers for specfic characters and tokens can be a chore
so there is a word that, given a string token on the stack, returns
a parser that parses that particular token:</p>
<pre class="code">
(1) "begin" token
=> < a parser that parses the token "begin" >
(2) dup "this should fail" swap call .
=> f
(3) "begin a successfull parse" swap call
=> < lazy list >
(4) [ . ] leach
=> [ "begin" | " a successfull parse" ]
</pre>
<p>The word 'satisfy' takes a quotation from the top of the stack and
returns a parser than when called will call the quotation with the
first item in the input string on the stack. If the quotation returns
true then the parse is successful, otherwise it fails:</p>
<pre class="code">
(1) : digit-parser ( -- parser )
[ digit? ] satisfy ;
(2) "5" digit-parser call [ . ] leach
=> [ 53 | "" ]
(3) "a" digit-parser call
=> f
</pre>
<p>Note that 'digit-parser' returns a parser, it is not the parser
itself. It is really a parser generating word like 'token'. Whereas
our 'char-a' word defined originally was a parser itself.</p>
<p>Now that we can parse single digits it would be nice to easily
parse a string of them. The '<*>' parser combinator word will do
this. It accepts a parser on the top of the stack and produces a
parser that parses zero or more of the constructs that the original
parser parsed. The result of the '<*>' generated parser will be a list
list of the successful results returned by the original parser.</p>
<pre class="code">
(1) digit-parser <*>
=> < parser >
(2) "123" swap call
=> < lazy list >
(3) [ . ] leach
=> [ [ [ 49 50 51 ] ] | "" ]
[ [ [ 49 50 ] ] | "3" ]
[ [ [ 49 ] ] | "23" ]
[ f | "123" ]
</pre>
<p>In this case there are multiple successful parses. This is because
the occurrence of zero or more digits happens more than once. There is
also the 'f' case where zero digits is parsed. If only the 'longest
match' is required then the lcar of the lazy list can be used and the
remaining parse results are never produced.</p>
<p>The result of the parse above is the list of characters
parsed. Sometimes you want this to be something else, like an abstract
syntax tree, or some calculation. For the digit case we may want the
actual integer number.</p>
<p>For this we can use the '<@' parser
combinator. This combinator takes a parser and a quotation on the
stack and returns a new parser. When the new parser is called it will
call the original parser to produce the results, then it will call the
quotation on each successfull result, and the result of that quotation
will be the result of the parse:</p>
<pre class="code">
(1) : digit-parser2 ( -- parser )
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
(2) "5" digit-parser2 call [ . ] leach
=> [ 5 | "" ]
</pre>
<p>Notice that now the result is the actual integer '5' rather than
character code '53'.</p>
<pre class="code">
(1) : natural-parser ( -- parser )
digit-parser2 <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
(2) "123" natural-parser call
=> < lazy list >
(3) [ . ] leach
=> [ [ 123 ] | "" ]
[ [ 12 ] | "3" ]
[ [ 1 ] | "23" ]
[ f | "123" ]
</pre>
<p>The number parsed is the actual integer number due to the operation
of the '<@' word. This allows parsers to not only parse the input
string but perform operations and transformations on the syntax tree
returned.</p>
<p class="footer">
News and updates to this software can be obtained from the authors
weblog: <a href="http://radio.weblogs.com/0102385">Chris Double</a>.</p>
<p id="copyright">Copyright (c) 2004, Chris Double. All Rights Reserved.</p>
</body> </html>

View File

@ -0,0 +1,28 @@
body { background: white; color: black; }
p { margin-left: 10%; margin-right: 10%;
font: normal 100% Verdana, Arial, Helvetica; }
td { margin-left: 10%; margin-right: 10%;
font: normal 100% Verdana, Arial, Helvetica; }
table { margin-left: 10%; margin-right: 10%; }
ul { margin-left: 10%; margin-right: 10%;
font: normal 100% Verdana, Arial, Helvetica; }
ol { margin-left: 10%; margin-right: 10%;
font: normal 100% Verdana, Arial, Helvetica; }
h1 { text-align: center; margin-bottom: 0; margin-top: 1em; }
h2 { margin: 0 5% 0 7.5%; font-size: 120%; font-style: italic; }
h3 { border: 2px solid blue; border-width: 2px 0.5em 2px 0.5em;
padding: 0.2em 0.2em 0.2em 0.5em; background: #fafafa;
margin-left: 10%; margin-right: 10%; margin-top: 2em;
font-size: 100%; }
.note { border: 2px solid blue; border-width: 2px 2px 2px 2em;
padding: 0.5em 0.5em 0.5em 1em; background: #ffe; }
.code { border: 1px solid black; border-width: 1px;
padding: 0.5em; background: #ffe;
margin-left: 10%; margin-right: 10%; }
blockquote { margin-left: 25%; margin-right: 25%;
font-style: italic; }
.highlite { color: red; }
.footer { margin-top: 2.5em; border-top: 1px solid gray; color:
#AAA; font-size: 85%; padding-top: 0.33em; }
#copyright { text-align: center; color: #AAA;
font-size: 65%; }