Get lazy lists and parser combinators working with new factor.

cvs
Chris Double 2005-04-30 03:20:11 +00:00
parent 19bc49ece3
commit aa5b8fe510
5 changed files with 905 additions and 560 deletions

View File

@ -28,17 +28,21 @@ USE: lists
USE: combinators
USE: kernel
USE: logic
USE: sequences
USE: namespaces
: lfrom ( n -- llist )
#! Return a lazy list of increasing numbers starting
#! from the initial value 'n'.
dup [ succ lfrom ] curry1 lcons ;
dup unit delay swap
[ 1 + lfrom ] cons delay lcons ;
: lfrom-by ( n quot -- llist )
#! Return a lazy list of values starting from n, with
#! each successive value being the result of applying quot to
#! n.
dupd [ dup [ call ] dip lfrom-by ] curry2 lcons ;
swap dup unit delay -rot
[ , dup , \ call , , \ lfrom-by , ] make-list delay lcons ;
: lnaturals 0 lfrom ;
: lpositves 1 lfrom ;
@ -56,8 +60,9 @@ USE: logic
: sieve ( llist - llist )
#! Given a lazy list of numbers, use the sieve of eratosthenes
#! algorithm to return a lazy list of primes.
luncons over [ divisible-by? not ] curry1 lsubset [ sieve ] curry1 lcons ;
luncons over [ divisible-by? not ]
cons lsubset [ sieve ] cons delay >r unit delay r> lcons ;
: lprimes 2 lfrom sieve ;
: first-ten-primes 10 lprimes ltake ;
: first-ten-primes 10 lprimes ltake llist>list ;

View File

@ -21,152 +21,277 @@
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: lazy
USE: lists
USE: stack
USE: math
USE: stdio
USE: prettyprint
USE: kernel
USE: combinators
USE: logic
USE: sequences
USE: namespaces
USE: lists
USE: math
: curry1 ( n quot -- quot )
#! Return a quotation that when called will initially
#! have 'n' pushed on the stack.
cons ;
TUPLE: promise quot forced? value ;
: curry2 ( n1 n2 quot -- quot )
#! Return a quotation that when called will initially
#! have 'n1' and 'n2' pushed on the stack.
cons cons ;
: delay ( quot -- <promise> )
#! Given a quotation, create a promise which may later be forced.
#! When forced the quotation will execute returning the value. Future
#! forces of the promise will return that value and not re-execute
#! the quotation.
f f <promise> ;
: delay ( value -- promise )
#! Return a promise that when 'forced' returns the original value.
unit ;
: (force) ( <promise> -- value )
#! Force the given promise leaving the value of calling the
#! promises quotation on the stack. Re-forcing the promise
#! will return the same value and not recall the quotation.
dup promise-forced? [
dup promise-quot call over set-promise-value
t over set-promise-forced?
] unless
promise-value ;
: force ( promise -- value )
#! Return the value associated with the promise.
call ;
: force ( <promise> -- value )
(force) dup promise? [
force
] when ;
: lcons ( car promise -- lcons )
#! Return a lazy pair, where the cdr is a promise and must
#! be forced to return the value.
cons ;
TUPLE: lcons car cdr ;
: lunit ( a -- llist )
#! Construct a lazy list of one element.
[ ] delay lcons ;
SYMBOL: lazy-nil
DEFER: lnil
[ [ ] ] delay lazy-nil set
: lnil ( -- lcons )
#! Return the nil lazy list.
lazy-nil get ;
: lnil? ( lcons -- bool )
#! Is the given lazy cons the nil value
force not ;
: lcar ( lcons -- car )
#! Return the car of a lazy pair.
car ;
#! Return the value of the head of the lazy list.
dup lnil? [
force lcons-car (force)
] unless ;
: lcdr ( lcons -- cdr )
#! Return the cdr of a lazy pair, implicitly forcing it.
cdr force ;
#! Return the value of the rest of the lazy list.
#! This is itself a lazy list.
dup lnil? [
force lcons-cdr (force)
] unless ;
: lcons ( lcar lcdr -- promise )
#! Given a car and cdr, both lazy values, return a lazy cons.
swap [ , , \ <lcons> , ] make-list delay ;
: lunit ( lvalue -- llist )
#! Given a lazy value (a quotation that when called produces
#! the value) produce a lazy list containing that value.
[ lnil ] delay lcons ;
: lnth ( n llist -- value )
#! Return the nth item in a lazy list
swap [ lcdr ] times lcar ;
: luncons ( lcons -- car cdr )
#! Return the car and forced cdr of the lazy cons.
uncons force ;
: (ltake) ( n llist accum -- list )
>r >r pred dup 0 < [
drop r> drop r> reverse
] [
r> luncons swap r> cons (ltake)
] ifte ;
: ltake ( n llist -- list )
#! Return a list containing the first n items from
#! the lazy list.
[ ] (ltake) ;
#! Return the car and cdr of the lazy list
dup lcar swap lcdr ;
: lmap ( llist quot -- llist )
#! Return a lazy list containing the collected result of calling
#! quot on the original lazy list.
over [ ] = [
2drop [ ]
over lnil? [
drop
] [
>r luncons r>
dup swapd
[ lmap ] curry2
>r call r>
swap 2dup
[ , \ lcdr , , \ lmap , ] make-list delay >r
[ , \ lcar , , \ call , ] make-list delay r>
lcons
] ifte ;
: ltake ( n llist -- llist )
#! Return a lazy list containing the first n items from
#! the original lazy list.
over 0 = [
2drop lnil
] [
dup lnil? [
nip
] [
swap dupd ( llist llist n -- )
[ [ 1 - ] cons , \ call , , \ lcdr , \ ltake , ] make-list delay >r
[ , \ lcar , ] make-list delay r>
lcons
] ifte
] ifte ;
DEFER: lsubset
TUPLE: lsubset-state llist pred ;
: (lsubset-cdr) ( state -- llist )
#! Given a predicate and a lazy list, do the cdr
#! portion of lsubset.
dup lsubset-state-llist lcdr swap lsubset-state-pred lsubset ;
: (lsubset-car) ( state -- value )
#! Given a predicate and a lazy list, do the car
#! portion of lsubset.
dup lsubset-state-llist lcar over
lsubset-state-pred dupd call [ ( state lcar -- )
nip
] [ ( state lcar -- )
drop dup lsubset-state-llist lcdr over set-lsubset-state-llist
(lsubset-car)
] ifte ;
: (lsubset-set-first-car) ( state -- bool )
#! Set the state to the first valid car. If none found
#! return false.
dup lsubset-state-llist lcar over
lsubset-state-pred dupd call [ ( state lcar -- )
2drop t
] [ ( state lcar -- )
drop dup lsubset-state-llist lcdr dup lnil? [
2drop f
] [
over set-lsubset-state-llist
(lsubset-set-first-car)
] ifte
] ifte ;
: lsubset ( llist pred -- llist )
#! Return a lazy list containing only the items from the original
#! lazy list for which the predicate returns a value other than f.
over [ ] = [
2drop [ ]
over lnil? [
drop
] [
>r luncons r>
dup swapd
[ lsubset ] curry2
-rot dupd call [
swap lcons
<lsubset-state> dup
(lsubset-set-first-car) [
dup
[ (lsubset-cdr) ] cons delay >r
[ (lsubset-car) ] cons delay r> lcons
] [
drop call
drop lnil
] ifte
] ifte ;
: lappend* ;
: (lappend*) ;
: lappend-list* ;
DEFER: lappend*
DEFER: (lappend*)
TUPLE: lappend*-state current rest ;
: lappend-item* ( llists list item -- llist )
-rot [ lappend-list* ] curry2 lcons ;
USE: stdio
: lappend-list* ( llists list -- llist )
dup [
#! non-empty list
luncons swap lappend-item*
: (lappend*-cdr) ( state -- llist )
#! Given the state object, do the cdr portion of the
#! lazy append.
dup lappend*-state-current dup lnil? [ ( state current -- )
nip
] [ ( state current -- )
lcdr ( state cdr -- )
dup lnil? [ ( state cdr -- )
drop dup lappend*-state-rest dup lnil? [ ( state rest )
nip
] [
#! empty list
drop lappend*
nip
luncons ( state rest-car rest-cdr -- )
<lappend*-state> (lappend*)
] ifte
] [ ( state cdr -- )
swap lappend*-state-rest <lappend*-state> (lappend*)
] ifte
] ifte ;
: (lappend*) ( llists -- llist )
dup lcar [ ( llists )
#! Yes, the first item in the list is a valid llist
luncons swap lappend-list*
] [
#! The first item in the list is an empty list.
#! Resume passing the next list.
lcdr lappend*
: (lappend*-car) ( state -- value )
#! Given the state object, do the car portion of the
#! lazy append.
dup lappend*-state-current dup lnil? [ ( state current -- )
nip
] [ ( state current -- )
lcar nip
] ifte ;
: (lappend*) ( state -- llist )
#! Do the main work of the lazy list appending using a
#! state object.
dup
[ (lappend*-cdr) ] cons delay >r
[ (lappend*-car) ] cons delay r> lcons ;
: lappend* ( llists -- llist )
#! Given a lazy list of lazy lists, return a lazy list that
#! works through all of the sub-lists in sequence.
dup [
(lappend*)
] [
#! Leave empty list on the stack
] ifte ;
[ lnil? not ] lsubset
dup lnil? [
luncons <lappend*-state> (lappend*)
] unless ;
: list>llist ( list -- llist )
#! Convert a list to a lazy list.
dup [
uncons [ list>llist ] curry1 lcons
] when ;
DEFER: list>llist
: lappend ( llist1 llist2 -- llist )
#! Concatenate two lazy lists such that they appear to be one big lazy list.
#! Concatenate two lazy lists such that they appear to be one big
#! lazy list.
2list list>llist lappend* ;
: leach ( llist quot -- )
#! Call the quotation on each item in the lazy list.
#! Warning: If the list is infinite then this will
#! never return.
over [
>r luncons r> tuck >r >r call r> r> leach
] [
over lnil? [
2drop
] [
>r luncons r> tuck >r >r call r> r> leach
] ifte ;
: (llist>list) ( result llist -- list )
#! Helper function for llist>list.
dup lnil? [
drop
] [
dup lcar ( result llist car )
swap lcdr >r swons r> (llist>list)
] ifte ;
: llist>list ( llist -- list )
#! Convert a lazy list to a normal list. This will cause
#! an infinite loop if the lazy list is an infinite list.
f swap (llist>list) reverse ;
: list>llist ( list -- llist )
#! Convert a list to a lazy list.
dup [
uncons [ list>llist ] cons delay >r unit delay r> lcons
] [
drop lnil
] ifte ;
M: lcons nth lnth ;
: test1
[ 1 ] list>llist
[ 2 ] list>llist
2list
list>llist
lappend* ;
: test2
[ 1 2 ] list>llist
[ 3 4 ] list>llist
2list
list>llist
lappend* ;
: test3
[ 1 2 3 ] list>llist
[ 4 5 6 ] list>llist
[ 7 8 9 ] list>llist
3list
list>llist
lappend* ;
: test4
[ 1 2 3 4 5 ] list>llist
[ 2 mod 1 = ] lsubset ;
: test5 lnil unit delay lunit [ lnil? not ] lsubset ;
: test6 lnil unit delay lunit lappend* ;

View File

@ -9,14 +9,22 @@
ability to describe infinite structures, and to delay execution of
expressions until they are actually used.</p>
<p>Lazy lists, like normal lists, are composed of a head and tail. In
a lazy list the tail is something called a 'promise'. To convert a
a lazy list the head and tail are something called a 'promise'.
To convert a
'promise' into its actual value a word called 'force' is used. To
convert a value into a 'promise' the word to use is 'delay'.</p>
<table border="1">
<tr><td><a href="#delay">delay</a></td></tr>
<tr><td><a href="#force">force</a></td></tr>
</table>
<p>Many of the lazy list words are named similar to the standard list
words but with an 'l' suffixed to it. Here are the commonly used
words and their equivalent list operation:</p>
<table border="1">
<tr><th>Lazy List</th><th>Normal List</th></tr>
<tr><td><a href="#lnil">lnil</a></td><td>[ ]</td></tr>
<tr><td><a href="#lnilp">lnil?</a></td><td>Test for nil value</td></tr>
<tr><td><a href="#lcons">lcons</a></td><td>cons</td></tr>
<tr><td><a href="#lunit">lunit</a></td><td>unit</td></tr>
<tr><td><a href="#lcar">lcar</a></td><td>car</td></tr>
@ -38,47 +46,122 @@ list.</td></tr>
<tr><td><a href="#list>llist">list>llist</a></td><td>Given a normal list, return a lazy list
that contains the same elements as the normal list.</td></tr>
</table>
<p>A couple of helper functions are also provided by the lazy
vocabulary.</p>
<table border="1">
<tr><td>curry1</td><td>Given a value and a quotation, returns a new
quotation that when called will have the value on the stack.</td></tr>
<tr><td>curry2</td><td>Given two values and a quotation, returns a new
quotation that when called will have the two values on the
stack.</td></tr>
</table>
<h2>Reference</h2>
<!-- delay description -->
<a name="delay">
<h3>delay ( quot -- &lt;promise&gt; )</h3>
<p>'delay' is used to convert a value or expression into a promise.
The word 'force' is used to convert that promise back to its
value, or to force evaluation of the expression to return a value.
</p>
<p>The value on the stack that 'delay' expects must be quoted. This is
a requirement to prevent it from being evaluated.
</p>
<pre class="code">
( 1 ) [ 42 ] <a href="#delay">delay</a> dup .
=> &lt;&lt; promise [ ] [ 42 ] [ ] [ ] &gt;&gt;
( 2 ) <a href="#force">force</a> .
=> 42
</pre>
<!-- force description -->
<a name="force">
<h3>force ( &lt;promise&gt; -- value )</h3>
<p>'force' will evaluate a promises original expression
and leave the value of that expression on the stack.
</p>
<p>A promise can be forced multiple times but the expression
is only evaluated once. Future calls of 'force' on the promise
will returned the cached value of the original force. If the
expression contains side effects, such as i/o, then that i/o
will only occur on the first 'force'. See below for an example
(steps 3-5).
</p>
<p>If a promise is itself delayed, a force will evaluate all promises
until a value is returned. Due to this behaviour it is generally not
possible to delay a promise. The example below shows what happens
in this case.
</p>
<pre class="code">
( 1 ) [ 42 ] <a href="#delay">delay</a> dup .
=> &lt;&lt; promise [ ] [ 42 ] [ ] [ ] &gt;&gt;
( 2 ) <a href="#force">force</a> .
=> 42
#! Multiple forces on a promise returns cached value
( 3 ) [ "hello" print 42 ] <a href="#delay">delay</a> dup .
=> << promise [ ] [ "hello" print 42 ] [ ] [ ] >>
( 4 ) dup <a href="#force">force</a> .
=> hello
42
( 5 ) <a href="#force">force</a> .
=> 42
#! Forcing a delayed promise cascades up to return
#! original value, rather than the promise.
( 6 ) [ [ 42 ] <a href="#delay">delay</a> ] <a href="#delay">delay</a> dup .
=> << promise [ ] [ [ 42 ] delay ] [ ] [ ] >>
( 7 ) <a href="#force">force</a> .
=> 42
</pre>
<!-- lnil description -->
<a name="lnil">
<h3>lnil ( -- lcons )</h3>
<p>Returns a value representing the empty lazy list.</p>
<pre class="code">
( 1 ) <a href="#lnil">lnil</a> .
=> << promise [ ] [ [ ] ] t [ ] >>
</pre>
<!-- lnil description -->
<a name="lnilp">
<h3>lnil? ( lcons -- bool )</h3>
<p>Returns true if the given lazy cons is the value representing
the empty lazy list.</p>
<pre class="code">
( 1 ) <a href="#lnil">lnil</a> <a href="#lnilp">lnil?</a> .
=> t
( 2 ) [ 1 ] <a href="#list2llist">list&gt;llist</a> dup <a href="#lnilp">lnil?</a> .
=> [ ]
( 3 ) <a href="#lcdr">lcdr</a> <a href="#lnilp">lnil?</a> .
=> t
</pre>
<!-- lcons description -->
<a name="lcons">
<h3>lcons ( value promise -- lcons )</h3>
<p>Provides the same effect as 'cons' does for normal lists. It
creates a cons cell where the first element is the value given and the
second element is a promise.</p>
<a name="promise">
<p>A promise is either a value that has had 'force' called on it, or
a quotation that when 'call' is applied to it, returns the actual
value.</p>
<h3>lcons ( car-promise cdr-promise -- lcons )</h3>
<p>Provides the same effect as 'cons' does for normal lists.
Both values provided must be promises (ie. expressions that have
had <a href="#delay">delay</a> called on them).
</p>
<p>As the car and cdr passed on the stack are promises, they are not
evaluated until <a href="#lcar">lcar</a> or <a href="#lcdr">lcdr</a>
are called on the lazy cons.</p>
<pre class="code">
( 1 ) 5 6 delay <a href="#lcons">lcons</a> dup .
=> [ 5 6 ]
( 1 ) [ "car" ] <a href="#delay">delay</a> [ "cdr" ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) dup <a href="#lcar">lcar</a> .
=> 5
=> "car"
( 3 ) dup <a href="#lcdr">lcdr</a> .
=> 6
=> "cdr"
</pre>
<!-- lunit description -->
<a name="lunit">
<h3>lunit ( value -- llist )</h3>
<h3>lunit ( value-promise -- llist )</h3>
<p>Provides the same effect as 'unit' does for normal lists. It
creates a lazy list where the first element is the value given.</p>
<p>Like <a href="#lcons">lcons</a>, the value on the stack must be
a promise and is not evaluated until the <a href="#lcar">lcar</a>
of the list is requested.</a>
<pre class="code">
( 1 ) 42 <a href="#lunit">lunit</a> dup .
=> [ 42 f ]
( 1 ) [ 42 ] <a href="#delay">delay</a> <a href="#lunit">lunit</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) dup <a href="#lcar">lcar</a> .
=> 42
( 3 ) dup <a href="#lcdr">lcdr</a> .
=> f
( 3 ) dup <a href="#lcdr">lcdr</a> <a href="#lnilp">lnil?</a> .
=> t
( 4 ) [ . ] <a href="#leach">leach</a>
=> 42
</pre>
@ -87,10 +170,11 @@ creates a lazy list where the first element is the value given.</p>
<a name="lcar">
<h3>lcar ( lcons -- value )</h3>
<p>Provides the same effect as 'car' does for normal lists. It
returns the first element in a lazy cons cell.</p>
returns the first element in a lazy cons cell. This will force
the evaluation of that element.</p>
<pre class="code">
( 1 ) 42 <a href="#lunit">lunit</a> dup .
=> [ 42 f ]
( 1 ) [ 42 ] <a href="#delay">delay</a> <a href="#lunit">lunit</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) <a href="#lcar">lcar</a> .
=> 42
</pre>
@ -102,15 +186,15 @@ returns the first element in a lazy cons cell.</p>
returns the second element in a lazy cons cell and forces it. This
causes that element to be evaluated immediately.</p>
<pre class="code">
( 1 ) 5 [ 5 6 + ] <a href="#lcons">lcons</a> dup .
=> [ 5 5 6 + ]
( 1 ) [ 1 ] <a href="#delay">delay</a> [ 5 6 + ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) <a href="#lcdr">lcdr</a> .
=> 11
</pre>
<pre class="code">
( 1 ) 5 lfrom dup .
=> [ 5 5 succ lfrom ]
( 1 ) 5 <a href="#lfrom">lfrom</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
=> 6
( 3 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
@ -126,8 +210,8 @@ causes that element to be evaluated immediately.</p>
returns the nth value in the lazy list. It causes all the values up to
'n' to be evaluated.</p>
<pre class="code">
( 1 ) 1 lfrom
=> [ 1 1 succ lfrom ]
( 1 ) 1 <a href="#lfrom">lfrom</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) 5 swap <a href="#lnth">lnth</a> .
=> 6
</pre>
@ -136,32 +220,31 @@ returns the nth value in the lazy list. It causes all the values up to
<a name="luncons">
<h3>luncons ( lcons -- car cdr )</h3>
<p>Provides the same effect as 'uncons' does for normal lists. It
returns the car and cdr of the lazy list. Note that cdr is forced
resulting in it being evaluated.</p>
returns the car and cdr of the lazy list.</p>
<pre class="code">
( 1 ) 5 [ 6 ] <a href="#lcons">lcons</a> dup .
=> [ 5 6 ]
( 2 ) <a href="#luncons">luncons</a> .s
=> { 5 6 }
( 1 ) [ 5 ] <a href="#delay">delay</a> [ 6 ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
=> &lt;&lt; promise ... &gt;&gt;
( 2 ) <a href="#luncons">luncons</a> . .
=> 6
5
</pre>
<!-- lmap description -->
<a name="lmap">
<h3>lmap ( llist quot -- llist )</h3>
<p>Provides the same effect as 'map' does for normal lists. It
lazily maps over a lazy list applying the quotation to each element.
<p>Lazily maps over a lazy list applying the quotation to each element.
A new lazy list is returned which contains the results of the
quotation.</p>
<p>When initially called <a href="#lmap">lmap</a> will only call quot on the first element
of the list. It then constructs a lazy list that performs the
next '<a href="#lmap">lmap</a>' operation on the next element when it is evaluated. This
allows mapping over infinite lists.</p>
<p>When intially called nothing in the original lazy list is
evaluated. Only when <a href="#lcar">lcar</a> is called will the item
in the list be evaluated and applied to the quotation. Ditto with <a
href="#lcdr">lcdr</a>, thus allowing infinite lists to be mapped over.</p>
<pre class="code">
( 1 ) 1 lfrom
( 1 ) 1 <a href="#lfrom">lfrom</a>
=> < infinite list of incrementing numbers >
( 2 ) [ 2 * ] <a href="#lmap">lmap</a>
=> < infinite list of numbers incrementing by 2 >
( 3 ) 5 swap <a href="#ltake">ltake</a> .
( 3 ) 5 swap <a href="#ltake">ltake</a> <a href="#llist2list">llist&gt;list</a> .
=> [ 2 4 6 8 10 ]
</pre>
@ -174,17 +257,15 @@ element. If that quotation returns true, the element will be included
in the resulting lazy list. If it is false, the element will be skipped.
A new lazy list is returned which contains all elements where the
predicate returned true.</p>
<p>When initially called <a href="#lsubset">lsubset</a> will only call
the predicate quotation on the first element
of the list. It then constructs a lazy list that performs the
next '<a href="#lsubset">lsubset</a>' operation on the next element when it is evaluated. This
allows subsetting over infinite lists.</p>
<p>Like <a href="#lmap">lmap</a>, when initially called no evaluation
will occur. A lazy list is returned that when values are retrieved
from in then items are evaluated and checked against the predicate.</p>
<pre class="code">
( 1 ) 1 lfrom
( 1 ) 1 <a href="#lfrom">lfrom</a>
=> < infinite list of incrementing numbers >
( 2 ) [ prime? ] <a href="#lsubset">lsubset</a>
( 2 ) [ <a href="#primep">prime?</a> ] <a href="#lsubset">lsubset</a>
=> < infinite list of prime numbers >
( 3 ) 5 swap <a href="#ltake">ltake</a> .
( 3 ) 5 swap <a href="#ltake">ltake</a> <a href="#llist2list">llist&gt;list</a> .
=> [ 2 3 5 7 11 ]
</pre>
@ -196,7 +277,7 @@ lazily iterates over a lazy list applying the quotation to each
element. If this operation is applied to an infinite list it will
never return unless the quotation escapes out by calling a continuation.</p>
<pre class="code">
( 1 ) 1 lfrom
( 1 ) 1 <a href="#lfrom">lfrom</a>
=> < infinite list of incrementing numbers >
( 2 ) [ 2 mod 1 = ] <a href="#lsubset">lsubset</a>
=> < infinite list of odd numbers >
@ -210,13 +291,13 @@ never return unless the quotation escapes out by calling a continuation.</p>
<!-- ltake description -->
<a name="ltake">
<h3>ltake ( n llist -- list )</h3>
<h3>ltake ( n llist -- llist )</h3>
<p>Iterates over the lazy list 'n' times, appending each element to a
normal list. The normal list is returned. This provides a convenient
way of getting elements out of a lazy list.</p>
lazy list. This provides a convenient way of getting elements out of
an infinite lazy list.</p>
<pre class="code">
( 1 ) : ones 1 [ ones ] <a href="#lcons">lcons</a> ;
( 2 ) 5 ones <a href="#ltake">ltake</a>
( 1 ) : ones [ 1 ] delay [ ones ] delay <a href="#lcons">lcons</a> ;
( 2 ) 5 ones <a href="#ltake">ltake</a> <a href="#llist2list">llist&gt;list</a> .
=> [ 1 1 1 1 1 ]
</pre>
@ -227,7 +308,7 @@ way of getting elements out of a lazy list.</p>
lazily on iteration rather than immediately so it works very fast no
matter how large the list.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a> [ 4 5 6 ] <a href="#list>llist">list>llist</a> <a href="#lappend">lappend</a>
( 1 ) [ 1 2 3 ] <a href="#list2llist">list&gt;llist</a> [ 4 5 6 ] <a href="#list2llist">list&gt;llist</a> <a href="#lappend">lappend</a>
( 2 ) [ . ] <a href="#leach">leach</a>
=> 1
2
@ -244,10 +325,10 @@ matter how large the list.</p>
lazy fashion. The actual appending is done lazily on iteration rather
than immediately so it works very fast no matter how large the lists.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
( 2 ) [ 4 5 6 ] <a href="#list>llist">list>llist</a>
( 3 ) [ 7 8 9 ] <a href="#list>llist">list>llist</a>
( 4 ) 3list <a href="#list>llist">list>llist</a> <a href="#lappendstar">lappend*</a>
( 1 ) [ 1 2 3 ] <a href="#list2>llist">list&gt;llist</a>
( 2 ) [ 4 5 6 ] <a href="#list2llist">list&gt;llist</a>
( 3 ) [ 7 8 9 ] <a href="#list2llist">list&gt;llist</a>
( 4 ) 3list <a href="#list2llist">list&gt;llist</a> <a href="#lappendstar">lappend*</a>
( 5 ) [ . ] <a href="#leach">leach</a>
=> 1
2
@ -261,12 +342,12 @@ than immediately so it works very fast no matter how large the lists.</p>
</pre>
<!-- list>llist description -->
<a name="list>llist">
<h3>list>llist ( list -- llist )</h3>
<a name="list2llist">
<h3>list&gt;llist ( list -- llist )</h3>
<p>Converts a normal list into a lazy list. This is done lazily so the
initial list is not iterated through immediately.</p>
<pre class="code">
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
( 1 ) [ 1 2 3 ] <a href="#list2llist">list&gt;llist</a>
( 2 ) [ . ] <a href="#leach">leach</a>
=> 1
2

View File

@ -22,66 +22,85 @@
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: parser-combinators
USE: lazy
USE: stack
USE: lists
USE: strings
USE: math
USE: logic
USE: kernel
USE: combinators
USE: parser
USE: sequences
USE: strings
USE: lists
USE: math
: phead ( object -- head )
GENERIC: phead
M: string phead ( object -- head )
#! Polymorphic head. Return the head item of the object.
#! For a string this is the first character.
#! For a list this is the car.
[
[ string? ] [ 0 swap str-nth ]
[ list? ] [ car ]
] cond ;
0 swap nth ;
: ptail ( object -- tail )
M: list phead ( object -- head )
#! Polymorphic head. Return the head item of the object.
#! For a list this is the car.
car ;
M: cons phead ( object -- head )
#! Polymorphic head. Return the head item of the object.
#! For a list this is the car.
car ;
GENERIC: ptail
M: string ptail ( object -- tail )
#! Polymorphic tail. Return the tail of the object.
#! For a string this is everything but the first character.
1 swap string-tail ;
M: list ptail ( object -- tail )
#! Polymorphic tail. Return the tail of the object.
#! For a list this is the cdr.
[
[ string? ] [ 1 swap str-tail ]
[ list? ] [ cdr ]
] cond ;
cdr ;
M: cons ptail ( object -- tail )
#! Polymorphic tail. Return the tail of the object.
#! For a list this is the cdr.
cdr ;
: pfirst ( object -- first )
#! Polymorphic first
#! Polymorphic first. The first item in a collection.
phead ;
: psecond ( object -- second )
GENERIC: psecond
M: string psecond ( object -- second )
#! Polymorphic second
[
[ string? ] [ 1 swap str-nth ]
[ list? ] [ cdr car ]
] cond ;
1 swap nth ;
M: list psecond ( object -- second )
#! Polymorphic second
cdr car ;
: ph:t ( object -- head tail )
#! Return the head and tail of the object.
dup phead swap ptail ;
: pempty? ( object -- bool )
#! Polymorphic empty test.
[
[ string? ] [ "" = ]
[ list? ] [ not ]
] cond ;
GENERIC: pempty?
M: string pempty? ( object -- bool )
#! Return true if the collection is empty.
string-length 0 = ;
M: list pempty? ( object -- bool )
#! Return true if the collection is empty.
not ;
: string-take ( n string -- string )
#! Return a string with the first 'n' characters
#! of the original string.
dup str-length pick < [
dup string-length pick < [
2drop ""
] [
str-head
string-head
] ifte ;
: (list-take) ( n list accum -- list )
>r >r pred dup 0 < [
>r >r 1 - dup 0 < [
drop r> drop r> reverse
] [
r> uncons swap r> cons (list-take)
@ -92,342 +111,381 @@ USE: parser
#! of the original list.
[ ] (list-take) ;
: ptake ( n object -- object )
GENERIC: ptake
M: string ptake ( n object -- object )
#! Polymorphic take.
#! Return a collection of the first 'n'
#! characters from the original collection.
[
[ string? ] [ string-take ]
[ list? ] [ list-take ]
] cond ;
string-take ;
M: list ptake ( n object -- object )
#! Polymorphic take.
#! Return a collection of the first 'n'
#! characters from the original collection.
list-take ;
: string-drop ( n string -- string )
#! Return a string with the first 'n' characters
#! of the original string removed.
dup str-length pick < [
dup string-length pick < [
2drop ""
] [
str-tail
string-tail
] ifte ;
: list-drop ( n list -- list )
#! Return a list with the first 'n' items
#! of the original list removed.
>r pred dup 0 < [
>r 1 - dup 0 < [
drop r>
] [
r> cdr list-drop
] ifte ;
: pdrop ( n object -- object )
GENERIC: pdrop
M: string pdrop ( n object -- object )
#! Polymorphic drop.
#! Return a collection the same as 'object'
#! but with the first n items removed.
[
[ string? ] [ string-drop ]
[ list? ] [ list-drop ]
] cond ;
string-drop ;
: ifte-head= ( string-or-list ch [ quot1 ] [ quot2 ] -- )
#! When the character 'ch' is equal to the head
#! of the string or list, run the quot1 otherwise run quot2.
r> r> swap phead = r> r> ifte ;
M: list pdrop ( n object -- object )
#! Polymorphic drop.
#! Return a collection the same as 'object'
#! but with the first n items removed.
list-drop ;
: symbol ( ch -- parser )
#! Return a parser that parses the given symbol.
[ ( inp ch -- result )
2dup [
swap ptail cons lunit
: token-parser ( inp sequence -- llist )
#! A parser that parses a specific sequence of
#! characters.
2dup length swap ptake over = [
swap over length swap pdrop swons unit delay lunit
] [
2drop [ ]
] ifte-head=
] curry1 ;
2drop lnil
] ifte ;
: token ( string -- parser )
#! Return a parser that parses the given string.
[ ( inp string -- result )
2dup str-length swap ptake over = [
swap over str-length swap pdrop cons lunit
#! Return a token parser that parses the given string.
[ token-parser ] cons ;
: satisfy-parser ( inp pred -- llist )
#! A parser that succeeds if the predicate,
#! when passed the first character in the input, returns
#! true.
over pempty? [
2drop lnil
] [
2drop [ ]
over phead swap call [
ph:t swons unit delay lunit
] [
drop lnil
] ifte
] curry1 ;
] ifte ;
: satisfy ( p -- parser )
#! Return a parser that succeeds if the predicate 'p',
#! when passed the first character in the input, returns
#! true.
[ ( inp p -- result )
over pempty? [
2drop [ ]
] [
over phead swap call [
ph:t cons lunit
] [
drop [ ]
] ifte
] ifte
] curry1 ;
[ satisfy-parser ] cons ;
: satisfy2 ( p r -- parser )
#! Return a parser that succeeds if the predicate 'p',
: satisfy2-parser ( inp pred quot -- llist )
#! A parser that succeeds if the predicate,
#! when passed the first character in the input, returns
#! true. On success the word 'r' is called with the
#! successfully parser character on the stack. The result
#! of this is returned as the result of the parser.
[ ( inp p r -- result )
>r over phead swap call [
ph:t swap r> call swons lunit
#! true. On success the quotation is called with the
#! successfully parsed character on the stack. The result
#! of that call is returned as the result portion of the
#! successfull parse lazy list.
-rot over phead swap call [ ( quot inp -- )
ph:t >r swap call r> swons unit delay lunit
] [
r> 2drop [ ]
] ifte
] curry2 ;
2drop lnil
] ifte ;
: satisfy2 ( pred quot -- parser )
#! Return a satisfy2-parser.
[ satisfy2-parser ] cons cons ;
: epsilon-parser ( input -- llist )
#! A parser that parses the empty string. It
#! does not consume any input and always returns
#! an empty list as the parse tree with the
#! unmodified input.
"" cons unit delay lunit ;
: epsilon ( -- parser )
#! A parser that parses the empty string.
[ ( inp -- result )
"" swap cons lunit
] ;
#! Return an epsilon parser
[ epsilon-parser ] ;
: succeed ( r -- parser )
#! A parser that always returns 'r' and consumes no input.
[ ( inp r -- result )
swap cons lunit
] curry1 ;
: succeed-parser ( input result -- llist )
#! A parser that always returns 'result' as a
#! successful parse with no input consumed.
cons unit delay lunit ;
: succeed ( result -- parser )
#! Return a succeed parser.
[ succeed-parser ] cons ;
: fail-parser ( input -- llist )
#! A parser that always fails and returns
#! an empty list of successes.
drop lnil ;
: fail ( -- parser )
#! A parser that always fails
[
drop [ ]
] ;
#! Return a fail-parser.
[ fail-parser ] ;
USE: prettyprint
USE: unparser
: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
#! Called by <&>-do-parser2 on each result of the
#! parse from parser2.
>r uncons r> ( x1 xs2 x )
swap cons cons ;
: ensure-list ( a -- [ a ] )
#! If 'a' is not a list, make it one.
dup list? [ unit ] unless ;
: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
#! Called by the <&>-parser on each result of the
#! successfull parse of parser1. It's input is the
#! cons containing the data parsed and the remaining
#! input. This word will parser2 on the remaining input
#! returning a new cons cell containing the combined
#! parse result.
>r unswons r> ( x xs parser2 )
call swap ( llist x )
[ <&>-do-parser3 ] cons lmap ;
: ++ ( a b -- [ a b ] )
#! Join two items into a list.
>r ensure-list r> ensure-list append ;
: <&>-parser ( input parser1 parser2 -- llist )
#! Parse 'input' by sequentially combining the
#! two parsers. First parser1 is applied to the
#! input then parser2 is applied to the rest of
#! the input strings from the first parser.
>r call r> ( [[ x xs ]] p2 -- result )
[ <&>-do-parser2 ] cons lmap lappend* ;
: <&> ( p1 p2 -- parser )
: <&> ( parser1 parser2 -- parser )
#! Sequentially combine two parsers, returning a parser
#! that first calls p1, then p2 all remaining results from
#! p1.
[ ( inp p1 p2 -- result )
>r call r> [ ( [ x | xs ] p2 -- result )
>r uncons r> call swap [ ( [ x2 | xs2 ] x -- result )
>r uncons swap r> swap ++ swons
] curry1 lmap
] curry1 lmap lappend*
] curry2 ;
[ <&>-parser ] cons cons ;
: <|>-parser ( input parser1 parser2 -- result )
#! Return the combined list resulting from the parses
#! of parser1 and parser2 being applied to the same
#! input. This implements the choice parsing operator.
>r dupd call swap r> call lappend ;
: <|> ( p1 p2 -- parser )
#! Choice operator for parsers. Return a parser that does
#! p1 or p2 depending on which will succeed.
[ ( inp p1 p2 -- result )
rot tuck swap call >r swap call r> lappend
] curry2 ;
[ <|>-parser ] cons cons ;
: p-abc ( -- parser )
#! Test Parser. Parses the string "abc"
"a" token "b" token "c" token <&> <&> ;
: string-ltrim ( string -- string )
#! Return a new string without any leading whitespace
#! from the original string.
dup phead blank? [ ptail string-ltrim ] when ;
: parse-skipwhite ( string -- string )
dup phead blank? [
ptail parse-skipwhite
] [
] ifte ;
: sp-parser ( input parser -- result )
#! Skip all leading whitespace from the input then call
#! the parser on the remaining input.
>r string-ltrim r> call ;
: sp ( parser -- parser )
#! Return a parser that first skips all whitespace before
#! parsing.
[ ( inp parser -- result )
>r parse-skipwhite r> call
] curry1 ;
#! calling the original parser.
[ sp-parser ] cons ;
: just-parser ( input parser -- result )
#! Calls the given parser on the input removes
#! from the results anything where the remaining
#! input to be parsed is not empty. So ensures a
#! fully parsed input string.
call [ car pempty? ] lsubset ;
: just ( parser -- parser )
#! Return a parser that works exactly like the input parser
#! but guarantees that the rest string is empty.
[ ( inp parser -- result )
call [ ( [ x | xs ] -- )
cdr str-length 0 =
] lsubset
] curry1 ;
#! Return an instance of the just-parser.
[ just-parser ] cons ;
: <@ ( p f -- parser )
#! Given a parser p and a quotation f return a parser
#! that does the same as p but in addition applies f
#! to the resulting parse tree.
[ ( inp p f -- result )
>r call r> [ ( [ x | xs ] f -- [ fx | xs ] )
swap uncons r> swap over [ call ] [ drop ] ifte r> cons
] curry1 lmap
] curry2 ;
: (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
#! Perform the result replacement step of <@-parser.
#! Given a successfull parse result, calls the quotation
#! with the result portion on the stack. The result of
#! that call is then used as the new result.
swap uncons rot call cons ;
: p-1 ( -- parser )
"1" token "123" swap call lcar ;
: <@-parser ( input parser quot -- result )
#! Calls the parser on the input. For each successfull
#! parse the quot is call with the parse result on the stack.
#! The result of that quotation then becomes the new parse result.
#! This allows modification of parse tree results (like
#! converting strings to integers, etc).
-rot call dup lnil? [ ( quot lnil -- )
nip
] [ ( quot result -- )
[ (<@-parser-replace) ] rot swons lmap
] ifte ;
: p-2 ( -- parser )
"1" token [ str>number ] <@ "123" swap call lcar ;
: <@ ( parser quot -- parser )
#! Return an <@-parser.
[ <@-parser ] cons cons ;
: some ( parser -- det-parser )
#! Given a parser, return a parser that only produces the
#! resulting parse tree of the first successful complete parse.
[ ( inp parser -- result )
just call lcar car
] curry1 ;
: some-parser ( input parser -- result )
#! Calls the parser on the input, guarantees
#! the parse is complete (the remaining input is empty),
#! picks the first solution and only returns the parse
#! tree since the remaining input is empty.
just call lcar cdr ;
: delayed-parser ( [ parser ] -- parser )
[ ( inp [ parser ] -- result )
call call
] curry1 ;
: some ( parser -- deterministic-parser )
#! Creates a 'some-parser'.
[ some-parser ] cons ;
: parens ;
: parens ( -- parser )
#! Parse nested parentheses
"(" token [ parens ] delayed-parser <&>
")" token <&> [ parens ] delayed-parser <&>
epsilon <|> ;
: nesting ( -- parser )
#! Count the maximum depth of nested parentheses.
"(" token [ nesting ] delayed-parser <&> ")" token <&>
[ nesting ] delayed-parser <&> [ .s drop "a" ] <@ epsilon <|> ;
: <&-parser ( input parser1 parser2 -- result )
#! Same as <&> except discard the results of the second parser.
<&> [ phead ] <@ call ;
: <& ( parser1 parser2 -- parser )
#! Same as <&> except only return the first item in the parse tree.
<&> [ pfirst ] <@ ;
#! Same as <&> except discard the results of the second parser.
[ <&-parser ] cons cons ;
: &>-parser ( input parser1 parser2 -- result )
#! Same as <&> except discard the results of the first parser.
<&> [ ptail ] <@ call ;
: &> ( parser1 parser2 -- parser )
#! Same as <&> except only return the second item in the parse tree.
<&> [ psecond ] <@ ;
#! Same as <&> except discard the results of the first parser.
[ &>-parser ] cons cons ;
: lst ( [ x [ xs ] ] -- [x:xs] )
#! I need a good name for this word...
dup cdr [ uncons car cons ] when unit ;
: (a,(b,c))>((a,b,c)) ( list -- list )
#! Convert a list where the car is a single value
#! and the cdr is a list to a list containing a flattened
#! list.
uncons car cons unit ;
: <:&>-parser ( input parser1 parser2 -- result )
#! Same as <&> except postprocess the result with
#! (a,(b,c))>((a,b,c)).
<&> [ (a,(b,c))>((a,b,c)) ] <@ call ;
: <:&> ( parser1 parser2 -- parser )
#! Same as <&> except postprocess the result with
#! (a,(b,c))>((a,b,c)).
[ <:&>-parser ] cons cons ;
DEFER: <*>
: (<*>) ( parser -- parser )
#! Non-delayed implementation of <*>
dup <*> <:&> [ ] succeed <|> ;
: <*> ( parser -- parser )
#! Return a parser that accepts zero or more occurences of the original
#! parser.
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ [ ] succeed <|> ;
[ (<*>) call ] cons ;
: (<+>) ( parser -- parser )
#! Non-delayed implementation of <+>
dup <*> <:&> ;
: <+> ( parser -- parser )
#! Return a parser that accepts one or more occurences of the original
#! parser.
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ ;
[ (<+>) call ] cons ;
: (<?>) ( parser -- parser )
#! Non-delayed implementation of <?>
[ unit ] <@ [ ] succeed <|> ;
: <?> ( parser -- parser )
#! Return a parser where its construct is optional. It may or may not occur.
[ ] succeed <|> ;
#! Return a parser that optionally uses the parser
#! if that parser would be successfull.
[ (<?>) call ] cons ;
: <first> ( parser -- parser )
#! Transform a parser into a parser that only returns the first success.
[
call dup [ lcar lunit ] when
] curry1 ;
USE: prettyprint
USE: parser
USE: unparser
USE: stdio
: <!*> ( parser -- parser )
#! Version of <*> that only returns the first success.
<*> <first> ;
! Testing <&>
: test1 "abcd" "a" token "b" token <&> call [ . ] leach ;
: test1a "abcd" "a" token "b" token <&> "c" token <&> call [ . ] leach ;
: test1b "abcd" "a" token "b" token "c" token <&> <&> call [ . ] leach ;
: test2 "decd" "a" token "b" token <&> call [ . ] leach ;
: test3 "dbcd" "a" token "b" token <&> call [ . ] leach ;
: test4 "adcd" "a" token "b" token <&> call [ . ] leach ;
: <!+> ( parser -- parser )
#! Version of <+> that only returns the first success.
<+> <first> ;
! Testing <|>
: test5 "abcd" "a" token "b" token <|> call [ . ] leach ;
: test6 "bbcd" "a" token "b" token <|> call [ . ] leach ;
: test7 "cbcd" "a" token "b" token <|> call [ . ] leach ;
: ab-test
"a" token <*> "b" token <&> "aaaaab" swap call [ . ] leach ;
! Testing sp
: test8 " abcd" "a" token call [ . ] leach ;
: test9 " abcd" "a" token sp call [ . ] leach ;
: ab-test2
"a" token <*> "b" token <&> [ "a" "a" "a" "b" ] swap call [ . ] leach ;
! Testing just
: test10 "abcd" "abcd" token "abc" token <|> call [ . ] leach ;
: test11 "abcd" "abcd" token "abc" token <|> just call [ . ] leach ;
: a "a" token "a" token <&> epsilon <|> ;
: b "b" token epsilon <|> ;
: c "c" token "c" token <&> ;
: d "d" token "d" token <&> ;
: count-a "a" token [ count-a ] delayed-parser &> "b" token <& [ 1 + ] <@ 0 succeed <|> ;
: tca "aaabbb" count-a call [ . ] leach ;
! Testing <@
: test12 "01234" [ digit? ] satisfy call [ . ] leach ;
: test13 "01234" [ digit? ] satisfy [ digit> ] <@ call [ . ] leach ;
: parse-digit ( -- parser )
#! Return a parser for digits
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
! Testing some
: test14 "begin1" "begin" token call [ . ] leach ;
: test15 "This should fail with an error" print
"begin1" "begin" token some call . ;
: test16 "begin" "begin" token some call . ;
: (reduce) ( start quot list -- value )
#! Call quot with start and the first value in the list.
#! quot is then called with the result of quot and the
#! next item in the list until the list is exhausted.
uncons >r swap dup swap r> call r> r> dup [
(reduce)
] [
2drop
] ifte ;
! parens test function
: parens ( -- parser )
#! Return a parser that parses nested parentheses.
[ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ] ;
: reduce ( list start quot -- value )
#! Call quot with start and the first value in the list.
#! quot is then called with the result of quot and the
#! next item in the list until the list is exhausted.
rot (reduce) ;
: test17 "" parens call [ . ] leach ;
: test18 "()" parens call [ . ] leach ;
: test19 "((()))" parens call [ . ] leach ;
: natural ( -- parser )
#! a parser for natural numbers.
parse-digit <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
! <& parser and &> parser
: test20 "abcd" "a" token "b" token <&> call [ . ] leach ;
: test21 "abcd" "a" token "b" token <& call [ . ] leach ;
: test22 "abcd" "a" token "b" token &> call [ . ] leach ;
: natural2 ( -- parser )
#! a parser for natural numbers.
parse-digit <!+> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
! nesting example
: parens-open "(" token ;
: parens-close ")" token ;
: nesting
[ parens-open
nesting &>
parens-close <&
nesting <&>
[ unswons 1 + max ] <@
0 succeed <|>
call ] ;
: integer ( -- parser )
#! A parser that can parser possible negative numbers.
"-" token <?> [ drop -1 ] <@ natural2 <&> [ 1 [ * ] reduce ] <@ ;
: test23 "" nesting just call [ . ] leach ;
: test24 "()" nesting just call [ . ] leach ;
: test25 "(())" nesting just call [ . ] leach ;
: test26 "()(()(()()))()" nesting just call [ . ] leach ;
: identifier ( -- parser )
#! Parse identifiers
[ letter? ] satisfy <+> [ car cat ] <@ ;
! Testing <*> and <:&>
: test27 "1234" "1" token <*> call [ . ] leach ;
: test28 "1111234" "1" token <*> call [ . ] leach ;
: test28a "1111234" "1" token <*> [ car cat unit ] <@ call [ . ] leach ;
: test29 "234" "1" token <*> call [ . ] leach ;
: pdigit [ digit? ] satisfy [ digit> ] <@ ;
: pnatural pdigit <*> ;
: pnatural2 pnatural [ car [ >digit ] map cat dup pempty? [ drop 0 ] [ str>number ] ifte unit ] <@ ;
: test30 "12345" pnatural2 call [ . ] leach ;
: identifier2 ( -- parser )
#! Parse identifiers
[ letter? ] satisfy <!+> [ car cat ] <@ ;
! Testing <+>
: test31 "1234" "1" token <+> call [ . ] leach ;
: test32 "1111234" "1" token <+> call [ . ] leach ;
: test33 "234" "1" token <+> call [ . ] leach ;
: ints ( -- parser )
integer "+" token [ drop [ [ + ] ] ] <@ <&>
integer <&> [ call swap call ] <@ ;
: url-quotable ( -- parser )
! [a-zA-Z0-9/_?] re-matches
[ letter? ] satisfy
[ LETTER? ] satisfy <|>
[ digit? ] satisfy <|>
CHAR: / symbol <|>
CHAR: _ symbol <|>
CHAR: ? symbol <|> just ;
: http-header ( -- parser )
[ CHAR: : = not ] satisfy <!+> [ car cat ] <@
": " token [ drop f ] <@ <&>
[ drop t ] satisfy <!+> [ car cat ] <@ <&> just ;
: parse-http-header ( string -- [ name value ] )
http-header call lcar car ;
: get-request ( -- parser )
"GET" token
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
: post-request ( -- parser )
"POST" token
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
: all-request ( -- parser )
"POST" token
[ 32 = not ] satisfy <!+> sp [ car cat ] <@ <&>
"HTTP/1.0" token sp <&> ;
: split-url ( -- parser )
"http://" token
[ CHAR: / = not ] satisfy <!*> [ car cat ] <@ <&>
"/" token <&>
[ drop t ] satisfy <!*> [ car cat ] <@ <&> ;
! Testing <?>
: test34 "ab" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
: test35 "ac" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
: test36 "a5b" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
: pinteger "-" token <?> pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ;
: test37 "123" pinteger call [ . ] leach ;
: test38 "-123" pinteger call [ . ] leach ;

View File

@ -16,17 +16,17 @@
operation.</p>
<p>The result returned by a parser is known as a 'list of
successes'. It is a lazy list of standard Factor cons cells. Each cons
cell is a result of a parse. The car of the cell is the result of the
parse operation and the cdr of the cell is the remaining input left to
be parsed.</p>
<p>A list is used for the result as a parse operation can potentially
cell is a result of a parse. The car of the cell is the remaining
input left to be parsed and the cdr of the cell is the result of the
parsing operation.</p>
<p>A lazy list is used for the result as a parse operation can potentially
return many successful results. For example, a parser that parses one
or more digits will return more than one result for the input "123". A
successful parse could be "1", "12" or "123".</p>
<p>The list is lazy so if only one parse result is required the
remaining results won't actually be processed if they are not
requested. This improves efficiency.</p>
<p>The car of the result pair can be any value that the parser wishes
<p>The cdr of the result pair can be any value that the parser wishes
to return. It could be the successful portion of the input string
parsed, an abstract syntax tree representing the parsed input, or even
a quotation that should get called for later processing.</p>
@ -39,32 +39,32 @@ general parsers work and the 'list of sucesses' that are returned as a
result.</p>
<pre class="code">
(1) : char-a ( inp -- result )
0 over str-nth CHAR: a = [
1 str-tail CHAR: a swons lunit
0 over string-nth CHAR: a = [
1 swap string-tail CHAR: a cons unit delay lunit
] [
drop f
drop lnil
] ifte ;
(2) "atest" char-a [ [ . ] leach ] when*
=> [ 97 | "test" ]
=&gt; [[ "test" 97 ]]
(3) "test" char-a [ [ . ] leach ] when*
=>
=&gt;
</pre>
<p>'char-a' is a parser that only accepts the character 'a' in the
input string. When passed an input string with a string with a leading
'a' then the 'list of successes' has 1 result value. The car of that
result value is the character 'a' successfully parsed, and the cdr is
'a' then the 'list of successes' has 1 result value. The cdr of that
result value is the character 'a' successfully parsed, and the car is
the remaining input string. On failure of the parse an empty list is
returned.</p>
<p>The parser combinator library provides a combinator, <&>, that takes
<p>The parser combinator library provides a combinator, &lt;&amp;&gt;, that takes
two parsers off the stack and returns a parser that calls the original
two in sequence. An example of use would be calling 'char-a' twice,
which would then result in an input string expected with two 'a'
characters leading:</p>
<pre class="code">
(1) "aatest" [ char-a ] [ char-a ] <&> call
=> < list of successes >
(1) "aatest" [ char-a ] [ char-a ] &lt;&amp;&gt; call
=&gt; < list of successes >
(2) [ . ] leach
=> [ [ 97 97 ] | "test" ]
=&gt; [[ "test" [[ 97 97 ]] ]]
</pre>
<h2>Tokens</h2>
<p>Creating parsers for specfic characters and tokens can be a chore
@ -72,13 +72,13 @@ so there is a word that, given a string token on the stack, returns
a parser that parses that particular token:</p>
<pre class="code">
(1) "begin" token
=> < a parser that parses the token "begin" >
(2) dup "this should fail" swap call .
=> f
=&gt; < a parser that parses the token "begin" >
(2) dup "this should fail" swap call lnil? .
=&gt; t
(3) "begin a successfull parse" swap call
=> < lazy list >
=&gt; < lazy list >
(4) [ . ] leach
=> [ "begin" | " a successfull parse" ]
=&gt; [[ " a successfull parse" "begin" ]]
</pre>
<h2>Predicate matching</h2>
<p>The word 'satisfy' takes a quotation from the top of the stack and
@ -89,9 +89,9 @@ true then the parse is successful, otherwise it fails:</p>
(1) : digit-parser ( -- parser )
[ digit? ] satisfy ;
(2) "5" digit-parser call [ . ] leach
=> [ 53 | "" ]
(3) "a" digit-parser call
=> f
=&gt; [[ "" 53 ]]
(3) "a" digit-parser call lnil? .
=&gt; t
</pre>
<p>Note that 'digit-parser' returns a parser, it is not the parser
itself. It is really a parser generating word like 'token'. Whereas
@ -105,14 +105,14 @@ parser parsed. The result of the '<*>' generated parser will be a list
of the successful results returned by the original parser.</p>
<pre class="code">
(1) digit-parser <*>
=> < parser >
=&gt; < parser >
(2) "123" swap call
=> < lazy list >
=&gt; < lazy list >
(3) [ . ] leach
=> [ [ [ 49 50 51 ] ] | "" ]
[ [ [ 49 50 ] ] | "3" ]
[ [ [ 49 ] ] | "23" ]
[ f | "123" ]
=&gt; [ "" [ 49 50 51 ] ]
[ "3" [ 49 50 ] ]
[ "23" [ 49 ] ]
[ "123" ]
</pre>
<p>In this case there are multiple successful parses. This is because
the occurrence of zero or more digits happens more than once. There is
@ -124,7 +124,7 @@ remaining parse results are never produced.</p>
parsed. Sometimes you want this to be something else, like an abstract
syntax tree, or some calculation. For the digit case we may want the
actual integer number.</p>
<p>For this we can use the '<@' parser
<p>For this we can use the '&lt;@' parser
combinator. This combinator takes a parser and a quotation on the
stack and returns a new parser. When the new parser is called it will
call the original parser to produce the results, then it will call the
@ -132,27 +132,48 @@ quotation on each successfull result, and the result of that quotation
will be the result of the parse:</p>
<pre class="code">
(1) : digit-parser2 ( -- parser )
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
[ digit? ] satisfy [ digit> ] &lt;@ ;
(2) "5" digit-parser2 call [ . ] leach
=> [ 5 | "" ]
=&gt; [[ "" 5 ]]
</pre>
<p>Notice that now the result is the actual integer '5' rather than
character code '53'.</p>
<pre class="code">
(1) : natural-parser ( -- parser )
digit-parser2 <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
(2) "123" natural-parser call
=> < lazy list >
(3) [ . ] leach
=> [ [ 123 ] | "" ]
[ [ 12 ] | "3" ]
[ [ 1 ] | "23" ]
[ f | "123" ]
(1) : digit-list>number ( list -- number )
#! Converts a list of digits to a number
[ >digit ] map cat dup string-length 0 = [
drop 0
] [
str>number
] ifte ;
(2) : natural-parser ( -- parser )
digit-parser2 <*> [ car digit-list>number unit ] &lt;@ ;
(3) "123" natural-parser call
=&gt; < lazy list >
(4) [ . ] leach
=&gt; [ "" 123 ]
[ "3" 12 ]
[ "23" 1 ]
[ "123" 0 ]
[ [ 123 ] | "" ]
</pre>
<p>The number parsed is the actual integer number due to the operation
of the '<@' word. This allows parsers to not only parse the input
of the '&lt;@' word. This allows parsers to not only parse the input
string but perform operations and transformations on the syntax tree
returned.</p>
<p>A useful debugging method to work out what to use in the quotation
passed to &lt;@ is to write an initial version of the parser that just
displays the topmost item on the stack:</p>
<pre class="code">
(1) : natural-parser-debug ( -- parser )
digit-parser2 <*> [ "debug: " write dup . ] &lt;@ ;
(3) "123" natural-parser-debug call lcar .
=&gt; debug: [ [ 1 2 3 ] ]
[ "" [ 1 2 3 ] ]
</pre>
<p>From the debug output we can see how to manipulate the result to
get what we want. In this case it's the quotation in the previous example.</p>
<h2>Sequential combinator</h2>
<p>To create a full grammar we need a parser combinator that does
sequential compositions. That is, given two parsers, the sequential
@ -160,41 +181,101 @@ combinator will first run the first parser, and then run the second on
the remaining text to be parsed. As the first parser returns a lazy
list, the second parser will be run on each item of the lazy list. Of
course this is done lazily so it only ends up being done when those
list items are requested. The sequential combinator word is <&>.</p>
list items are requested. The sequential combinator word is &lt;&amp;&gt;.</p>
<pre class="code">
( 1 ) "number:" token
=> < parser that parses the text 'number:' >
( 2 ) natural
=> < parser that parses natural numbers >
( 3 ) <&>
=> < parser that parses 'number:' followed by a natural >
( 4 ) "number:1000" swap call
=> < list of successes >
=&gt; < parser that parses the text 'number:' >
( 2 ) natural-parser
=&gt; < parser that parses natural numbers >
( 3 ) &lt;&amp;&gt;
=&gt; < parser that parses 'number:' followed by a natural >
( 4 ) "number:100" swap call
=&gt; < list of successes >
( 5 ) [ . ] leach
=> [ [ "number:" 1000 ] | "" ]
[ [ "number:" 100 ] | "0" ]
[ [ "number:" 10 ] | "00" ]
[ [ "number:" 1 ] | "000" ]
[ [ "number:" ] | "1000" ]
=&gt; [ "" "number:" 100 ]
[ "0" "number:" 10 ]
[ "00" "number:" 1 ]
[ "100" "number:" 0 ]
</pre>
<p>In this example we might prefer not to have the parse result
contain the token, we want just the number. Two alternatives to &lt;&amp;&gt;
provide the ability to select which result to use from the two
parsers. These operators are &lt;&amp; and &amp;&gt;. The &lt; or &gt; points
in the direction of which parser to retain the results from. So our
example above could be:</p>
<pre class="code">
( 1 ) "number:" token
=&gt; < parser that parses the text 'number:' >
( 2 ) natural-parser
=&gt; < parser that parses natural numbers >
( 3 ) &amp;&gt;
=&gt; < parser that parses 'number:' followed by a natural >
( 4 ) "number:100" swap call
=&gt; < list of successes >
( 5 ) [ . ] leach
=&gt; [ "" 100 ]
[ "0" 10 ]
[ "00" 1 ]
[ "100" 0 ]
</pre>
<p>Notice how the parse result only contains the number due to &&gt;
being used to retain the result of the second parser.</p>
<h2>Choice combinator</h2>
<p>As well as a sequential combinator we need an alternative
combinator. The word for this is <|>. It takes two parsers from the
combinator. The word for this is &lt;|&gt;. It takes two parsers from the
stack and returns a parser that will first try the first parser. If it
succeeds then the result for that is returned. If it fails then the
second parser is tried and its result returned.</p>
<pre class="code">
( 1 ) "one" token
=> < parser that parses the text 'one' >
=&gt; < parser that parses the text 'one' >
( 2 ) "two" token
=> < parser that parses the text 'two' >
( 3 ) <|>
=> < parser that parses 'one' or 'two' >
=&gt; < parser that parses the text 'two' >
( 3 ) &lt;|&gt;
=&gt; < parser that parses 'one' or 'two' >
( 4 ) "one" over call [ . ] leach
=> [ "one" | "" ]
=&gt; [[ "" "one" ]]
( 5 ) "two" swap call [ . ] leach
=> [ "two" | "" ]
=&gt; [[ "" "two" ]]
</pre>
<h2>Option combinator</h2>
<p>The option combinator, &lt;?&gt; allows adding optional elements to
a parser. It takes one parser off the stack and if the parse succeeds
add it to the result tree, otherwise it will ignore it and
continue. The example below extends our natural-parser to parse
integers with an optional leading minus sign.</p>
<pre class="code">
( 1 ) : integer-parser
"-" token <?> natural-parser &lt;&amp;&gt; ;
( 2 ) "200" integer-parser call [ . ] leach
=&gt; [ "" [ ] 200 ]
[ "0" [ ] 20 ]
[ "00" [ ] 2 ]
[ "200" [ ] 0 ]
( 3 ) "-200" integer-parser call [ . ] leach
=&gt; [ "" [ "-" ] 200 ]
[ "0" [ "-" ] 20 ]
[ "00" [ "-" ] 2 ]
[ "200" [ "-" ] 0 ]
[ "-200" [ ] 0 ]
( 4 ) : integer-parser2
integer-parser [ uncons swap [ car -1 * ] when ] &lt;@ ;
( 5 ) "200" integer-parser2 call [ . ] leach
=&gt; [ "" 200 ]
[ "0" 20 ]
[ "00" 2 ]
[ "200" 0 ]
( 6 ) "-200" integer-parser2 call [ . ] leach
=&gt; [ "" -200 ]
[ "0" -20 ]
[ "00" -2 ]
[ "200" 0 ]
[ "-200" 0 ]
</pre>
<h2>Skipping Whitespace</h2>
<p>A parser transformer exists, the word 'sp', that takes an existing
parser and returns a new one that will first skip any whitespace
@ -202,59 +283,54 @@ before calling the original parser. This makes it easy to write
grammers that avoid whitespace without having to explicitly code it
into the grammar.</p>
<pre class="code">
( 1 ) natural
=> < a parser for natural numbers >
( 2 ) "+" token sp
=> < parser for '+' which ignores leading whitespace >
( 3 ) over sp
=> < a parser for natural numbers skipping leading whitespace >
( 4 ) <&> <&>
=> < a parser for natural + natural >
( 5 ) "1 + 2" over call lcar .
=> [ [ 1 "+" 2 ] | "" ]
( 6 ) "3+4" over call lcar .
=> [ [ 3 "+" 4 ] | "" ]
( 1 ) " 123" natural-parser call [ . ] leach
=&gt; [ " 123" 0 ]
( 2 ) " 123" natural-parser sp call [ . ] leach
=&gt; [ "" 123 ]
[ "3" 12 ]
[ "23" 1 ]
[ "123" 0 ]
</pre>
<h2>Eval grammar example</h2>
<p>This example presents a simple grammar that will parse a number
followed by an operator and another number. A factor expression that
computes the entered value will be executed.</p>
<pre class="code">
( 1 ) natural
=> < a parser for natural numbers >
( 2 ) "/" token "*" token "+" token "-" token <|> <|> <|>
=> < a parser for the operator >
( 3 ) sp [ unit [ eval ] append unit ] <@
=> < operator parser that skips whitespace and converts to a
( 1 ) natural-parser
=&gt; < a parser for natural numbers >
( 2 ) "/" token "*" token "+" token "-" token &lt;|&gt; &lt;|&gt; &lt;|&gt;
=&gt; < a parser for the operator >
( 3 ) sp [ "\\ " swap cat2 eval unit ] &lt;@
=&gt; < operator parser that skips whitespace and converts to a
factor expression >
( 4 ) natural sp
=> < a whitespace skipping natural parser >
( 5 ) <&> <&> [ call swap call ] <@
=> < a parser that parsers the expression, converts it to
( 4 ) natural-parser sp
=&gt; < a whitespace skipping natural parser >
( 5 ) &lt;&amp;&gt; &lt;&amp;&gt; [ uncons uncons swap append append call ] &lt;@
=&gt; < a parser that parsers the expression, converts it to
factor, calls it and puts the result in the parse tree >
( 6 ) "123 + 456" over call lcar .
=> [ 579 | "" ]
=&gt; [[ "" 579 ]]
( 7 ) "300-100" over call lcar .
=> [ 200 | "" ]
=&gt; [[ "" 200 ]]
( 8 ) "200/2" over call lcar .
=> [ 100 | "" ]
=&gt; [[ "" 100 ]]
</pre>
<p>It looks complicated when expanded as above but the entire parser,
factored a little, looks quite readable:</p>
<pre class="code">
( 1 ) : operator ( -- parser )
"/" token
"*" token <|>
"+" token <|>
"-" token <|>
[ unit [ eval ] append unit ] <@ ;
"*" token &lt;|&gt;
"+" token &lt;|&gt;
"-" token &lt;|&gt;
[ "\\ " swap cat2 eval unit ] &lt;@ ;
( 2 ) : expression ( -- parser )
natural
operator sp <&>
natural sp <&>
[ call swap call ] <@ ;
natural-parser
operator sp &lt;&amp;&gt;
natural-parser sp &lt;&amp;&gt;
[ uncons swap uncons -rot append append reverse call ] &lt;@ ;
( 3 ) "40+2" expression call lcar .
=> [ 42 | "" ]
=&gt; [[ "" 42 ]]
</pre>
<p class="footer">
News and updates to this software can be obtained from the authors