Get lazy lists and parser combinators working with new factor.
parent
19bc49ece3
commit
aa5b8fe510
|
@ -28,17 +28,21 @@ USE: lists
|
|||
USE: combinators
|
||||
USE: kernel
|
||||
USE: logic
|
||||
USE: sequences
|
||||
USE: namespaces
|
||||
|
||||
: lfrom ( n -- llist )
|
||||
#! Return a lazy list of increasing numbers starting
|
||||
#! from the initial value 'n'.
|
||||
dup [ succ lfrom ] curry1 lcons ;
|
||||
dup unit delay swap
|
||||
[ 1 + lfrom ] cons delay lcons ;
|
||||
|
||||
: lfrom-by ( n quot -- llist )
|
||||
#! Return a lazy list of values starting from n, with
|
||||
#! each successive value being the result of applying quot to
|
||||
#! n.
|
||||
dupd [ dup [ call ] dip lfrom-by ] curry2 lcons ;
|
||||
swap dup unit delay -rot
|
||||
[ , dup , \ call , , \ lfrom-by , ] make-list delay lcons ;
|
||||
|
||||
: lnaturals 0 lfrom ;
|
||||
: lpositves 1 lfrom ;
|
||||
|
@ -56,8 +60,9 @@ USE: logic
|
|||
: sieve ( llist - llist )
|
||||
#! Given a lazy list of numbers, use the sieve of eratosthenes
|
||||
#! algorithm to return a lazy list of primes.
|
||||
luncons over [ divisible-by? not ] curry1 lsubset [ sieve ] curry1 lcons ;
|
||||
luncons over [ divisible-by? not ]
|
||||
cons lsubset [ sieve ] cons delay >r unit delay r> lcons ;
|
||||
|
||||
: lprimes 2 lfrom sieve ;
|
||||
|
||||
: first-ten-primes 10 lprimes ltake ;
|
||||
: first-ten-primes 10 lprimes ltake llist>list ;
|
|
@ -21,152 +21,277 @@
|
|||
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
IN: lazy
|
||||
USE: lists
|
||||
USE: stack
|
||||
USE: math
|
||||
USE: stdio
|
||||
USE: prettyprint
|
||||
USE: kernel
|
||||
USE: combinators
|
||||
USE: logic
|
||||
USE: sequences
|
||||
USE: namespaces
|
||||
USE: lists
|
||||
USE: math
|
||||
|
||||
: curry1 ( n quot -- quot )
|
||||
#! Return a quotation that when called will initially
|
||||
#! have 'n' pushed on the stack.
|
||||
cons ;
|
||||
TUPLE: promise quot forced? value ;
|
||||
|
||||
: curry2 ( n1 n2 quot -- quot )
|
||||
#! Return a quotation that when called will initially
|
||||
#! have 'n1' and 'n2' pushed on the stack.
|
||||
cons cons ;
|
||||
: delay ( quot -- <promise> )
|
||||
#! Given a quotation, create a promise which may later be forced.
|
||||
#! When forced the quotation will execute returning the value. Future
|
||||
#! forces of the promise will return that value and not re-execute
|
||||
#! the quotation.
|
||||
f f <promise> ;
|
||||
|
||||
: delay ( value -- promise )
|
||||
#! Return a promise that when 'forced' returns the original value.
|
||||
unit ;
|
||||
: (force) ( <promise> -- value )
|
||||
#! Force the given promise leaving the value of calling the
|
||||
#! promises quotation on the stack. Re-forcing the promise
|
||||
#! will return the same value and not recall the quotation.
|
||||
dup promise-forced? [
|
||||
dup promise-quot call over set-promise-value
|
||||
t over set-promise-forced?
|
||||
] unless
|
||||
promise-value ;
|
||||
|
||||
: force ( promise -- value )
|
||||
#! Return the value associated with the promise.
|
||||
call ;
|
||||
: force ( <promise> -- value )
|
||||
(force) dup promise? [
|
||||
force
|
||||
] when ;
|
||||
|
||||
: lcons ( car promise -- lcons )
|
||||
#! Return a lazy pair, where the cdr is a promise and must
|
||||
#! be forced to return the value.
|
||||
cons ;
|
||||
TUPLE: lcons car cdr ;
|
||||
|
||||
: lunit ( a -- llist )
|
||||
#! Construct a lazy list of one element.
|
||||
[ ] delay lcons ;
|
||||
SYMBOL: lazy-nil
|
||||
DEFER: lnil
|
||||
[ [ ] ] delay lazy-nil set
|
||||
|
||||
: lnil ( -- lcons )
|
||||
#! Return the nil lazy list.
|
||||
lazy-nil get ;
|
||||
|
||||
: lnil? ( lcons -- bool )
|
||||
#! Is the given lazy cons the nil value
|
||||
force not ;
|
||||
|
||||
: lcar ( lcons -- car )
|
||||
#! Return the car of a lazy pair.
|
||||
car ;
|
||||
#! Return the value of the head of the lazy list.
|
||||
dup lnil? [
|
||||
force lcons-car (force)
|
||||
] unless ;
|
||||
|
||||
: lcdr ( lcons -- cdr )
|
||||
#! Return the cdr of a lazy pair, implicitly forcing it.
|
||||
cdr force ;
|
||||
#! Return the value of the rest of the lazy list.
|
||||
#! This is itself a lazy list.
|
||||
dup lnil? [
|
||||
force lcons-cdr (force)
|
||||
] unless ;
|
||||
|
||||
: lcons ( lcar lcdr -- promise )
|
||||
#! Given a car and cdr, both lazy values, return a lazy cons.
|
||||
swap [ , , \ <lcons> , ] make-list delay ;
|
||||
|
||||
: lunit ( lvalue -- llist )
|
||||
#! Given a lazy value (a quotation that when called produces
|
||||
#! the value) produce a lazy list containing that value.
|
||||
[ lnil ] delay lcons ;
|
||||
|
||||
: lnth ( n llist -- value )
|
||||
#! Return the nth item in a lazy list
|
||||
swap [ lcdr ] times lcar ;
|
||||
|
||||
: luncons ( lcons -- car cdr )
|
||||
#! Return the car and forced cdr of the lazy cons.
|
||||
uncons force ;
|
||||
|
||||
: (ltake) ( n llist accum -- list )
|
||||
>r >r pred dup 0 < [
|
||||
drop r> drop r> reverse
|
||||
] [
|
||||
r> luncons swap r> cons (ltake)
|
||||
] ifte ;
|
||||
|
||||
: ltake ( n llist -- list )
|
||||
#! Return a list containing the first n items from
|
||||
#! the lazy list.
|
||||
[ ] (ltake) ;
|
||||
#! Return the car and cdr of the lazy list
|
||||
dup lcar swap lcdr ;
|
||||
|
||||
: lmap ( llist quot -- llist )
|
||||
#! Return a lazy list containing the collected result of calling
|
||||
#! quot on the original lazy list.
|
||||
over [ ] = [
|
||||
2drop [ ]
|
||||
over lnil? [
|
||||
drop
|
||||
] [
|
||||
>r luncons r>
|
||||
dup swapd
|
||||
[ lmap ] curry2
|
||||
>r call r>
|
||||
swap 2dup
|
||||
[ , \ lcdr , , \ lmap , ] make-list delay >r
|
||||
[ , \ lcar , , \ call , ] make-list delay r>
|
||||
lcons
|
||||
] ifte ;
|
||||
|
||||
: ltake ( n llist -- llist )
|
||||
#! Return a lazy list containing the first n items from
|
||||
#! the original lazy list.
|
||||
over 0 = [
|
||||
2drop lnil
|
||||
] [
|
||||
dup lnil? [
|
||||
nip
|
||||
] [
|
||||
swap dupd ( llist llist n -- )
|
||||
[ [ 1 - ] cons , \ call , , \ lcdr , \ ltake , ] make-list delay >r
|
||||
[ , \ lcar , ] make-list delay r>
|
||||
lcons
|
||||
] ifte
|
||||
] ifte ;
|
||||
|
||||
DEFER: lsubset
|
||||
TUPLE: lsubset-state llist pred ;
|
||||
|
||||
: (lsubset-cdr) ( state -- llist )
|
||||
#! Given a predicate and a lazy list, do the cdr
|
||||
#! portion of lsubset.
|
||||
dup lsubset-state-llist lcdr swap lsubset-state-pred lsubset ;
|
||||
|
||||
: (lsubset-car) ( state -- value )
|
||||
#! Given a predicate and a lazy list, do the car
|
||||
#! portion of lsubset.
|
||||
dup lsubset-state-llist lcar over
|
||||
lsubset-state-pred dupd call [ ( state lcar -- )
|
||||
nip
|
||||
] [ ( state lcar -- )
|
||||
drop dup lsubset-state-llist lcdr over set-lsubset-state-llist
|
||||
(lsubset-car)
|
||||
] ifte ;
|
||||
|
||||
: (lsubset-set-first-car) ( state -- bool )
|
||||
#! Set the state to the first valid car. If none found
|
||||
#! return false.
|
||||
dup lsubset-state-llist lcar over
|
||||
lsubset-state-pred dupd call [ ( state lcar -- )
|
||||
2drop t
|
||||
] [ ( state lcar -- )
|
||||
drop dup lsubset-state-llist lcdr dup lnil? [
|
||||
2drop f
|
||||
] [
|
||||
over set-lsubset-state-llist
|
||||
(lsubset-set-first-car)
|
||||
] ifte
|
||||
] ifte ;
|
||||
|
||||
: lsubset ( llist pred -- llist )
|
||||
#! Return a lazy list containing only the items from the original
|
||||
#! lazy list for which the predicate returns a value other than f.
|
||||
over [ ] = [
|
||||
2drop [ ]
|
||||
over lnil? [
|
||||
drop
|
||||
] [
|
||||
>r luncons r>
|
||||
dup swapd
|
||||
[ lsubset ] curry2
|
||||
-rot dupd call [
|
||||
swap lcons
|
||||
<lsubset-state> dup
|
||||
(lsubset-set-first-car) [
|
||||
dup
|
||||
[ (lsubset-cdr) ] cons delay >r
|
||||
[ (lsubset-car) ] cons delay r> lcons
|
||||
] [
|
||||
drop call
|
||||
drop lnil
|
||||
] ifte
|
||||
] ifte ;
|
||||
|
||||
: lappend* ;
|
||||
: (lappend*) ;
|
||||
: lappend-list* ;
|
||||
DEFER: lappend*
|
||||
DEFER: (lappend*)
|
||||
TUPLE: lappend*-state current rest ;
|
||||
|
||||
: lappend-item* ( llists list item -- llist )
|
||||
-rot [ lappend-list* ] curry2 lcons ;
|
||||
USE: stdio
|
||||
|
||||
: lappend-list* ( llists list -- llist )
|
||||
dup [
|
||||
#! non-empty list
|
||||
luncons swap lappend-item*
|
||||
: (lappend*-cdr) ( state -- llist )
|
||||
#! Given the state object, do the cdr portion of the
|
||||
#! lazy append.
|
||||
dup lappend*-state-current dup lnil? [ ( state current -- )
|
||||
nip
|
||||
] [ ( state current -- )
|
||||
lcdr ( state cdr -- )
|
||||
dup lnil? [ ( state cdr -- )
|
||||
drop dup lappend*-state-rest dup lnil? [ ( state rest )
|
||||
nip
|
||||
] [
|
||||
#! empty list
|
||||
drop lappend*
|
||||
nip
|
||||
luncons ( state rest-car rest-cdr -- )
|
||||
<lappend*-state> (lappend*)
|
||||
] ifte
|
||||
] [ ( state cdr -- )
|
||||
swap lappend*-state-rest <lappend*-state> (lappend*)
|
||||
] ifte
|
||||
] ifte ;
|
||||
|
||||
|
||||
: (lappend*) ( llists -- llist )
|
||||
dup lcar [ ( llists )
|
||||
#! Yes, the first item in the list is a valid llist
|
||||
luncons swap lappend-list*
|
||||
] [
|
||||
#! The first item in the list is an empty list.
|
||||
#! Resume passing the next list.
|
||||
lcdr lappend*
|
||||
: (lappend*-car) ( state -- value )
|
||||
#! Given the state object, do the car portion of the
|
||||
#! lazy append.
|
||||
dup lappend*-state-current dup lnil? [ ( state current -- )
|
||||
nip
|
||||
] [ ( state current -- )
|
||||
lcar nip
|
||||
] ifte ;
|
||||
|
||||
: (lappend*) ( state -- llist )
|
||||
#! Do the main work of the lazy list appending using a
|
||||
#! state object.
|
||||
dup
|
||||
[ (lappend*-cdr) ] cons delay >r
|
||||
[ (lappend*-car) ] cons delay r> lcons ;
|
||||
|
||||
: lappend* ( llists -- llist )
|
||||
#! Given a lazy list of lazy lists, return a lazy list that
|
||||
#! works through all of the sub-lists in sequence.
|
||||
dup [
|
||||
(lappend*)
|
||||
] [
|
||||
#! Leave empty list on the stack
|
||||
] ifte ;
|
||||
[ lnil? not ] lsubset
|
||||
dup lnil? [
|
||||
luncons <lappend*-state> (lappend*)
|
||||
] unless ;
|
||||
|
||||
: list>llist ( list -- llist )
|
||||
#! Convert a list to a lazy list.
|
||||
dup [
|
||||
uncons [ list>llist ] curry1 lcons
|
||||
] when ;
|
||||
DEFER: list>llist
|
||||
|
||||
: lappend ( llist1 llist2 -- llist )
|
||||
#! Concatenate two lazy lists such that they appear to be one big lazy list.
|
||||
#! Concatenate two lazy lists such that they appear to be one big
|
||||
#! lazy list.
|
||||
2list list>llist lappend* ;
|
||||
|
||||
: leach ( llist quot -- )
|
||||
#! Call the quotation on each item in the lazy list.
|
||||
#! Warning: If the list is infinite then this will
|
||||
#! never return.
|
||||
over [
|
||||
>r luncons r> tuck >r >r call r> r> leach
|
||||
] [
|
||||
over lnil? [
|
||||
2drop
|
||||
] [
|
||||
>r luncons r> tuck >r >r call r> r> leach
|
||||
] ifte ;
|
||||
|
||||
|
||||
: (llist>list) ( result llist -- list )
|
||||
#! Helper function for llist>list.
|
||||
dup lnil? [
|
||||
drop
|
||||
] [
|
||||
dup lcar ( result llist car )
|
||||
swap lcdr >r swons r> (llist>list)
|
||||
] ifte ;
|
||||
|
||||
: llist>list ( llist -- list )
|
||||
#! Convert a lazy list to a normal list. This will cause
|
||||
#! an infinite loop if the lazy list is an infinite list.
|
||||
f swap (llist>list) reverse ;
|
||||
|
||||
: list>llist ( list -- llist )
|
||||
#! Convert a list to a lazy list.
|
||||
dup [
|
||||
uncons [ list>llist ] cons delay >r unit delay r> lcons
|
||||
] [
|
||||
drop lnil
|
||||
] ifte ;
|
||||
|
||||
M: lcons nth lnth ;
|
||||
|
||||
: test1
|
||||
[ 1 ] list>llist
|
||||
[ 2 ] list>llist
|
||||
2list
|
||||
list>llist
|
||||
lappend* ;
|
||||
|
||||
: test2
|
||||
[ 1 2 ] list>llist
|
||||
[ 3 4 ] list>llist
|
||||
2list
|
||||
list>llist
|
||||
lappend* ;
|
||||
|
||||
: test3
|
||||
[ 1 2 3 ] list>llist
|
||||
[ 4 5 6 ] list>llist
|
||||
[ 7 8 9 ] list>llist
|
||||
3list
|
||||
list>llist
|
||||
lappend* ;
|
||||
|
||||
: test4
|
||||
[ 1 2 3 4 5 ] list>llist
|
||||
[ 2 mod 1 = ] lsubset ;
|
||||
|
||||
: test5 lnil unit delay lunit [ lnil? not ] lsubset ;
|
||||
|
||||
: test6 lnil unit delay lunit lappend* ;
|
||||
|
||||
|
|
|
@ -9,14 +9,22 @@
|
|||
ability to describe infinite structures, and to delay execution of
|
||||
expressions until they are actually used.</p>
|
||||
<p>Lazy lists, like normal lists, are composed of a head and tail. In
|
||||
a lazy list the tail is something called a 'promise'. To convert a
|
||||
a lazy list the head and tail are something called a 'promise'.
|
||||
To convert a
|
||||
'promise' into its actual value a word called 'force' is used. To
|
||||
convert a value into a 'promise' the word to use is 'delay'.</p>
|
||||
<table border="1">
|
||||
<tr><td><a href="#delay">delay</a></td></tr>
|
||||
<tr><td><a href="#force">force</a></td></tr>
|
||||
</table>
|
||||
|
||||
<p>Many of the lazy list words are named similar to the standard list
|
||||
words but with an 'l' suffixed to it. Here are the commonly used
|
||||
words and their equivalent list operation:</p>
|
||||
<table border="1">
|
||||
<tr><th>Lazy List</th><th>Normal List</th></tr>
|
||||
<tr><td><a href="#lnil">lnil</a></td><td>[ ]</td></tr>
|
||||
<tr><td><a href="#lnilp">lnil?</a></td><td>Test for nil value</td></tr>
|
||||
<tr><td><a href="#lcons">lcons</a></td><td>cons</td></tr>
|
||||
<tr><td><a href="#lunit">lunit</a></td><td>unit</td></tr>
|
||||
<tr><td><a href="#lcar">lcar</a></td><td>car</td></tr>
|
||||
|
@ -38,47 +46,122 @@ list.</td></tr>
|
|||
<tr><td><a href="#list>llist">list>llist</a></td><td>Given a normal list, return a lazy list
|
||||
that contains the same elements as the normal list.</td></tr>
|
||||
</table>
|
||||
<p>A couple of helper functions are also provided by the lazy
|
||||
vocabulary.</p>
|
||||
<table border="1">
|
||||
<tr><td>curry1</td><td>Given a value and a quotation, returns a new
|
||||
quotation that when called will have the value on the stack.</td></tr>
|
||||
<tr><td>curry2</td><td>Given two values and a quotation, returns a new
|
||||
quotation that when called will have the two values on the
|
||||
stack.</td></tr>
|
||||
</table>
|
||||
<h2>Reference</h2>
|
||||
<!-- delay description -->
|
||||
<a name="delay">
|
||||
<h3>delay ( quot -- <promise> )</h3>
|
||||
<p>'delay' is used to convert a value or expression into a promise.
|
||||
The word 'force' is used to convert that promise back to its
|
||||
value, or to force evaluation of the expression to return a value.
|
||||
</p>
|
||||
<p>The value on the stack that 'delay' expects must be quoted. This is
|
||||
a requirement to prevent it from being evaluated.
|
||||
</p>
|
||||
<pre class="code">
|
||||
( 1 ) [ 42 ] <a href="#delay">delay</a> dup .
|
||||
=> << promise [ ] [ 42 ] [ ] [ ] >>
|
||||
( 2 ) <a href="#force">force</a> .
|
||||
=> 42
|
||||
</pre>
|
||||
|
||||
<!-- force description -->
|
||||
<a name="force">
|
||||
<h3>force ( <promise> -- value )</h3>
|
||||
<p>'force' will evaluate a promises original expression
|
||||
and leave the value of that expression on the stack.
|
||||
</p>
|
||||
<p>A promise can be forced multiple times but the expression
|
||||
is only evaluated once. Future calls of 'force' on the promise
|
||||
will returned the cached value of the original force. If the
|
||||
expression contains side effects, such as i/o, then that i/o
|
||||
will only occur on the first 'force'. See below for an example
|
||||
(steps 3-5).
|
||||
</p>
|
||||
<p>If a promise is itself delayed, a force will evaluate all promises
|
||||
until a value is returned. Due to this behaviour it is generally not
|
||||
possible to delay a promise. The example below shows what happens
|
||||
in this case.
|
||||
</p>
|
||||
<pre class="code">
|
||||
( 1 ) [ 42 ] <a href="#delay">delay</a> dup .
|
||||
=> << promise [ ] [ 42 ] [ ] [ ] >>
|
||||
( 2 ) <a href="#force">force</a> .
|
||||
=> 42
|
||||
|
||||
#! Multiple forces on a promise returns cached value
|
||||
( 3 ) [ "hello" print 42 ] <a href="#delay">delay</a> dup .
|
||||
=> << promise [ ] [ "hello" print 42 ] [ ] [ ] >>
|
||||
( 4 ) dup <a href="#force">force</a> .
|
||||
=> hello
|
||||
42
|
||||
( 5 ) <a href="#force">force</a> .
|
||||
=> 42
|
||||
|
||||
#! Forcing a delayed promise cascades up to return
|
||||
#! original value, rather than the promise.
|
||||
( 6 ) [ [ 42 ] <a href="#delay">delay</a> ] <a href="#delay">delay</a> dup .
|
||||
=> << promise [ ] [ [ 42 ] delay ] [ ] [ ] >>
|
||||
( 7 ) <a href="#force">force</a> .
|
||||
=> 42
|
||||
</pre>
|
||||
|
||||
<!-- lnil description -->
|
||||
<a name="lnil">
|
||||
<h3>lnil ( -- lcons )</h3>
|
||||
<p>Returns a value representing the empty lazy list.</p>
|
||||
<pre class="code">
|
||||
( 1 ) <a href="#lnil">lnil</a> .
|
||||
=> << promise [ ] [ [ ] ] t [ ] >>
|
||||
</pre>
|
||||
|
||||
<!-- lnil description -->
|
||||
<a name="lnilp">
|
||||
<h3>lnil? ( lcons -- bool )</h3>
|
||||
<p>Returns true if the given lazy cons is the value representing
|
||||
the empty lazy list.</p>
|
||||
<pre class="code">
|
||||
( 1 ) <a href="#lnil">lnil</a> <a href="#lnilp">lnil?</a> .
|
||||
=> t
|
||||
( 2 ) [ 1 ] <a href="#list2llist">list>llist</a> dup <a href="#lnilp">lnil?</a> .
|
||||
=> [ ]
|
||||
( 3 ) <a href="#lcdr">lcdr</a> <a href="#lnilp">lnil?</a> .
|
||||
=> t
|
||||
</pre>
|
||||
|
||||
<!-- lcons description -->
|
||||
<a name="lcons">
|
||||
<h3>lcons ( value promise -- lcons )</h3>
|
||||
<p>Provides the same effect as 'cons' does for normal lists. It
|
||||
creates a cons cell where the first element is the value given and the
|
||||
second element is a promise.</p>
|
||||
<a name="promise">
|
||||
<p>A promise is either a value that has had 'force' called on it, or
|
||||
a quotation that when 'call' is applied to it, returns the actual
|
||||
value.</p>
|
||||
<h3>lcons ( car-promise cdr-promise -- lcons )</h3>
|
||||
<p>Provides the same effect as 'cons' does for normal lists.
|
||||
Both values provided must be promises (ie. expressions that have
|
||||
had <a href="#delay">delay</a> called on them).
|
||||
</p>
|
||||
<p>As the car and cdr passed on the stack are promises, they are not
|
||||
evaluated until <a href="#lcar">lcar</a> or <a href="#lcdr">lcdr</a>
|
||||
are called on the lazy cons.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 5 6 delay <a href="#lcons">lcons</a> dup .
|
||||
=> [ 5 6 ]
|
||||
( 1 ) [ "car" ] <a href="#delay">delay</a> [ "cdr" ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) dup <a href="#lcar">lcar</a> .
|
||||
=> 5
|
||||
=> "car"
|
||||
( 3 ) dup <a href="#lcdr">lcdr</a> .
|
||||
=> 6
|
||||
=> "cdr"
|
||||
</pre>
|
||||
|
||||
<!-- lunit description -->
|
||||
<a name="lunit">
|
||||
<h3>lunit ( value -- llist )</h3>
|
||||
<h3>lunit ( value-promise -- llist )</h3>
|
||||
<p>Provides the same effect as 'unit' does for normal lists. It
|
||||
creates a lazy list where the first element is the value given.</p>
|
||||
<p>Like <a href="#lcons">lcons</a>, the value on the stack must be
|
||||
a promise and is not evaluated until the <a href="#lcar">lcar</a>
|
||||
of the list is requested.</a>
|
||||
<pre class="code">
|
||||
( 1 ) 42 <a href="#lunit">lunit</a> dup .
|
||||
=> [ 42 f ]
|
||||
( 1 ) [ 42 ] <a href="#delay">delay</a> <a href="#lunit">lunit</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) dup <a href="#lcar">lcar</a> .
|
||||
=> 42
|
||||
( 3 ) dup <a href="#lcdr">lcdr</a> .
|
||||
=> f
|
||||
( 3 ) dup <a href="#lcdr">lcdr</a> <a href="#lnilp">lnil?</a> .
|
||||
=> t
|
||||
( 4 ) [ . ] <a href="#leach">leach</a>
|
||||
=> 42
|
||||
</pre>
|
||||
|
@ -87,10 +170,11 @@ creates a lazy list where the first element is the value given.</p>
|
|||
<a name="lcar">
|
||||
<h3>lcar ( lcons -- value )</h3>
|
||||
<p>Provides the same effect as 'car' does for normal lists. It
|
||||
returns the first element in a lazy cons cell.</p>
|
||||
returns the first element in a lazy cons cell. This will force
|
||||
the evaluation of that element.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 42 <a href="#lunit">lunit</a> dup .
|
||||
=> [ 42 f ]
|
||||
( 1 ) [ 42 ] <a href="#delay">delay</a> <a href="#lunit">lunit</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) <a href="#lcar">lcar</a> .
|
||||
=> 42
|
||||
</pre>
|
||||
|
@ -102,15 +186,15 @@ returns the first element in a lazy cons cell.</p>
|
|||
returns the second element in a lazy cons cell and forces it. This
|
||||
causes that element to be evaluated immediately.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 5 [ 5 6 + ] <a href="#lcons">lcons</a> dup .
|
||||
=> [ 5 5 6 + ]
|
||||
( 1 ) [ 1 ] <a href="#delay">delay</a> [ 5 6 + ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) <a href="#lcdr">lcdr</a> .
|
||||
=> 11
|
||||
</pre>
|
||||
|
||||
<pre class="code">
|
||||
( 1 ) 5 lfrom dup .
|
||||
=> [ 5 5 succ lfrom ]
|
||||
( 1 ) 5 <a href="#lfrom">lfrom</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
|
||||
=> 6
|
||||
( 3 ) <a href="#lcdr">lcdr</a> dup <a href="#lcar">lcar</a> .
|
||||
|
@ -126,8 +210,8 @@ causes that element to be evaluated immediately.</p>
|
|||
returns the nth value in the lazy list. It causes all the values up to
|
||||
'n' to be evaluated.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 1 lfrom
|
||||
=> [ 1 1 succ lfrom ]
|
||||
( 1 ) 1 <a href="#lfrom">lfrom</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) 5 swap <a href="#lnth">lnth</a> .
|
||||
=> 6
|
||||
</pre>
|
||||
|
@ -136,32 +220,31 @@ returns the nth value in the lazy list. It causes all the values up to
|
|||
<a name="luncons">
|
||||
<h3>luncons ( lcons -- car cdr )</h3>
|
||||
<p>Provides the same effect as 'uncons' does for normal lists. It
|
||||
returns the car and cdr of the lazy list. Note that cdr is forced
|
||||
resulting in it being evaluated.</p>
|
||||
returns the car and cdr of the lazy list.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 5 [ 6 ] <a href="#lcons">lcons</a> dup .
|
||||
=> [ 5 6 ]
|
||||
( 2 ) <a href="#luncons">luncons</a> .s
|
||||
=> { 5 6 }
|
||||
( 1 ) [ 5 ] <a href="#delay">delay</a> [ 6 ] <a href="#delay">delay</a> <a href="#lcons">lcons</a> dup .
|
||||
=> << promise ... >>
|
||||
( 2 ) <a href="#luncons">luncons</a> . .
|
||||
=> 6
|
||||
5
|
||||
</pre>
|
||||
|
||||
<!-- lmap description -->
|
||||
<a name="lmap">
|
||||
<h3>lmap ( llist quot -- llist )</h3>
|
||||
<p>Provides the same effect as 'map' does for normal lists. It
|
||||
lazily maps over a lazy list applying the quotation to each element.
|
||||
<p>Lazily maps over a lazy list applying the quotation to each element.
|
||||
A new lazy list is returned which contains the results of the
|
||||
quotation.</p>
|
||||
<p>When initially called <a href="#lmap">lmap</a> will only call quot on the first element
|
||||
of the list. It then constructs a lazy list that performs the
|
||||
next '<a href="#lmap">lmap</a>' operation on the next element when it is evaluated. This
|
||||
allows mapping over infinite lists.</p>
|
||||
<p>When intially called nothing in the original lazy list is
|
||||
evaluated. Only when <a href="#lcar">lcar</a> is called will the item
|
||||
in the list be evaluated and applied to the quotation. Ditto with <a
|
||||
href="#lcdr">lcdr</a>, thus allowing infinite lists to be mapped over.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 1 lfrom
|
||||
( 1 ) 1 <a href="#lfrom">lfrom</a>
|
||||
=> < infinite list of incrementing numbers >
|
||||
( 2 ) [ 2 * ] <a href="#lmap">lmap</a>
|
||||
=> < infinite list of numbers incrementing by 2 >
|
||||
( 3 ) 5 swap <a href="#ltake">ltake</a> .
|
||||
( 3 ) 5 swap <a href="#ltake">ltake</a> <a href="#llist2list">llist>list</a> .
|
||||
=> [ 2 4 6 8 10 ]
|
||||
</pre>
|
||||
|
||||
|
@ -174,17 +257,15 @@ element. If that quotation returns true, the element will be included
|
|||
in the resulting lazy list. If it is false, the element will be skipped.
|
||||
A new lazy list is returned which contains all elements where the
|
||||
predicate returned true.</p>
|
||||
<p>When initially called <a href="#lsubset">lsubset</a> will only call
|
||||
the predicate quotation on the first element
|
||||
of the list. It then constructs a lazy list that performs the
|
||||
next '<a href="#lsubset">lsubset</a>' operation on the next element when it is evaluated. This
|
||||
allows subsetting over infinite lists.</p>
|
||||
<p>Like <a href="#lmap">lmap</a>, when initially called no evaluation
|
||||
will occur. A lazy list is returned that when values are retrieved
|
||||
from in then items are evaluated and checked against the predicate.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 1 lfrom
|
||||
( 1 ) 1 <a href="#lfrom">lfrom</a>
|
||||
=> < infinite list of incrementing numbers >
|
||||
( 2 ) [ prime? ] <a href="#lsubset">lsubset</a>
|
||||
( 2 ) [ <a href="#primep">prime?</a> ] <a href="#lsubset">lsubset</a>
|
||||
=> < infinite list of prime numbers >
|
||||
( 3 ) 5 swap <a href="#ltake">ltake</a> .
|
||||
( 3 ) 5 swap <a href="#ltake">ltake</a> <a href="#llist2list">llist>list</a> .
|
||||
=> [ 2 3 5 7 11 ]
|
||||
</pre>
|
||||
|
||||
|
@ -196,7 +277,7 @@ lazily iterates over a lazy list applying the quotation to each
|
|||
element. If this operation is applied to an infinite list it will
|
||||
never return unless the quotation escapes out by calling a continuation.</p>
|
||||
<pre class="code">
|
||||
( 1 ) 1 lfrom
|
||||
( 1 ) 1 <a href="#lfrom">lfrom</a>
|
||||
=> < infinite list of incrementing numbers >
|
||||
( 2 ) [ 2 mod 1 = ] <a href="#lsubset">lsubset</a>
|
||||
=> < infinite list of odd numbers >
|
||||
|
@ -210,13 +291,13 @@ never return unless the quotation escapes out by calling a continuation.</p>
|
|||
|
||||
<!-- ltake description -->
|
||||
<a name="ltake">
|
||||
<h3>ltake ( n llist -- list )</h3>
|
||||
<h3>ltake ( n llist -- llist )</h3>
|
||||
<p>Iterates over the lazy list 'n' times, appending each element to a
|
||||
normal list. The normal list is returned. This provides a convenient
|
||||
way of getting elements out of a lazy list.</p>
|
||||
lazy list. This provides a convenient way of getting elements out of
|
||||
an infinite lazy list.</p>
|
||||
<pre class="code">
|
||||
( 1 ) : ones 1 [ ones ] <a href="#lcons">lcons</a> ;
|
||||
( 2 ) 5 ones <a href="#ltake">ltake</a>
|
||||
( 1 ) : ones [ 1 ] delay [ ones ] delay <a href="#lcons">lcons</a> ;
|
||||
( 2 ) 5 ones <a href="#ltake">ltake</a> <a href="#llist2list">llist>list</a> .
|
||||
=> [ 1 1 1 1 1 ]
|
||||
</pre>
|
||||
|
||||
|
@ -227,7 +308,7 @@ way of getting elements out of a lazy list.</p>
|
|||
lazily on iteration rather than immediately so it works very fast no
|
||||
matter how large the list.</p>
|
||||
<pre class="code">
|
||||
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a> [ 4 5 6 ] <a href="#list>llist">list>llist</a> <a href="#lappend">lappend</a>
|
||||
( 1 ) [ 1 2 3 ] <a href="#list2llist">list>llist</a> [ 4 5 6 ] <a href="#list2llist">list>llist</a> <a href="#lappend">lappend</a>
|
||||
( 2 ) [ . ] <a href="#leach">leach</a>
|
||||
=> 1
|
||||
2
|
||||
|
@ -244,10 +325,10 @@ matter how large the list.</p>
|
|||
lazy fashion. The actual appending is done lazily on iteration rather
|
||||
than immediately so it works very fast no matter how large the lists.</p>
|
||||
<pre class="code">
|
||||
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
|
||||
( 2 ) [ 4 5 6 ] <a href="#list>llist">list>llist</a>
|
||||
( 3 ) [ 7 8 9 ] <a href="#list>llist">list>llist</a>
|
||||
( 4 ) 3list <a href="#list>llist">list>llist</a> <a href="#lappendstar">lappend*</a>
|
||||
( 1 ) [ 1 2 3 ] <a href="#list2>llist">list>llist</a>
|
||||
( 2 ) [ 4 5 6 ] <a href="#list2llist">list>llist</a>
|
||||
( 3 ) [ 7 8 9 ] <a href="#list2llist">list>llist</a>
|
||||
( 4 ) 3list <a href="#list2llist">list>llist</a> <a href="#lappendstar">lappend*</a>
|
||||
( 5 ) [ . ] <a href="#leach">leach</a>
|
||||
=> 1
|
||||
2
|
||||
|
@ -261,12 +342,12 @@ than immediately so it works very fast no matter how large the lists.</p>
|
|||
</pre>
|
||||
|
||||
<!-- list>llist description -->
|
||||
<a name="list>llist">
|
||||
<h3>list>llist ( list -- llist )</h3>
|
||||
<a name="list2llist">
|
||||
<h3>list>llist ( list -- llist )</h3>
|
||||
<p>Converts a normal list into a lazy list. This is done lazily so the
|
||||
initial list is not iterated through immediately.</p>
|
||||
<pre class="code">
|
||||
( 1 ) [ 1 2 3 ] <a href="#list>llist">list>llist</a>
|
||||
( 1 ) [ 1 2 3 ] <a href="#list2llist">list>llist</a>
|
||||
( 2 ) [ . ] <a href="#leach">leach</a>
|
||||
=> 1
|
||||
2
|
||||
|
|
|
@ -22,66 +22,85 @@
|
|||
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
IN: parser-combinators
|
||||
USE: lazy
|
||||
USE: stack
|
||||
USE: lists
|
||||
USE: strings
|
||||
USE: math
|
||||
USE: logic
|
||||
USE: kernel
|
||||
USE: combinators
|
||||
USE: parser
|
||||
USE: sequences
|
||||
USE: strings
|
||||
USE: lists
|
||||
USE: math
|
||||
|
||||
: phead ( object -- head )
|
||||
GENERIC: phead
|
||||
|
||||
M: string phead ( object -- head )
|
||||
#! Polymorphic head. Return the head item of the object.
|
||||
#! For a string this is the first character.
|
||||
#! For a list this is the car.
|
||||
[
|
||||
[ string? ] [ 0 swap str-nth ]
|
||||
[ list? ] [ car ]
|
||||
] cond ;
|
||||
0 swap nth ;
|
||||
|
||||
: ptail ( object -- tail )
|
||||
M: list phead ( object -- head )
|
||||
#! Polymorphic head. Return the head item of the object.
|
||||
#! For a list this is the car.
|
||||
car ;
|
||||
|
||||
M: cons phead ( object -- head )
|
||||
#! Polymorphic head. Return the head item of the object.
|
||||
#! For a list this is the car.
|
||||
car ;
|
||||
|
||||
GENERIC: ptail
|
||||
|
||||
M: string ptail ( object -- tail )
|
||||
#! Polymorphic tail. Return the tail of the object.
|
||||
#! For a string this is everything but the first character.
|
||||
1 swap string-tail ;
|
||||
|
||||
M: list ptail ( object -- tail )
|
||||
#! Polymorphic tail. Return the tail of the object.
|
||||
#! For a list this is the cdr.
|
||||
[
|
||||
[ string? ] [ 1 swap str-tail ]
|
||||
[ list? ] [ cdr ]
|
||||
] cond ;
|
||||
cdr ;
|
||||
|
||||
M: cons ptail ( object -- tail )
|
||||
#! Polymorphic tail. Return the tail of the object.
|
||||
#! For a list this is the cdr.
|
||||
cdr ;
|
||||
|
||||
: pfirst ( object -- first )
|
||||
#! Polymorphic first
|
||||
#! Polymorphic first. The first item in a collection.
|
||||
phead ;
|
||||
|
||||
: psecond ( object -- second )
|
||||
GENERIC: psecond
|
||||
|
||||
M: string psecond ( object -- second )
|
||||
#! Polymorphic second
|
||||
[
|
||||
[ string? ] [ 1 swap str-nth ]
|
||||
[ list? ] [ cdr car ]
|
||||
] cond ;
|
||||
1 swap nth ;
|
||||
|
||||
M: list psecond ( object -- second )
|
||||
#! Polymorphic second
|
||||
cdr car ;
|
||||
|
||||
: ph:t ( object -- head tail )
|
||||
#! Return the head and tail of the object.
|
||||
dup phead swap ptail ;
|
||||
|
||||
: pempty? ( object -- bool )
|
||||
#! Polymorphic empty test.
|
||||
[
|
||||
[ string? ] [ "" = ]
|
||||
[ list? ] [ not ]
|
||||
] cond ;
|
||||
GENERIC: pempty?
|
||||
|
||||
M: string pempty? ( object -- bool )
|
||||
#! Return true if the collection is empty.
|
||||
string-length 0 = ;
|
||||
|
||||
M: list pempty? ( object -- bool )
|
||||
#! Return true if the collection is empty.
|
||||
not ;
|
||||
|
||||
: string-take ( n string -- string )
|
||||
#! Return a string with the first 'n' characters
|
||||
#! of the original string.
|
||||
dup str-length pick < [
|
||||
dup string-length pick < [
|
||||
2drop ""
|
||||
] [
|
||||
str-head
|
||||
string-head
|
||||
] ifte ;
|
||||
|
||||
: (list-take) ( n list accum -- list )
|
||||
>r >r pred dup 0 < [
|
||||
>r >r 1 - dup 0 < [
|
||||
drop r> drop r> reverse
|
||||
] [
|
||||
r> uncons swap r> cons (list-take)
|
||||
|
@ -92,342 +111,381 @@ USE: parser
|
|||
#! of the original list.
|
||||
[ ] (list-take) ;
|
||||
|
||||
: ptake ( n object -- object )
|
||||
GENERIC: ptake
|
||||
|
||||
M: string ptake ( n object -- object )
|
||||
#! Polymorphic take.
|
||||
#! Return a collection of the first 'n'
|
||||
#! characters from the original collection.
|
||||
[
|
||||
[ string? ] [ string-take ]
|
||||
[ list? ] [ list-take ]
|
||||
] cond ;
|
||||
string-take ;
|
||||
|
||||
M: list ptake ( n object -- object )
|
||||
#! Polymorphic take.
|
||||
#! Return a collection of the first 'n'
|
||||
#! characters from the original collection.
|
||||
list-take ;
|
||||
|
||||
: string-drop ( n string -- string )
|
||||
#! Return a string with the first 'n' characters
|
||||
#! of the original string removed.
|
||||
dup str-length pick < [
|
||||
dup string-length pick < [
|
||||
2drop ""
|
||||
] [
|
||||
str-tail
|
||||
string-tail
|
||||
] ifte ;
|
||||
|
||||
: list-drop ( n list -- list )
|
||||
#! Return a list with the first 'n' items
|
||||
#! of the original list removed.
|
||||
>r pred dup 0 < [
|
||||
>r 1 - dup 0 < [
|
||||
drop r>
|
||||
] [
|
||||
r> cdr list-drop
|
||||
] ifte ;
|
||||
|
||||
: pdrop ( n object -- object )
|
||||
GENERIC: pdrop
|
||||
|
||||
M: string pdrop ( n object -- object )
|
||||
#! Polymorphic drop.
|
||||
#! Return a collection the same as 'object'
|
||||
#! but with the first n items removed.
|
||||
[
|
||||
[ string? ] [ string-drop ]
|
||||
[ list? ] [ list-drop ]
|
||||
] cond ;
|
||||
string-drop ;
|
||||
|
||||
: ifte-head= ( string-or-list ch [ quot1 ] [ quot2 ] -- )
|
||||
#! When the character 'ch' is equal to the head
|
||||
#! of the string or list, run the quot1 otherwise run quot2.
|
||||
r> r> swap phead = r> r> ifte ;
|
||||
M: list pdrop ( n object -- object )
|
||||
#! Polymorphic drop.
|
||||
#! Return a collection the same as 'object'
|
||||
#! but with the first n items removed.
|
||||
list-drop ;
|
||||
|
||||
: symbol ( ch -- parser )
|
||||
#! Return a parser that parses the given symbol.
|
||||
[ ( inp ch -- result )
|
||||
2dup [
|
||||
swap ptail cons lunit
|
||||
: token-parser ( inp sequence -- llist )
|
||||
#! A parser that parses a specific sequence of
|
||||
#! characters.
|
||||
2dup length swap ptake over = [
|
||||
swap over length swap pdrop swons unit delay lunit
|
||||
] [
|
||||
2drop [ ]
|
||||
] ifte-head=
|
||||
] curry1 ;
|
||||
2drop lnil
|
||||
] ifte ;
|
||||
|
||||
: token ( string -- parser )
|
||||
#! Return a parser that parses the given string.
|
||||
[ ( inp string -- result )
|
||||
2dup str-length swap ptake over = [
|
||||
swap over str-length swap pdrop cons lunit
|
||||
#! Return a token parser that parses the given string.
|
||||
[ token-parser ] cons ;
|
||||
|
||||
: satisfy-parser ( inp pred -- llist )
|
||||
#! A parser that succeeds if the predicate,
|
||||
#! when passed the first character in the input, returns
|
||||
#! true.
|
||||
over pempty? [
|
||||
2drop lnil
|
||||
] [
|
||||
2drop [ ]
|
||||
over phead swap call [
|
||||
ph:t swons unit delay lunit
|
||||
] [
|
||||
drop lnil
|
||||
] ifte
|
||||
] curry1 ;
|
||||
] ifte ;
|
||||
|
||||
: satisfy ( p -- parser )
|
||||
#! Return a parser that succeeds if the predicate 'p',
|
||||
#! when passed the first character in the input, returns
|
||||
#! true.
|
||||
[ ( inp p -- result )
|
||||
over pempty? [
|
||||
2drop [ ]
|
||||
] [
|
||||
over phead swap call [
|
||||
ph:t cons lunit
|
||||
] [
|
||||
drop [ ]
|
||||
] ifte
|
||||
] ifte
|
||||
] curry1 ;
|
||||
[ satisfy-parser ] cons ;
|
||||
|
||||
: satisfy2 ( p r -- parser )
|
||||
#! Return a parser that succeeds if the predicate 'p',
|
||||
: satisfy2-parser ( inp pred quot -- llist )
|
||||
#! A parser that succeeds if the predicate,
|
||||
#! when passed the first character in the input, returns
|
||||
#! true. On success the word 'r' is called with the
|
||||
#! successfully parser character on the stack. The result
|
||||
#! of this is returned as the result of the parser.
|
||||
[ ( inp p r -- result )
|
||||
>r over phead swap call [
|
||||
ph:t swap r> call swons lunit
|
||||
#! true. On success the quotation is called with the
|
||||
#! successfully parsed character on the stack. The result
|
||||
#! of that call is returned as the result portion of the
|
||||
#! successfull parse lazy list.
|
||||
-rot over phead swap call [ ( quot inp -- )
|
||||
ph:t >r swap call r> swons unit delay lunit
|
||||
] [
|
||||
r> 2drop [ ]
|
||||
] ifte
|
||||
] curry2 ;
|
||||
2drop lnil
|
||||
] ifte ;
|
||||
|
||||
: satisfy2 ( pred quot -- parser )
|
||||
#! Return a satisfy2-parser.
|
||||
[ satisfy2-parser ] cons cons ;
|
||||
|
||||
: epsilon-parser ( input -- llist )
|
||||
#! A parser that parses the empty string. It
|
||||
#! does not consume any input and always returns
|
||||
#! an empty list as the parse tree with the
|
||||
#! unmodified input.
|
||||
"" cons unit delay lunit ;
|
||||
|
||||
: epsilon ( -- parser )
|
||||
#! A parser that parses the empty string.
|
||||
[ ( inp -- result )
|
||||
"" swap cons lunit
|
||||
] ;
|
||||
#! Return an epsilon parser
|
||||
[ epsilon-parser ] ;
|
||||
|
||||
: succeed ( r -- parser )
|
||||
#! A parser that always returns 'r' and consumes no input.
|
||||
[ ( inp r -- result )
|
||||
swap cons lunit
|
||||
] curry1 ;
|
||||
: succeed-parser ( input result -- llist )
|
||||
#! A parser that always returns 'result' as a
|
||||
#! successful parse with no input consumed.
|
||||
cons unit delay lunit ;
|
||||
|
||||
: succeed ( result -- parser )
|
||||
#! Return a succeed parser.
|
||||
[ succeed-parser ] cons ;
|
||||
|
||||
: fail-parser ( input -- llist )
|
||||
#! A parser that always fails and returns
|
||||
#! an empty list of successes.
|
||||
drop lnil ;
|
||||
|
||||
: fail ( -- parser )
|
||||
#! A parser that always fails
|
||||
[
|
||||
drop [ ]
|
||||
] ;
|
||||
#! Return a fail-parser.
|
||||
[ fail-parser ] ;
|
||||
|
||||
USE: prettyprint
|
||||
USE: unparser
|
||||
: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
|
||||
#! Called by <&>-do-parser2 on each result of the
|
||||
#! parse from parser2.
|
||||
>r uncons r> ( x1 xs2 x )
|
||||
swap cons cons ;
|
||||
|
||||
: ensure-list ( a -- [ a ] )
|
||||
#! If 'a' is not a list, make it one.
|
||||
dup list? [ unit ] unless ;
|
||||
: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
|
||||
#! Called by the <&>-parser on each result of the
|
||||
#! successfull parse of parser1. It's input is the
|
||||
#! cons containing the data parsed and the remaining
|
||||
#! input. This word will parser2 on the remaining input
|
||||
#! returning a new cons cell containing the combined
|
||||
#! parse result.
|
||||
>r unswons r> ( x xs parser2 )
|
||||
call swap ( llist x )
|
||||
[ <&>-do-parser3 ] cons lmap ;
|
||||
|
||||
: ++ ( a b -- [ a b ] )
|
||||
#! Join two items into a list.
|
||||
>r ensure-list r> ensure-list append ;
|
||||
: <&>-parser ( input parser1 parser2 -- llist )
|
||||
#! Parse 'input' by sequentially combining the
|
||||
#! two parsers. First parser1 is applied to the
|
||||
#! input then parser2 is applied to the rest of
|
||||
#! the input strings from the first parser.
|
||||
>r call r> ( [[ x xs ]] p2 -- result )
|
||||
[ <&>-do-parser2 ] cons lmap lappend* ;
|
||||
|
||||
: <&> ( p1 p2 -- parser )
|
||||
: <&> ( parser1 parser2 -- parser )
|
||||
#! Sequentially combine two parsers, returning a parser
|
||||
#! that first calls p1, then p2 all remaining results from
|
||||
#! p1.
|
||||
[ ( inp p1 p2 -- result )
|
||||
>r call r> [ ( [ x | xs ] p2 -- result )
|
||||
>r uncons r> call swap [ ( [ x2 | xs2 ] x -- result )
|
||||
>r uncons swap r> swap ++ swons
|
||||
] curry1 lmap
|
||||
] curry1 lmap lappend*
|
||||
] curry2 ;
|
||||
[ <&>-parser ] cons cons ;
|
||||
|
||||
: <|>-parser ( input parser1 parser2 -- result )
|
||||
#! Return the combined list resulting from the parses
|
||||
#! of parser1 and parser2 being applied to the same
|
||||
#! input. This implements the choice parsing operator.
|
||||
>r dupd call swap r> call lappend ;
|
||||
|
||||
: <|> ( p1 p2 -- parser )
|
||||
#! Choice operator for parsers. Return a parser that does
|
||||
#! p1 or p2 depending on which will succeed.
|
||||
[ ( inp p1 p2 -- result )
|
||||
rot tuck swap call >r swap call r> lappend
|
||||
] curry2 ;
|
||||
[ <|>-parser ] cons cons ;
|
||||
|
||||
: p-abc ( -- parser )
|
||||
#! Test Parser. Parses the string "abc"
|
||||
"a" token "b" token "c" token <&> <&> ;
|
||||
: string-ltrim ( string -- string )
|
||||
#! Return a new string without any leading whitespace
|
||||
#! from the original string.
|
||||
dup phead blank? [ ptail string-ltrim ] when ;
|
||||
|
||||
: parse-skipwhite ( string -- string )
|
||||
dup phead blank? [
|
||||
ptail parse-skipwhite
|
||||
] [
|
||||
] ifte ;
|
||||
: sp-parser ( input parser -- result )
|
||||
#! Skip all leading whitespace from the input then call
|
||||
#! the parser on the remaining input.
|
||||
>r string-ltrim r> call ;
|
||||
|
||||
: sp ( parser -- parser )
|
||||
#! Return a parser that first skips all whitespace before
|
||||
#! parsing.
|
||||
[ ( inp parser -- result )
|
||||
>r parse-skipwhite r> call
|
||||
] curry1 ;
|
||||
#! calling the original parser.
|
||||
[ sp-parser ] cons ;
|
||||
|
||||
: just-parser ( input parser -- result )
|
||||
#! Calls the given parser on the input removes
|
||||
#! from the results anything where the remaining
|
||||
#! input to be parsed is not empty. So ensures a
|
||||
#! fully parsed input string.
|
||||
call [ car pempty? ] lsubset ;
|
||||
|
||||
: just ( parser -- parser )
|
||||
#! Return a parser that works exactly like the input parser
|
||||
#! but guarantees that the rest string is empty.
|
||||
[ ( inp parser -- result )
|
||||
call [ ( [ x | xs ] -- )
|
||||
cdr str-length 0 =
|
||||
] lsubset
|
||||
] curry1 ;
|
||||
#! Return an instance of the just-parser.
|
||||
[ just-parser ] cons ;
|
||||
|
||||
: <@ ( p f -- parser )
|
||||
#! Given a parser p and a quotation f return a parser
|
||||
#! that does the same as p but in addition applies f
|
||||
#! to the resulting parse tree.
|
||||
[ ( inp p f -- result )
|
||||
>r call r> [ ( [ x | xs ] f -- [ fx | xs ] )
|
||||
swap uncons r> swap over [ call ] [ drop ] ifte r> cons
|
||||
] curry1 lmap
|
||||
] curry2 ;
|
||||
: (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
|
||||
#! Perform the result replacement step of <@-parser.
|
||||
#! Given a successfull parse result, calls the quotation
|
||||
#! with the result portion on the stack. The result of
|
||||
#! that call is then used as the new result.
|
||||
swap uncons rot call cons ;
|
||||
|
||||
: p-1 ( -- parser )
|
||||
"1" token "123" swap call lcar ;
|
||||
: <@-parser ( input parser quot -- result )
|
||||
#! Calls the parser on the input. For each successfull
|
||||
#! parse the quot is call with the parse result on the stack.
|
||||
#! The result of that quotation then becomes the new parse result.
|
||||
#! This allows modification of parse tree results (like
|
||||
#! converting strings to integers, etc).
|
||||
-rot call dup lnil? [ ( quot lnil -- )
|
||||
nip
|
||||
] [ ( quot result -- )
|
||||
[ (<@-parser-replace) ] rot swons lmap
|
||||
] ifte ;
|
||||
|
||||
: p-2 ( -- parser )
|
||||
"1" token [ str>number ] <@ "123" swap call lcar ;
|
||||
: <@ ( parser quot -- parser )
|
||||
#! Return an <@-parser.
|
||||
[ <@-parser ] cons cons ;
|
||||
|
||||
: some ( parser -- det-parser )
|
||||
#! Given a parser, return a parser that only produces the
|
||||
#! resulting parse tree of the first successful complete parse.
|
||||
[ ( inp parser -- result )
|
||||
just call lcar car
|
||||
] curry1 ;
|
||||
: some-parser ( input parser -- result )
|
||||
#! Calls the parser on the input, guarantees
|
||||
#! the parse is complete (the remaining input is empty),
|
||||
#! picks the first solution and only returns the parse
|
||||
#! tree since the remaining input is empty.
|
||||
just call lcar cdr ;
|
||||
|
||||
: delayed-parser ( [ parser ] -- parser )
|
||||
[ ( inp [ parser ] -- result )
|
||||
call call
|
||||
] curry1 ;
|
||||
: some ( parser -- deterministic-parser )
|
||||
#! Creates a 'some-parser'.
|
||||
[ some-parser ] cons ;
|
||||
|
||||
: parens ;
|
||||
: parens ( -- parser )
|
||||
#! Parse nested parentheses
|
||||
"(" token [ parens ] delayed-parser <&>
|
||||
")" token <&> [ parens ] delayed-parser <&>
|
||||
epsilon <|> ;
|
||||
|
||||
: nesting ( -- parser )
|
||||
#! Count the maximum depth of nested parentheses.
|
||||
"(" token [ nesting ] delayed-parser <&> ")" token <&>
|
||||
[ nesting ] delayed-parser <&> [ .s drop "a" ] <@ epsilon <|> ;
|
||||
: <&-parser ( input parser1 parser2 -- result )
|
||||
#! Same as <&> except discard the results of the second parser.
|
||||
<&> [ phead ] <@ call ;
|
||||
|
||||
: <& ( parser1 parser2 -- parser )
|
||||
#! Same as <&> except only return the first item in the parse tree.
|
||||
<&> [ pfirst ] <@ ;
|
||||
#! Same as <&> except discard the results of the second parser.
|
||||
[ <&-parser ] cons cons ;
|
||||
|
||||
: &>-parser ( input parser1 parser2 -- result )
|
||||
#! Same as <&> except discard the results of the first parser.
|
||||
<&> [ ptail ] <@ call ;
|
||||
|
||||
: &> ( parser1 parser2 -- parser )
|
||||
#! Same as <&> except only return the second item in the parse tree.
|
||||
<&> [ psecond ] <@ ;
|
||||
#! Same as <&> except discard the results of the first parser.
|
||||
[ &>-parser ] cons cons ;
|
||||
|
||||
: lst ( [ x [ xs ] ] -- [x:xs] )
|
||||
#! I need a good name for this word...
|
||||
dup cdr [ uncons car cons ] when unit ;
|
||||
: (a,(b,c))>((a,b,c)) ( list -- list )
|
||||
#! Convert a list where the car is a single value
|
||||
#! and the cdr is a list to a list containing a flattened
|
||||
#! list.
|
||||
uncons car cons unit ;
|
||||
|
||||
: <:&>-parser ( input parser1 parser2 -- result )
|
||||
#! Same as <&> except postprocess the result with
|
||||
#! (a,(b,c))>((a,b,c)).
|
||||
<&> [ (a,(b,c))>((a,b,c)) ] <@ call ;
|
||||
|
||||
: <:&> ( parser1 parser2 -- parser )
|
||||
#! Same as <&> except postprocess the result with
|
||||
#! (a,(b,c))>((a,b,c)).
|
||||
[ <:&>-parser ] cons cons ;
|
||||
|
||||
DEFER: <*>
|
||||
|
||||
: (<*>) ( parser -- parser )
|
||||
#! Non-delayed implementation of <*>
|
||||
dup <*> <:&> [ ] succeed <|> ;
|
||||
|
||||
: <*> ( parser -- parser )
|
||||
#! Return a parser that accepts zero or more occurences of the original
|
||||
#! parser.
|
||||
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ [ ] succeed <|> ;
|
||||
[ (<*>) call ] cons ;
|
||||
|
||||
: (<+>) ( parser -- parser )
|
||||
#! Non-delayed implementation of <+>
|
||||
dup <*> <:&> ;
|
||||
|
||||
: <+> ( parser -- parser )
|
||||
#! Return a parser that accepts one or more occurences of the original
|
||||
#! parser.
|
||||
dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ ;
|
||||
[ (<+>) call ] cons ;
|
||||
|
||||
: (<?>) ( parser -- parser )
|
||||
#! Non-delayed implementation of <?>
|
||||
[ unit ] <@ [ ] succeed <|> ;
|
||||
|
||||
: <?> ( parser -- parser )
|
||||
#! Return a parser where its construct is optional. It may or may not occur.
|
||||
[ ] succeed <|> ;
|
||||
#! Return a parser that optionally uses the parser
|
||||
#! if that parser would be successfull.
|
||||
[ (<?>) call ] cons ;
|
||||
|
||||
: <first> ( parser -- parser )
|
||||
#! Transform a parser into a parser that only returns the first success.
|
||||
[
|
||||
call dup [ lcar lunit ] when
|
||||
] curry1 ;
|
||||
USE: prettyprint
|
||||
USE: parser
|
||||
USE: unparser
|
||||
USE: stdio
|
||||
|
||||
: <!*> ( parser -- parser )
|
||||
#! Version of <*> that only returns the first success.
|
||||
<*> <first> ;
|
||||
! Testing <&>
|
||||
: test1 "abcd" "a" token "b" token <&> call [ . ] leach ;
|
||||
: test1a "abcd" "a" token "b" token <&> "c" token <&> call [ . ] leach ;
|
||||
: test1b "abcd" "a" token "b" token "c" token <&> <&> call [ . ] leach ;
|
||||
: test2 "decd" "a" token "b" token <&> call [ . ] leach ;
|
||||
: test3 "dbcd" "a" token "b" token <&> call [ . ] leach ;
|
||||
: test4 "adcd" "a" token "b" token <&> call [ . ] leach ;
|
||||
|
||||
: <!+> ( parser -- parser )
|
||||
#! Version of <+> that only returns the first success.
|
||||
<+> <first> ;
|
||||
! Testing <|>
|
||||
: test5 "abcd" "a" token "b" token <|> call [ . ] leach ;
|
||||
: test6 "bbcd" "a" token "b" token <|> call [ . ] leach ;
|
||||
: test7 "cbcd" "a" token "b" token <|> call [ . ] leach ;
|
||||
|
||||
: ab-test
|
||||
"a" token <*> "b" token <&> "aaaaab" swap call [ . ] leach ;
|
||||
! Testing sp
|
||||
: test8 " abcd" "a" token call [ . ] leach ;
|
||||
: test9 " abcd" "a" token sp call [ . ] leach ;
|
||||
|
||||
: ab-test2
|
||||
"a" token <*> "b" token <&> [ "a" "a" "a" "b" ] swap call [ . ] leach ;
|
||||
! Testing just
|
||||
: test10 "abcd" "abcd" token "abc" token <|> call [ . ] leach ;
|
||||
: test11 "abcd" "abcd" token "abc" token <|> just call [ . ] leach ;
|
||||
|
||||
: a "a" token "a" token <&> epsilon <|> ;
|
||||
: b "b" token epsilon <|> ;
|
||||
: c "c" token "c" token <&> ;
|
||||
: d "d" token "d" token <&> ;
|
||||
: count-a "a" token [ count-a ] delayed-parser &> "b" token <& [ 1 + ] <@ 0 succeed <|> ;
|
||||
: tca "aaabbb" count-a call [ . ] leach ;
|
||||
! Testing <@
|
||||
: test12 "01234" [ digit? ] satisfy call [ . ] leach ;
|
||||
: test13 "01234" [ digit? ] satisfy [ digit> ] <@ call [ . ] leach ;
|
||||
|
||||
: parse-digit ( -- parser )
|
||||
#! Return a parser for digits
|
||||
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
|
||||
! Testing some
|
||||
: test14 "begin1" "begin" token call [ . ] leach ;
|
||||
: test15 "This should fail with an error" print
|
||||
"begin1" "begin" token some call . ;
|
||||
: test16 "begin" "begin" token some call . ;
|
||||
|
||||
: (reduce) ( start quot list -- value )
|
||||
#! Call quot with start and the first value in the list.
|
||||
#! quot is then called with the result of quot and the
|
||||
#! next item in the list until the list is exhausted.
|
||||
uncons >r swap dup swap r> call r> r> dup [
|
||||
(reduce)
|
||||
] [
|
||||
2drop
|
||||
] ifte ;
|
||||
! parens test function
|
||||
: parens ( -- parser )
|
||||
#! Return a parser that parses nested parentheses.
|
||||
[ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ] ;
|
||||
|
||||
: reduce ( list start quot -- value )
|
||||
#! Call quot with start and the first value in the list.
|
||||
#! quot is then called with the result of quot and the
|
||||
#! next item in the list until the list is exhausted.
|
||||
rot (reduce) ;
|
||||
: test17 "" parens call [ . ] leach ;
|
||||
: test18 "()" parens call [ . ] leach ;
|
||||
: test19 "((()))" parens call [ . ] leach ;
|
||||
|
||||
: natural ( -- parser )
|
||||
#! a parser for natural numbers.
|
||||
parse-digit <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
|
||||
! <& parser and &> parser
|
||||
: test20 "abcd" "a" token "b" token <&> call [ . ] leach ;
|
||||
: test21 "abcd" "a" token "b" token <& call [ . ] leach ;
|
||||
: test22 "abcd" "a" token "b" token &> call [ . ] leach ;
|
||||
|
||||
: natural2 ( -- parser )
|
||||
#! a parser for natural numbers.
|
||||
parse-digit <!+> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
|
||||
! nesting example
|
||||
: parens-open "(" token ;
|
||||
: parens-close ")" token ;
|
||||
: nesting
|
||||
[ parens-open
|
||||
nesting &>
|
||||
parens-close <&
|
||||
nesting <&>
|
||||
[ unswons 1 + max ] <@
|
||||
0 succeed <|>
|
||||
call ] ;
|
||||
|
||||
: integer ( -- parser )
|
||||
#! A parser that can parser possible negative numbers.
|
||||
"-" token <?> [ drop -1 ] <@ natural2 <&> [ 1 [ * ] reduce ] <@ ;
|
||||
: test23 "" nesting just call [ . ] leach ;
|
||||
: test24 "()" nesting just call [ . ] leach ;
|
||||
: test25 "(())" nesting just call [ . ] leach ;
|
||||
: test26 "()(()(()()))()" nesting just call [ . ] leach ;
|
||||
|
||||
: identifier ( -- parser )
|
||||
#! Parse identifiers
|
||||
[ letter? ] satisfy <+> [ car cat ] <@ ;
|
||||
! Testing <*> and <:&>
|
||||
: test27 "1234" "1" token <*> call [ . ] leach ;
|
||||
: test28 "1111234" "1" token <*> call [ . ] leach ;
|
||||
: test28a "1111234" "1" token <*> [ car cat unit ] <@ call [ . ] leach ;
|
||||
: test29 "234" "1" token <*> call [ . ] leach ;
|
||||
: pdigit [ digit? ] satisfy [ digit> ] <@ ;
|
||||
: pnatural pdigit <*> ;
|
||||
: pnatural2 pnatural [ car [ >digit ] map cat dup pempty? [ drop 0 ] [ str>number ] ifte unit ] <@ ;
|
||||
: test30 "12345" pnatural2 call [ . ] leach ;
|
||||
|
||||
: identifier2 ( -- parser )
|
||||
#! Parse identifiers
|
||||
[ letter? ] satisfy <!+> [ car cat ] <@ ;
|
||||
! Testing <+>
|
||||
: test31 "1234" "1" token <+> call [ . ] leach ;
|
||||
: test32 "1111234" "1" token <+> call [ . ] leach ;
|
||||
: test33 "234" "1" token <+> call [ . ] leach ;
|
||||
|
||||
: ints ( -- parser )
|
||||
integer "+" token [ drop [ [ + ] ] ] <@ <&>
|
||||
integer <&> [ call swap call ] <@ ;
|
||||
|
||||
: url-quotable ( -- parser )
|
||||
! [a-zA-Z0-9/_?] re-matches
|
||||
[ letter? ] satisfy
|
||||
[ LETTER? ] satisfy <|>
|
||||
[ digit? ] satisfy <|>
|
||||
CHAR: / symbol <|>
|
||||
CHAR: _ symbol <|>
|
||||
CHAR: ? symbol <|> just ;
|
||||
|
||||
: http-header ( -- parser )
|
||||
[ CHAR: : = not ] satisfy <!+> [ car cat ] <@
|
||||
": " token [ drop f ] <@ <&>
|
||||
[ drop t ] satisfy <!+> [ car cat ] <@ <&> just ;
|
||||
|
||||
: parse-http-header ( string -- [ name value ] )
|
||||
http-header call lcar car ;
|
||||
|
||||
: get-request ( -- parser )
|
||||
"GET" token
|
||||
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
|
||||
|
||||
: post-request ( -- parser )
|
||||
"POST" token
|
||||
[ drop t ] satisfy <!+> sp [ car cat ] <@ <&> ;
|
||||
|
||||
: all-request ( -- parser )
|
||||
"POST" token
|
||||
[ 32 = not ] satisfy <!+> sp [ car cat ] <@ <&>
|
||||
"HTTP/1.0" token sp <&> ;
|
||||
|
||||
: split-url ( -- parser )
|
||||
"http://" token
|
||||
[ CHAR: / = not ] satisfy <!*> [ car cat ] <@ <&>
|
||||
"/" token <&>
|
||||
[ drop t ] satisfy <!*> [ car cat ] <@ <&> ;
|
||||
! Testing <?>
|
||||
: test34 "ab" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
|
||||
: test35 "ac" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
|
||||
: test36 "a5b" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
|
||||
: pinteger "-" token <?> pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ;
|
||||
: test37 "123" pinteger call [ . ] leach ;
|
||||
: test38 "-123" pinteger call [ . ] leach ;
|
||||
|
||||
|
|
|
@ -16,17 +16,17 @@
|
|||
operation.</p>
|
||||
<p>The result returned by a parser is known as a 'list of
|
||||
successes'. It is a lazy list of standard Factor cons cells. Each cons
|
||||
cell is a result of a parse. The car of the cell is the result of the
|
||||
parse operation and the cdr of the cell is the remaining input left to
|
||||
be parsed.</p>
|
||||
<p>A list is used for the result as a parse operation can potentially
|
||||
cell is a result of a parse. The car of the cell is the remaining
|
||||
input left to be parsed and the cdr of the cell is the result of the
|
||||
parsing operation.</p>
|
||||
<p>A lazy list is used for the result as a parse operation can potentially
|
||||
return many successful results. For example, a parser that parses one
|
||||
or more digits will return more than one result for the input "123". A
|
||||
successful parse could be "1", "12" or "123".</p>
|
||||
<p>The list is lazy so if only one parse result is required the
|
||||
remaining results won't actually be processed if they are not
|
||||
requested. This improves efficiency.</p>
|
||||
<p>The car of the result pair can be any value that the parser wishes
|
||||
<p>The cdr of the result pair can be any value that the parser wishes
|
||||
to return. It could be the successful portion of the input string
|
||||
parsed, an abstract syntax tree representing the parsed input, or even
|
||||
a quotation that should get called for later processing.</p>
|
||||
|
@ -39,32 +39,32 @@ general parsers work and the 'list of sucesses' that are returned as a
|
|||
result.</p>
|
||||
<pre class="code">
|
||||
(1) : char-a ( inp -- result )
|
||||
0 over str-nth CHAR: a = [
|
||||
1 str-tail CHAR: a swons lunit
|
||||
0 over string-nth CHAR: a = [
|
||||
1 swap string-tail CHAR: a cons unit delay lunit
|
||||
] [
|
||||
drop f
|
||||
drop lnil
|
||||
] ifte ;
|
||||
(2) "atest" char-a [ [ . ] leach ] when*
|
||||
=> [ 97 | "test" ]
|
||||
=> [[ "test" 97 ]]
|
||||
(3) "test" char-a [ [ . ] leach ] when*
|
||||
=>
|
||||
=>
|
||||
</pre>
|
||||
<p>'char-a' is a parser that only accepts the character 'a' in the
|
||||
input string. When passed an input string with a string with a leading
|
||||
'a' then the 'list of successes' has 1 result value. The car of that
|
||||
result value is the character 'a' successfully parsed, and the cdr is
|
||||
'a' then the 'list of successes' has 1 result value. The cdr of that
|
||||
result value is the character 'a' successfully parsed, and the car is
|
||||
the remaining input string. On failure of the parse an empty list is
|
||||
returned.</p>
|
||||
<p>The parser combinator library provides a combinator, <&>, that takes
|
||||
<p>The parser combinator library provides a combinator, <&>, that takes
|
||||
two parsers off the stack and returns a parser that calls the original
|
||||
two in sequence. An example of use would be calling 'char-a' twice,
|
||||
which would then result in an input string expected with two 'a'
|
||||
characters leading:</p>
|
||||
<pre class="code">
|
||||
(1) "aatest" [ char-a ] [ char-a ] <&> call
|
||||
=> < list of successes >
|
||||
(1) "aatest" [ char-a ] [ char-a ] <&> call
|
||||
=> < list of successes >
|
||||
(2) [ . ] leach
|
||||
=> [ [ 97 97 ] | "test" ]
|
||||
=> [[ "test" [[ 97 97 ]] ]]
|
||||
</pre>
|
||||
<h2>Tokens</h2>
|
||||
<p>Creating parsers for specfic characters and tokens can be a chore
|
||||
|
@ -72,13 +72,13 @@ so there is a word that, given a string token on the stack, returns
|
|||
a parser that parses that particular token:</p>
|
||||
<pre class="code">
|
||||
(1) "begin" token
|
||||
=> < a parser that parses the token "begin" >
|
||||
(2) dup "this should fail" swap call .
|
||||
=> f
|
||||
=> < a parser that parses the token "begin" >
|
||||
(2) dup "this should fail" swap call lnil? .
|
||||
=> t
|
||||
(3) "begin a successfull parse" swap call
|
||||
=> < lazy list >
|
||||
=> < lazy list >
|
||||
(4) [ . ] leach
|
||||
=> [ "begin" | " a successfull parse" ]
|
||||
=> [[ " a successfull parse" "begin" ]]
|
||||
</pre>
|
||||
<h2>Predicate matching</h2>
|
||||
<p>The word 'satisfy' takes a quotation from the top of the stack and
|
||||
|
@ -89,9 +89,9 @@ true then the parse is successful, otherwise it fails:</p>
|
|||
(1) : digit-parser ( -- parser )
|
||||
[ digit? ] satisfy ;
|
||||
(2) "5" digit-parser call [ . ] leach
|
||||
=> [ 53 | "" ]
|
||||
(3) "a" digit-parser call
|
||||
=> f
|
||||
=> [[ "" 53 ]]
|
||||
(3) "a" digit-parser call lnil? .
|
||||
=> t
|
||||
</pre>
|
||||
<p>Note that 'digit-parser' returns a parser, it is not the parser
|
||||
itself. It is really a parser generating word like 'token'. Whereas
|
||||
|
@ -105,14 +105,14 @@ parser parsed. The result of the '<*>' generated parser will be a list
|
|||
of the successful results returned by the original parser.</p>
|
||||
<pre class="code">
|
||||
(1) digit-parser <*>
|
||||
=> < parser >
|
||||
=> < parser >
|
||||
(2) "123" swap call
|
||||
=> < lazy list >
|
||||
=> < lazy list >
|
||||
(3) [ . ] leach
|
||||
=> [ [ [ 49 50 51 ] ] | "" ]
|
||||
[ [ [ 49 50 ] ] | "3" ]
|
||||
[ [ [ 49 ] ] | "23" ]
|
||||
[ f | "123" ]
|
||||
=> [ "" [ 49 50 51 ] ]
|
||||
[ "3" [ 49 50 ] ]
|
||||
[ "23" [ 49 ] ]
|
||||
[ "123" ]
|
||||
</pre>
|
||||
<p>In this case there are multiple successful parses. This is because
|
||||
the occurrence of zero or more digits happens more than once. There is
|
||||
|
@ -124,7 +124,7 @@ remaining parse results are never produced.</p>
|
|||
parsed. Sometimes you want this to be something else, like an abstract
|
||||
syntax tree, or some calculation. For the digit case we may want the
|
||||
actual integer number.</p>
|
||||
<p>For this we can use the '<@' parser
|
||||
<p>For this we can use the '<@' parser
|
||||
combinator. This combinator takes a parser and a quotation on the
|
||||
stack and returns a new parser. When the new parser is called it will
|
||||
call the original parser to produce the results, then it will call the
|
||||
|
@ -132,27 +132,48 @@ quotation on each successfull result, and the result of that quotation
|
|||
will be the result of the parse:</p>
|
||||
<pre class="code">
|
||||
(1) : digit-parser2 ( -- parser )
|
||||
[ digit? ] satisfy [ CHAR: 0 - ] <@ ;
|
||||
[ digit? ] satisfy [ digit> ] <@ ;
|
||||
(2) "5" digit-parser2 call [ . ] leach
|
||||
=> [ 5 | "" ]
|
||||
=> [[ "" 5 ]]
|
||||
</pre>
|
||||
<p>Notice that now the result is the actual integer '5' rather than
|
||||
character code '53'.</p>
|
||||
<pre class="code">
|
||||
(1) : natural-parser ( -- parser )
|
||||
digit-parser2 <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ;
|
||||
(2) "123" natural-parser call
|
||||
=> < lazy list >
|
||||
(3) [ . ] leach
|
||||
=> [ [ 123 ] | "" ]
|
||||
[ [ 12 ] | "3" ]
|
||||
[ [ 1 ] | "23" ]
|
||||
[ f | "123" ]
|
||||
(1) : digit-list>number ( list -- number )
|
||||
#! Converts a list of digits to a number
|
||||
[ >digit ] map cat dup string-length 0 = [
|
||||
drop 0
|
||||
] [
|
||||
str>number
|
||||
] ifte ;
|
||||
(2) : natural-parser ( -- parser )
|
||||
digit-parser2 <*> [ car digit-list>number unit ] <@ ;
|
||||
(3) "123" natural-parser call
|
||||
=> < lazy list >
|
||||
(4) [ . ] leach
|
||||
=> [ "" 123 ]
|
||||
[ "3" 12 ]
|
||||
[ "23" 1 ]
|
||||
[ "123" 0 ]
|
||||
[ [ 123 ] | "" ]
|
||||
</pre>
|
||||
<p>The number parsed is the actual integer number due to the operation
|
||||
of the '<@' word. This allows parsers to not only parse the input
|
||||
of the '<@' word. This allows parsers to not only parse the input
|
||||
string but perform operations and transformations on the syntax tree
|
||||
returned.</p>
|
||||
<p>A useful debugging method to work out what to use in the quotation
|
||||
passed to <@ is to write an initial version of the parser that just
|
||||
displays the topmost item on the stack:</p>
|
||||
<pre class="code">
|
||||
(1) : natural-parser-debug ( -- parser )
|
||||
digit-parser2 <*> [ "debug: " write dup . ] <@ ;
|
||||
(3) "123" natural-parser-debug call lcar .
|
||||
=> debug: [ [ 1 2 3 ] ]
|
||||
[ "" [ 1 2 3 ] ]
|
||||
</pre>
|
||||
<p>From the debug output we can see how to manipulate the result to
|
||||
get what we want. In this case it's the quotation in the previous example.</p>
|
||||
|
||||
<h2>Sequential combinator</h2>
|
||||
<p>To create a full grammar we need a parser combinator that does
|
||||
sequential compositions. That is, given two parsers, the sequential
|
||||
|
@ -160,41 +181,101 @@ combinator will first run the first parser, and then run the second on
|
|||
the remaining text to be parsed. As the first parser returns a lazy
|
||||
list, the second parser will be run on each item of the lazy list. Of
|
||||
course this is done lazily so it only ends up being done when those
|
||||
list items are requested. The sequential combinator word is <&>.</p>
|
||||
list items are requested. The sequential combinator word is <&>.</p>
|
||||
<pre class="code">
|
||||
( 1 ) "number:" token
|
||||
=> < parser that parses the text 'number:' >
|
||||
( 2 ) natural
|
||||
=> < parser that parses natural numbers >
|
||||
( 3 ) <&>
|
||||
=> < parser that parses 'number:' followed by a natural >
|
||||
( 4 ) "number:1000" swap call
|
||||
=> < list of successes >
|
||||
=> < parser that parses the text 'number:' >
|
||||
( 2 ) natural-parser
|
||||
=> < parser that parses natural numbers >
|
||||
( 3 ) <&>
|
||||
=> < parser that parses 'number:' followed by a natural >
|
||||
( 4 ) "number:100" swap call
|
||||
=> < list of successes >
|
||||
( 5 ) [ . ] leach
|
||||
=> [ [ "number:" 1000 ] | "" ]
|
||||
[ [ "number:" 100 ] | "0" ]
|
||||
[ [ "number:" 10 ] | "00" ]
|
||||
[ [ "number:" 1 ] | "000" ]
|
||||
[ [ "number:" ] | "1000" ]
|
||||
=> [ "" "number:" 100 ]
|
||||
[ "0" "number:" 10 ]
|
||||
[ "00" "number:" 1 ]
|
||||
[ "100" "number:" 0 ]
|
||||
</pre>
|
||||
<p>In this example we might prefer not to have the parse result
|
||||
contain the token, we want just the number. Two alternatives to <&>
|
||||
provide the ability to select which result to use from the two
|
||||
parsers. These operators are <& and &>. The < or > points
|
||||
in the direction of which parser to retain the results from. So our
|
||||
example above could be:</p>
|
||||
<pre class="code">
|
||||
( 1 ) "number:" token
|
||||
=> < parser that parses the text 'number:' >
|
||||
( 2 ) natural-parser
|
||||
=> < parser that parses natural numbers >
|
||||
( 3 ) &>
|
||||
=> < parser that parses 'number:' followed by a natural >
|
||||
( 4 ) "number:100" swap call
|
||||
=> < list of successes >
|
||||
( 5 ) [ . ] leach
|
||||
=> [ "" 100 ]
|
||||
[ "0" 10 ]
|
||||
[ "00" 1 ]
|
||||
[ "100" 0 ]
|
||||
</pre>
|
||||
<p>Notice how the parse result only contains the number due to &>
|
||||
being used to retain the result of the second parser.</p>
|
||||
|
||||
<h2>Choice combinator</h2>
|
||||
<p>As well as a sequential combinator we need an alternative
|
||||
combinator. The word for this is <|>. It takes two parsers from the
|
||||
combinator. The word for this is <|>. It takes two parsers from the
|
||||
stack and returns a parser that will first try the first parser. If it
|
||||
succeeds then the result for that is returned. If it fails then the
|
||||
second parser is tried and its result returned.</p>
|
||||
<pre class="code">
|
||||
( 1 ) "one" token
|
||||
=> < parser that parses the text 'one' >
|
||||
=> < parser that parses the text 'one' >
|
||||
( 2 ) "two" token
|
||||
=> < parser that parses the text 'two' >
|
||||
( 3 ) <|>
|
||||
=> < parser that parses 'one' or 'two' >
|
||||
=> < parser that parses the text 'two' >
|
||||
( 3 ) <|>
|
||||
=> < parser that parses 'one' or 'two' >
|
||||
( 4 ) "one" over call [ . ] leach
|
||||
=> [ "one" | "" ]
|
||||
=> [[ "" "one" ]]
|
||||
( 5 ) "two" swap call [ . ] leach
|
||||
=> [ "two" | "" ]
|
||||
=> [[ "" "two" ]]
|
||||
</pre>
|
||||
|
||||
<h2>Option combinator</h2>
|
||||
<p>The option combinator, <?> allows adding optional elements to
|
||||
a parser. It takes one parser off the stack and if the parse succeeds
|
||||
add it to the result tree, otherwise it will ignore it and
|
||||
continue. The example below extends our natural-parser to parse
|
||||
integers with an optional leading minus sign.</p>
|
||||
<pre class="code">
|
||||
( 1 ) : integer-parser
|
||||
"-" token <?> natural-parser <&> ;
|
||||
( 2 ) "200" integer-parser call [ . ] leach
|
||||
=> [ "" [ ] 200 ]
|
||||
[ "0" [ ] 20 ]
|
||||
[ "00" [ ] 2 ]
|
||||
[ "200" [ ] 0 ]
|
||||
( 3 ) "-200" integer-parser call [ . ] leach
|
||||
=> [ "" [ "-" ] 200 ]
|
||||
[ "0" [ "-" ] 20 ]
|
||||
[ "00" [ "-" ] 2 ]
|
||||
[ "200" [ "-" ] 0 ]
|
||||
[ "-200" [ ] 0 ]
|
||||
( 4 ) : integer-parser2
|
||||
integer-parser [ uncons swap [ car -1 * ] when ] <@ ;
|
||||
( 5 ) "200" integer-parser2 call [ . ] leach
|
||||
=> [ "" 200 ]
|
||||
[ "0" 20 ]
|
||||
[ "00" 2 ]
|
||||
[ "200" 0 ]
|
||||
( 6 ) "-200" integer-parser2 call [ . ] leach
|
||||
=> [ "" -200 ]
|
||||
[ "0" -20 ]
|
||||
[ "00" -2 ]
|
||||
[ "200" 0 ]
|
||||
[ "-200" 0 ]
|
||||
|
||||
</pre>
|
||||
|
||||
<h2>Skipping Whitespace</h2>
|
||||
<p>A parser transformer exists, the word 'sp', that takes an existing
|
||||
parser and returns a new one that will first skip any whitespace
|
||||
|
@ -202,59 +283,54 @@ before calling the original parser. This makes it easy to write
|
|||
grammers that avoid whitespace without having to explicitly code it
|
||||
into the grammar.</p>
|
||||
<pre class="code">
|
||||
( 1 ) natural
|
||||
=> < a parser for natural numbers >
|
||||
( 2 ) "+" token sp
|
||||
=> < parser for '+' which ignores leading whitespace >
|
||||
( 3 ) over sp
|
||||
=> < a parser for natural numbers skipping leading whitespace >
|
||||
( 4 ) <&> <&>
|
||||
=> < a parser for natural + natural >
|
||||
( 5 ) "1 + 2" over call lcar .
|
||||
=> [ [ 1 "+" 2 ] | "" ]
|
||||
( 6 ) "3+4" over call lcar .
|
||||
=> [ [ 3 "+" 4 ] | "" ]
|
||||
( 1 ) " 123" natural-parser call [ . ] leach
|
||||
=> [ " 123" 0 ]
|
||||
( 2 ) " 123" natural-parser sp call [ . ] leach
|
||||
=> [ "" 123 ]
|
||||
[ "3" 12 ]
|
||||
[ "23" 1 ]
|
||||
[ "123" 0 ]
|
||||
</pre>
|
||||
<h2>Eval grammar example</h2>
|
||||
<p>This example presents a simple grammar that will parse a number
|
||||
followed by an operator and another number. A factor expression that
|
||||
computes the entered value will be executed.</p>
|
||||
<pre class="code">
|
||||
( 1 ) natural
|
||||
=> < a parser for natural numbers >
|
||||
( 2 ) "/" token "*" token "+" token "-" token <|> <|> <|>
|
||||
=> < a parser for the operator >
|
||||
( 3 ) sp [ unit [ eval ] append unit ] <@
|
||||
=> < operator parser that skips whitespace and converts to a
|
||||
( 1 ) natural-parser
|
||||
=> < a parser for natural numbers >
|
||||
( 2 ) "/" token "*" token "+" token "-" token <|> <|> <|>
|
||||
=> < a parser for the operator >
|
||||
( 3 ) sp [ "\\ " swap cat2 eval unit ] <@
|
||||
=> < operator parser that skips whitespace and converts to a
|
||||
factor expression >
|
||||
( 4 ) natural sp
|
||||
=> < a whitespace skipping natural parser >
|
||||
( 5 ) <&> <&> [ call swap call ] <@
|
||||
=> < a parser that parsers the expression, converts it to
|
||||
( 4 ) natural-parser sp
|
||||
=> < a whitespace skipping natural parser >
|
||||
( 5 ) <&> <&> [ uncons uncons swap append append call ] <@
|
||||
=> < a parser that parsers the expression, converts it to
|
||||
factor, calls it and puts the result in the parse tree >
|
||||
( 6 ) "123 + 456" over call lcar .
|
||||
=> [ 579 | "" ]
|
||||
=> [[ "" 579 ]]
|
||||
( 7 ) "300-100" over call lcar .
|
||||
=> [ 200 | "" ]
|
||||
=> [[ "" 200 ]]
|
||||
( 8 ) "200/2" over call lcar .
|
||||
=> [ 100 | "" ]
|
||||
=> [[ "" 100 ]]
|
||||
</pre>
|
||||
<p>It looks complicated when expanded as above but the entire parser,
|
||||
factored a little, looks quite readable:</p>
|
||||
<pre class="code">
|
||||
( 1 ) : operator ( -- parser )
|
||||
"/" token
|
||||
"*" token <|>
|
||||
"+" token <|>
|
||||
"-" token <|>
|
||||
[ unit [ eval ] append unit ] <@ ;
|
||||
"*" token <|>
|
||||
"+" token <|>
|
||||
"-" token <|>
|
||||
[ "\\ " swap cat2 eval unit ] <@ ;
|
||||
( 2 ) : expression ( -- parser )
|
||||
natural
|
||||
operator sp <&>
|
||||
natural sp <&>
|
||||
[ call swap call ] <@ ;
|
||||
natural-parser
|
||||
operator sp <&>
|
||||
natural-parser sp <&>
|
||||
[ uncons swap uncons -rot append append reverse call ] <@ ;
|
||||
( 3 ) "40+2" expression call lcar .
|
||||
=> [ 42 | "" ]
|
||||
=> [[ "" 42 ]]
|
||||
</pre>
|
||||
<p class="footer">
|
||||
News and updates to this software can be obtained from the authors
|
||||
|
|
Loading…
Reference in New Issue