diff --git a/contrib/parser-combinators/lazy-examples.factor b/contrib/parser-combinators/lazy-examples.factor new file mode 100644 index 0000000000..5cb82e4b0a --- /dev/null +++ b/contrib/parser-combinators/lazy-examples.factor @@ -0,0 +1,63 @@ +! Copyright (C) 2004 Chris Double. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! 1. Redistributions of source code must retain the above copyright notice, +! this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +IN: lazy-examples +USE: lazy +USE: stack +USE: arithmetic +USE: lists +USE: combinators +USE: kernel +USE: logic + +: lfrom ( n -- llist ) + #! Return a lazy list of increasing numbers starting + #! from the initial value 'n'. + dup [ succ lfrom ] curry1 lcons ; + +: lfrom-by ( n quot -- llist ) + #! Return a lazy list of values starting from n, with + #! each successive value being the result of applying quot to + #! n. + dupd [ dup [ call ] dip lfrom-by ] curry2 lcons ; + +: lnaturals 0 lfrom ; +: lpositves 1 lfrom ; +: levens 0 [ 2 + ] lfrom-by ; +: lodds 1 lfrom [ 2 mod 1 = ] lsubset ; +: lpowers-of-2 1 [ 2 * ] lfrom-by ; +: lones 1 [ ] lfrom-by ; +: lsquares lnaturals [ dup * ] lmap ; +: first-five-squares 5 lsquares ltake ; + +: divisible-by? ( a b -- bool ) + #! Return true if a is divisible by b + mod 0 = ; + +: sieve ( llist - llist ) + #! Given a lazy list of numbers, use the sieve of eratosthenes + #! algorithm to return a lazy list of primes. + luncons over [ divisible-by? not ] curry1 lsubset [ sieve ] curry1 lcons ; + +: lprimes 2 lfrom sieve ; + +: first-ten-primes 10 lprimes ltake ; \ No newline at end of file diff --git a/contrib/parser-combinators/lazy.factor b/contrib/parser-combinators/lazy.factor new file mode 100644 index 0000000000..696e6d4529 --- /dev/null +++ b/contrib/parser-combinators/lazy.factor @@ -0,0 +1,172 @@ +! Copyright (C) 2004 Chris Double. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! 1. Redistributions of source code must retain the above copyright notice, +! this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +IN: lazy +USE: lists +USE: stack +USE: arithmetic +USE: stdio +USE: prettyprint +USE: kernel +USE: combinators +USE: logic + +: curry1 ( n quot -- quot ) + #! Return a quotation that when called will initially + #! have 'n' pushed on the stack. + cons ; + +: curry2 ( n1 n2 quot -- quot ) + #! Return a quotation that when called will initially + #! have 'n1' and 'n2' pushed on the stack. + cons cons ; + +: delay ( value -- promise ) + #! Return a promise that when 'forced' returns the original value. + unit ; + +: force ( promise -- value ) + #! Return the value associated with the promise. + call ; + +: lcons ( car promise -- lcons ) + #! Return a lazy pair, where the cdr is a promise and must + #! be forced to return the value. + cons ; + +: lunit ( a -- llist ) + #! Construct a lazy list of one element. + [ ] delay lcons ; + +: lcar ( lcons -- car ) + #! Return the car of a lazy pair. + car ; + +: lcdr ( lcons -- cdr ) + #! Return the cdr of a lazy pair, implicitly forcing it. + cdr force ; + +: lnth ( n llist -- value ) + #! Return the nth item in a lazy list + swap [ lcdr ] times lcar ; + +: luncons ( lcons -- car cdr ) + #! Return the car and forced cdr of the lazy cons. + uncons force ; + +: (ltake) ( n llist accum -- list ) + >r >r pred dup 0 < [ + drop r> drop r> nreverse + ] [ + r> luncons swap r> cons (ltake) + ] ifte ; + +: ltake ( n llist -- list ) + #! Return a list containing the first n items from + #! the lazy list. + [ ] (ltake) ; + +: lmap ( llist quot -- llist ) + #! Return a lazy list containing the collected result of calling + #! quot on the original lazy list. + over [ ] = [ + 2drop [ ] + ] [ + [ luncons ] dip + dup swapd + [ lmap ] curry2 + [ call ] dip + lcons + ] ifte ; + +: lsubset ( llist pred -- llist ) + #! Return a lazy list containing only the items from the original + #! lazy list for which the predicate returns a value other than f. + over [ ] = [ + 2drop [ ] + ] [ + [ luncons ] dip + dup swapd + [ lsubset ] curry2 + -rot dupd call [ + swap lcons + ] [ + drop call + ] ifte + ] ifte ; + +: lappend* ; +: (lappend*) ; +: lappend-list* ; + +: lappend-item* ( llists list item -- llist ) + -rot [ lappend-list* ] curry2 lcons ; + +: lappend-list* ( llists list -- llist ) + dup [ + #! non-empty list + luncons swap lappend-item* + ] [ + #! empty list + drop lappend* + ] ifte ; + + +: (lappend*) ( llists -- llist ) + dup lcar [ ( llists ) + #! Yes, the first item in the list is a valid llist + luncons swap lappend-list* + ] [ + #! The first item in the list is an empty list. + #! Resume passing the next list. + lcdr lappend* + ] ifte ; + +: lappend* ( llists -- llist ) + #! Given a lazy list of lazy lists, return a lazy list that + #! works through all of the sub-lists in sequence. + dup [ + (lappend*) + ] [ + #! Leave empty list on the stack + ] ifte ; + +: list>llist ( list -- llist ) + #! Convert a list to a lazy list. + dup [ + uncons [ list>llist ] curry1 lcons + ] when ; + +: lappend ( llist1 llist2 -- llist ) + #! Concatenate two lazy lists such that they appear to be one big lazy list. + 2list list>llist lappend* ; + +: leach ( llist quot -- ) + #! Call the quotation on each item in the lazy list. + #! Warning: If the list is infinite then this will + #! never return. + over [ + >r luncons r> tuck >r >r call r> r> leach + ] [ + 2drop + ] ifte ; + diff --git a/contrib/parser-combinators/lazy.html b/contrib/parser-combinators/lazy.html new file mode 100644 index 0000000000..97e9c1faee --- /dev/null +++ b/contrib/parser-combinators/lazy.html @@ -0,0 +1,280 @@ + +
+The 'lazy' vocabulary adds lazy lists to Factor. This provides the + ability to describe infinite structures, and to delay execution of + expressions until they are actually used.
+Lazy lists, like normal lists, are composed of a head and tail. In + a lazy list the tail is something called a 'promise'. To convert a + 'promise' into its actual value a word called 'force' is used. To + convert a value into a 'promise' the word to use is 'delay'.
+Many of the lazy list words are named similar to the standard list + words but with an 'l' suffixed to it. Here are the commonly used + words and their equivalent list operation:
+Lazy List | Normal List |
---|---|
lcons | cons |
lunit | unit |
lcar | car |
lcdr | cdr |
lnth | nth |
luncons | uncons |
lmap | map |
lsubset | subset |
leach | each |
lappend | append |
A few additional words specific to lazy lists are:
+ltake | Returns a normal list containing a specified +number of items from the lazy list. |
lappend* | Given a lazy list of lazy lists, +concatenate them together in a lazy manner, returning a single lazy +list. |
list>llist | Given a normal list, return a lazy list +that contains the same elements as the normal list. |
A couple of helper functions are also provided by the lazy +vocabulary.
+curry1 | Given a value and a quotation, returns a new +quotation that when called will have the value on the stack. |
curry2 | Given two values and a quotation, returns a new +quotation that when called will have the two values on the +stack. |
Provides the same effect as 'cons' does for normal lists. It +creates a cons cell where the first element is the value given and the +second element is a promise.
+ +A promise is either a value that has had 'force' called on it, or +a quotation that when 'call' is applied to it, returns the actual +value.
++ ( 1 ) 5 6 delay lcons dup . + => [ 5 6 ] + ( 2 ) dup lcar . + => 5 + ( 3 ) dup lcdr . + => 6 ++ + + +
Provides the same effect as 'unit' does for normal lists. It +creates a lazy list where the first element is the value given.
++ ( 1 ) 42 lunit dup . + => [ 42 f ] + ( 2 ) dup lcar . + => 42 + ( 3 ) dup lcdr . + => f + ( 4 ) [ . ] leach + => 42 ++ + + +
Provides the same effect as 'car' does for normal lists. It +returns the first element in a lazy cons cell.
++ ( 1 ) 42 lunit dup . + => [ 42 f ] + ( 2 ) lcar . + => 42 ++ + + +
Provides the same effect as 'cdr' does for normal lists. It +returns the second element in a lazy cons cell and forces it. This +causes that element to be evaluated immediately.
++ ( 1 ) 5 [ 5 6 + ] lcons dup . + => [ 5 5 6 + ] + ( 2 ) lcdr . + => 11 ++ +
+ ( 1 ) 5 lfrom dup . + => [ 5 5 succ lfrom ] + ( 2 ) lcdr dup lcar . + => 6 + ( 3 ) lcdr dup lcar . + => 7 + ( 4 ) lcdr dup lcar . + => 8 ++ + + +
Provides the same effect as 'nth' does for normal lists. It +returns the nth value in the lazy list. It causes all the values up to +'n' to be evaluated.
++ ( 1 ) 1 lfrom + => [ 1 1 succ lfrom ] + ( 2 ) 5 swap lnth . + => 6 ++ + + +
Provides the same effect as 'uncons' does for normal lists. It +returns the car and cdr of the lazy list. Note that cdr is forced +resulting in it being evaluated.
++ ( 1 ) 5 [ 6 ] lcons dup . + => [ 5 6 ] + ( 2 ) luncons .s + => { 5 6 } ++ + + +
Provides the same effect as 'map' does for normal lists. It +lazily maps over a lazy list applying the quotation to each element. +A new lazy list is returned which contains the results of the +quotation.
+When initially called lmap will only call quot on the first element +of the list. It then constructs a lazy list that performs the +next 'lmap' operation on the next element when it is evaluated. This +allows mapping over infinite lists.
++ ( 1 ) 1 lfrom + => < infinite list of incrementing numbers > + ( 2 ) [ 2 * ] lmap + => < infinite list of numbers incrementing by 2 > + ( 3 ) 5 swap ltake . + => [ 2 4 6 8 10 ] ++ + + +
Provides the same effect as 'subset' does for normal lists. It +lazily iterates over a lazy list applying the predicate quotation to each +element. If that quotation returns true, the element will be included +in the resulting lazy list. If it is false, the element will be skipped. +A new lazy list is returned which contains all elements where the +predicate returned true.
+When initially called lsubset will only call +the predicate quotation on the first element +of the list. It then constructs a lazy list that performs the +next 'lsubset' operation on the next element when it is evaluated. This +allows subsetting over infinite lists.
++ ( 1 ) 1 lfrom + => < infinite list of incrementing numbers > + ( 2 ) [ prime? ] lsubset + => < infinite list of prime numbers > + ( 3 ) 5 swap ltake . + => [ 2 3 5 7 11 ] ++ + + +
Provides the same effect as 'each' does for normal lists. It +lazily iterates over a lazy list applying the quotation to each +element. If this operation is applied to an infinite list it will +never return unless the quotation escapes out by calling a continuation.
++ ( 1 ) 1 lfrom + => < infinite list of incrementing numbers > + ( 2 ) [ 2 mod 1 = ] lsubset + => < infinite list of odd numbers > + ( 3 ) [ . ] leach + => 1 + 3 + 5 + 7 + ... for ever ... ++ + + +
Iterates over the lazy list 'n' times, appending each element to a +normal list. The normal list is returned. This provides a convenient +way of getting elements out of a lazy list.
++ ( 1 ) : ones 1 [ ones ] lcons ; + ( 2 ) 5 ones ltake + => [ 1 1 1 1 1 ] ++ + + +
Lazily appends two lists together. The actual appending is done +lazily on iteration rather than immediately so it works very fast no +matter how large the list.
++ ( 1 ) [ 1 2 3 ] list>llist [ 4 5 6 ] list>llist lappend + ( 2 ) [ . ] leach + => 1 + 2 + 3 + 4 + 5 + 6 ++ + + +
Given a lazy list of lazy lists, concatenate them together in a +lazy fashion. The actual appending is done lazily on iteration rather +than immediately so it works very fast no matter how large the lists.
++ ( 1 ) [ 1 2 3 ] list>llist + ( 2 ) [ 4 5 6 ] list>llist + ( 3 ) [ 7 8 9 ] list>llist + ( 4 ) 3list list>llist lappend* + ( 5 ) [ . ] leach + => 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 ++ + + +
Converts a normal list into a lazy list. This is done lazily so the +initial list is not iterated through immediately.
++ ( 1 ) [ 1 2 3 ] list>llist + ( 2 ) [ . ] leach + => 1 + 2 + 3 ++ + +
Copyright (c) 2004, Chris Double. All Rights Reserved.
+ diff --git a/contrib/parser-combinators/parser-combinators.factor b/contrib/parser-combinators/parser-combinators.factor new file mode 100644 index 0000000000..6e2bcb65a4 --- /dev/null +++ b/contrib/parser-combinators/parser-combinators.factor @@ -0,0 +1,433 @@ +! Copyright (C) 2004 Chris Double. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! 1. Redistributions of source code must retain the above copyright notice, +! this list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +IN: parser-combinators +USE: lazy +USE: stack +USE: lists +USE: strings +USE: arithmetic +USE: logic +USE: kernel +USE: combinators +USE: parser + +: phead ( object -- head ) + #! Polymorphic head. Return the head item of the object. + #! For a string this is the first character. + #! For a list this is the car. + [ + [ string? ] [ 0 swap str-nth ] + [ list? ] [ car ] + ] cond ; + +: ptail ( object -- tail ) + #! Polymorphic tail. Return the tail of the object. + #! For a string this is everything but the first character. + #! For a list this is the cdr. + [ + [ string? ] [ 1 str-tail ] + [ list? ] [ cdr ] + ] cond ; + +: pfirst ( object -- first ) + #! Polymorphic first + phead ; + +: psecond ( object -- second ) + #! Polymorphic second + [ + [ string? ] [ 1 swap str-nth ] + [ list? ] [ cdr car ] + ] cond ; + +: ph:t ( object -- head tail ) + #! Return the head and tail of the object. + dup phead swap ptail ; + +: pempty? ( object -- bool ) + #! Polymorphic empty test. + [ + [ string? ] [ "" = ] + [ list? ] [ not ] + ] cond ; + +: string-take ( n string -- string ) + #! Return a string with the first 'n' characters + #! of the original string. + dup str-length pick < [ + 2drop "" + ] [ + swap str-head + ] ifte ; + +: (list-take) ( n list accum -- list ) + >r >r pred dup 0 < [ + drop r> drop r> nreverse + ] [ + r> uncons swap r> cons (list-take) + ] ifte ; + +: list-take ( n list -- list ) + #! Return a list with the first 'n' characters + #! of the original list. + [ ] (list-take) ; + +: ptake ( n object -- object ) + #! Polymorphic take. + #! Return a collection of the first 'n' + #! characters from the original collection. + [ + [ string? ] [ string-take ] + [ list? ] [ list-take ] + ] cond ; + +: string-drop ( n string -- string ) + #! Return a string with the first 'n' characters + #! of the original string removed. + dup str-length pick < [ + 2drop "" + ] [ + swap str-tail + ] ifte ; + +: list-drop ( n list -- list ) + #! Return a list with the first 'n' items + #! of the original list removed. + >r pred dup 0 < [ + drop r> + ] [ + r> cdr list-drop + ] ifte ; + +: pdrop ( n object -- object ) + #! Polymorphic drop. + #! Return a collection the same as 'object' + #! but with the first n items removed. + [ + [ string? ] [ string-drop ] + [ list? ] [ list-drop ] + ] cond ; + +: ifte-head= ( string-or-list ch [ quot1 ] [ quot2 ] -- ) + #! When the character 'ch' is equal to the head + #! of the string or list, run the quot1 otherwise run quot2. + [ swap phead = ] 2dip ifte ; + +: symbol ( ch -- parser ) + #! Return a parser that parses the given symbol. + [ ( inp ch -- result ) + 2dup [ + swap ptail cons lunit + ] [ + 2drop [ ] + ] ifte-head= + ] curry1 ; + +: token ( string -- parser ) + #! Return a parser that parses the given string. + [ ( inp string -- result ) + 2dup str-length swap ptake over = [ + swap over str-length swap pdrop cons lunit + ] [ + 2drop [ ] + ] ifte + ] curry1 ; + +: satisfy ( p -- parser ) + #! Return a parser that succeeds if the predicate 'p', + #! when passed the first character in the input, returns + #! true. + [ ( inp p -- result ) + over pempty? [ + 2drop [ ] + ] [ + over phead swap call [ + ph:t cons lunit + ] [ + drop [ ] + ] ifte + ] ifte + ] curry1 ; + +: satisfy2 ( p r -- parser ) + #! Return a parser that succeeds if the predicate 'p', + #! when passed the first character in the input, returns + #! true. On success the word 'r' is called with the + #! successfully parser character on the stack. The result + #! of this is returned as the result of the parser. + [ ( inp p r -- result ) + >r over phead swap call [ + ph:t swap r> call swons lunit + ] [ + r> 2drop [ ] + ] ifte + ] curry2 ; + +: epsilon ( -- parser ) + #! A parser that parses the empty string. + [ ( inp -- result ) + "" swap cons lunit + ] ; + +: succeed ( r -- parser ) + #! A parser that always returns 'r' and consumes no input. + [ ( inp r -- result ) + swap cons lunit + ] curry1 ; + +: fail ( -- parser ) + #! A parser that always fails + [ + drop [ ] + ] ; + +USE: prettyprint +USE: unparser + +: ensure-list ( a -- [ a ] ) + #! If 'a' is not a list, make it one. + dup list? [ unit ] unless ; + +: ++ ( a b -- [ a b ] ) + #! Join two items into a list. + >r ensure-list r> ensure-list append ; + +: <&> ( p1 p2 -- parser ) + #! Sequentially combine two parsers, returning a parser + #! that first calls p1, then p2 all remaining results from + #! p1. + [ ( inp p1 p2 -- result ) + >r call r> [ ( [ x | xs ] p2 -- result ) + >r uncons r> call swap [ ( [ x2 | xs2 ] x -- result ) + >r uncons swap r> swap ++ swons + ] curry1 lmap + ] curry1 lmap lappend* + ] curry2 ; + + +: <|> ( p1 p2 -- parser ) + #! Choice operator for parsers. Return a parser that does + #! p1 or p2 depending on which will succeed. + [ ( inp p1 p2 -- result ) + rot tuck swap call >r swap call r> lappend + ] curry2 ; + +: p-abc ( -- parser ) + #! Test Parser. Parses the string "abc" + "a" token "b" token "c" token <&> <&> ; + +: parse-skipwhite ( string -- string ) + dup phead blank? [ + ptail parse-skipwhite + ] [ + ] ifte ; + +: sp ( parser -- parser ) + #! Return a parser that first skips all whitespace before + #! parsing. + [ ( inp parser -- result ) + [ parse-skipwhite ] dip call + ] curry1 ; + +: just ( parser -- parser ) + #! Return a parser that works exactly like the input parser + #! but guarantees that the rest string is empty. + [ ( inp parser -- result ) + call [ ( [ x | xs ] -- ) + cdr str-length 0 = + ] lsubset + ] curry1 ; + +: <@ ( p f -- parser ) + #! Given a parser p and a quotation f return a parser + #! that does the same as p but in addition applies f + #! to the resulting parse tree. + [ ( inp p f -- result ) + >r call r> [ ( [ x | xs ] f -- [ fx | xs ] ) + swap uncons [ swap over [ call ] [ drop ] ifte ] dip cons + ] curry1 lmap + ] curry2 ; + +: p-1 ( -- parser ) + "1" token "123" swap call lcar ; + +: p-2 ( -- parser ) + "1" token [ str>number ] <@ "123" swap call lcar ; + +: some ( parser -- det-parser ) + #! Given a parser, return a parser that only produces the + #! resulting parse tree of the first successful complete parse. + [ ( inp parser -- result ) + just call lcar car + ] curry1 ; + +: delayed-parser ( [ parser ] -- parser ) + [ ( inp [ parser ] -- result ) + call call + ] curry1 ; + +: parens ; +: parens ( -- parser ) + #! Parse nested parentheses + "(" token [ parens ] delayed-parser <&> + ")" token <&> [ parens ] delayed-parser <&> + epsilon <|> ; + +: nesting ( -- parser ) + #! Count the maximum depth of nested parentheses. + "(" token [ nesting ] delayed-parser <&> ")" token <&> + [ nesting ] delayed-parser <&> [ .s drop "a" ] <@ epsilon <|> ; + +: <& ( parser1 parser2 -- parser ) + #! Same as <&> except only return the first item in the parse tree. + <&> [ pfirst ] <@ ; + +: &> ( parser1 parser2 -- parser ) + #! Same as <&> except only return the second item in the parse tree. + <&> [ psecond ] <@ ; + +: lst ( [ x [ xs ] ] -- [x:xs] ) + #! I need a good name for this word... + dup cdr [ uncons car cons ] when unit ; + +: <*> ( parser -- parser ) + #! Return a parser that accepts zero or more occurences of the original + #! parser. + dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ [ ] succeed <|> ; + +: <+> ( parser -- parser ) + #! Return a parser that accepts one or more occurences of the original + #! parser. + dup [ <*> ] curry1 delayed-parser <&> [ lst ] <@ ; + +: > ( parser -- parser ) + #! Return a parser where its construct is optional. It may or may not occur. + [ ] succeed <|> ; + +:A parser is a word or quotation that, when called, processes + an input string on the stack, performs some parsing operation on + it, and returns a result indicating the success of the parsing + operation.
+The result returned by a parser is known as a 'list of +successes'. It is a lazy list of standard Factor cons cells. Each cons +cell is a result of a parse. The car of the cell is the result of the +parse operation and the cdr of the cell is the remaining input left to +be parsed.
+A list is used for the result as a parse operation can potentially +return many successful results. For example, a parser that parses one +or more digits will return more than one result for the input "123". A +successful parse could be "1", "12" or "123".
+The list is lazy so if only one parse result is required the +remaining results won't actually be processed if they are not +requested. This improves efficiency.
+The car of the result pair can be any value that the parser wishes +to return. It could be the successful portion of the input string +parsed, an abstract syntax tree representing the parsed input, or even +a quotation that should get called for later processing.
+A Parser Combinator is a word that takes one or more parsers and +returns a parser that when called uses the original parsers in some +manner.
+The following are some very simple parsers that demonstrate how +general parsers work and the 'list of sucesses' that are returned as a +result.
++ (1) : char-a ( inp -- result ) + 0 over str-nth CHAR: a = [ + 1 str-tail CHAR: a swons lunit + ] [ + drop f + ] ifte ; + (2) "atest" char-a [ [ . ] leach ] when* + => [ 97 | "test" ] + (3) "test" char-a [ [ . ] leach ] when* + => ++
'char-a' is a parser that only accepts the character 'a' in the +input string. When passed an input string with a string with a leading +'a' then the 'list of successes' has 1 result value. The car of that +result value is the character 'a' successfully parsed, and the cdr is +the remaining input string. On failure of the parse an empty list is +returned.
+The parser combinator library provides a combinator, <&>, that takes +two parsers off the stack and returns a parser that calls the original +two in sequence. An example of use would be calling 'char-a' twice, +which would then result in an input string expected with two 'a' +characters leading:
++ (1) "aatest" [ char-a ] [ char-a ] <&> call + => < list of successes > + (2) [ . ] leach + => [ [ 97 97 ] | "test" ] ++
Creating parsers for specfic characters and tokens can be a chore +so there is a word that, given a string token on the stack, returns +a parser that parses that particular token:
++ (1) "begin" token + => < a parser that parses the token "begin" > + (2) dup "this should fail" swap call . + => f + (3) "begin a successfull parse" swap call + => < lazy list > + (4) [ . ] leach + => [ "begin" | " a successfull parse" ] ++
The word 'satisfy' takes a quotation from the top of the stack and +returns a parser than when called will call the quotation with the +first item in the input string on the stack. If the quotation returns +true then the parse is successful, otherwise it fails:
++ (1) : digit-parser ( -- parser ) + [ digit? ] satisfy ; + (2) "5" digit-parser call [ . ] leach + => [ 53 | "" ] + (3) "a" digit-parser call + => f ++
Note that 'digit-parser' returns a parser, it is not the parser +itself. It is really a parser generating word like 'token'. Whereas +our 'char-a' word defined originally was a parser itself.
+Now that we can parse single digits it would be nice to easily +parse a string of them. The '<*>' parser combinator word will do +this. It accepts a parser on the top of the stack and produces a +parser that parses zero or more of the constructs that the original +parser parsed. The result of the '<*>' generated parser will be a list +list of the successful results returned by the original parser.
++ (1) digit-parser <*> + => < parser > + (2) "123" swap call + => < lazy list > + (3) [ . ] leach + => [ [ [ 49 50 51 ] ] | "" ] + [ [ [ 49 50 ] ] | "3" ] + [ [ [ 49 ] ] | "23" ] + [ f | "123" ] ++
In this case there are multiple successful parses. This is because +the occurrence of zero or more digits happens more than once. There is +also the 'f' case where zero digits is parsed. If only the 'longest +match' is required then the lcar of the lazy list can be used and the +remaining parse results are never produced.
+The result of the parse above is the list of characters +parsed. Sometimes you want this to be something else, like an abstract +syntax tree, or some calculation. For the digit case we may want the +actual integer number.
+For this we can use the '<@' parser +combinator. This combinator takes a parser and a quotation on the +stack and returns a new parser. When the new parser is called it will +call the original parser to produce the results, then it will call the +quotation on each successfull result, and the result of that quotation +will be the result of the parse:
++ (1) : digit-parser2 ( -- parser ) + [ digit? ] satisfy [ CHAR: 0 - ] <@ ; + (2) "5" digit-parser2 call [ . ] leach + => [ 5 | "" ] ++
Notice that now the result is the actual integer '5' rather than +character code '53'.
++ (1) : natural-parser ( -- parser ) + digit-parser2 <*> [ car 0 [ swap 10 * + ] reduce unit ] <@ ; + (2) "123" natural-parser call + => < lazy list > + (3) [ . ] leach + => [ [ 123 ] | "" ] + [ [ 12 ] | "3" ] + [ [ 1 ] | "23" ] + [ f | "123" ] ++
The number parsed is the actual integer number due to the operation +of the '<@' word. This allows parsers to not only parse the input +string but perform operations and transformations on the syntax tree +returned.
+ + +Copyright (c) 2004, Chris Double. All Rights Reserved.
+ diff --git a/contrib/parser-combinators/style.css b/contrib/parser-combinators/style.css new file mode 100644 index 0000000000..207afdceb7 --- /dev/null +++ b/contrib/parser-combinators/style.css @@ -0,0 +1,28 @@ + body { background: white; color: black; } + p { margin-left: 10%; margin-right: 10%; + font: normal 100% Verdana, Arial, Helvetica; } + td { margin-left: 10%; margin-right: 10%; + font: normal 100% Verdana, Arial, Helvetica; } + table { margin-left: 10%; margin-right: 10%; } + ul { margin-left: 10%; margin-right: 10%; + font: normal 100% Verdana, Arial, Helvetica; } + ol { margin-left: 10%; margin-right: 10%; + font: normal 100% Verdana, Arial, Helvetica; } + h1 { text-align: center; margin-bottom: 0; margin-top: 1em; } + h2 { margin: 0 5% 0 7.5%; font-size: 120%; font-style: italic; } + h3 { border: 2px solid blue; border-width: 2px 0.5em 2px 0.5em; + padding: 0.2em 0.2em 0.2em 0.5em; background: #fafafa; + margin-left: 10%; margin-right: 10%; margin-top: 2em; + font-size: 100%; } + .note { border: 2px solid blue; border-width: 2px 2px 2px 2em; + padding: 0.5em 0.5em 0.5em 1em; background: #ffe; } + .code { border: 1px solid black; border-width: 1px; + padding: 0.5em; background: #ffe; + margin-left: 10%; margin-right: 10%; } + blockquote { margin-left: 25%; margin-right: 25%; + font-style: italic; } + .highlite { color: red; } + .footer { margin-top: 2.5em; border-top: 1px solid gray; color: + #AAA; font-size: 85%; padding-top: 0.33em; } + #copyright { text-align: center; color: #AAA; + font-size: 65%; } \ No newline at end of file