492 lines
15 KiB
Factor
492 lines
15 KiB
Factor
! Copyright (C) 2004 Chris Double.
|
|
!
|
|
! Redistribution and use in source and binary forms, with or without
|
|
! modification, are permitted provided that the following conditions are met:
|
|
!
|
|
! 1. Redistributions of source code must retain the above copyright notice,
|
|
! this list of conditions and the following disclaimer.
|
|
!
|
|
! 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
! this list of conditions and the following disclaimer in the documentation
|
|
! and/or other materials provided with the distribution.
|
|
!
|
|
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
IN: parser-combinators
|
|
USE: lazy
|
|
USE: kernel
|
|
USE: sequences
|
|
USE: strings
|
|
USE: lists
|
|
USE: math
|
|
|
|
GENERIC: phead

M: string phead ( object -- head )
    #! Polymorphic head of a string: the element at index 0,
    #! i.e. its first character.
    >r 0 r> nth ;

M: list phead ( object -- head )
    #! Polymorphic head of a list: its car.
    car ;

M: cons phead ( object -- head )
    #! Polymorphic head of a cons cell: its car.
    car ;
|
|
|
|
GENERIC: ptail

M: string ptail ( object -- tail )
    #! Polymorphic tail of a string: the string with its
    #! first character removed.
    >r 1 r> tail ;

M: list ptail ( object -- tail )
    #! Polymorphic tail of a list: its cdr.
    cdr ;

M: cons ptail ( object -- tail )
    #! Polymorphic tail of a cons cell: its cdr.
    cdr ;
|
|
|
|
: pfirst ( object -- first )
    #! Polymorphic first: the first item of a collection.
    #! This is simply another name for phead.
    phead ;
|
|
|
|
GENERIC: psecond

M: string psecond ( object -- second )
    #! Polymorphic second of a string: the element at index 1.
    >r 1 r> nth ;

M: list psecond ( object -- second )
    #! Polymorphic second of a list: the car of its cdr.
    cdr car ;
|
|
|
|
: ph:t ( object -- head tail )
    #! Split the object into its phead and its ptail,
    #! leaving the head below the tail on the stack.
    dup >r phead r> ptail ;
|
|
|
|
GENERIC: pempty?

M: string pempty? ( object -- bool )
    #! True if the string has length zero.
    0 swap length = ;

M: list pempty? ( object -- bool )
    #! True if the list is empty. The test is 'not', so
    #! only a false (empty) list value yields true.
    not ;
|
|
|
|
: string-take ( n string -- string )
    #! Return the first 'n' characters of the string.
    #! When the string is shorter than 'n' the result is
    #! the empty string rather than a partial prefix.
    2dup length > [
        2drop ""
    ] [
        head
    ] ifte ;
|
|
|
|
: (list-take) ( n list accum -- list )
    #! Recursive worker for list-take. Each step moves the
    #! car of 'list' onto the front of 'accum' and decrements
    #! 'n'. Once 'n' drops below 1 the accumulator, which was
    #! built back to front, is reversed and returned.
    pick 1 < [
        nip nip reverse
    ] [
        >r >r 1 - r> uncons swap r> cons (list-take)
    ] ifte ;
|
|
|
|
: list-take ( n list -- list )
    #! Return a list holding the first 'n' items of the
    #! original list. Delegates to (list-take), starting
    #! with an empty accumulator.
    [ ] (list-take) ;
|
|
|
|
GENERIC: ptake

M: string ptake ( n object -- object )
    #! Polymorphic take for strings: the first 'n'
    #! characters, as computed by string-take.
    string-take ;

M: list ptake ( n object -- object )
    #! Polymorphic take for lists: the first 'n' items,
    #! as computed by list-take.
    list-take ;
|
|
|
|
: string-drop ( n string -- string )
    #! Return the string with its first 'n' characters
    #! removed. When the string is shorter than 'n' the
    #! result is the empty string.
    2dup length > [
        2drop ""
    ] [
        tail
    ] ifte ;
|
|
|
|
: list-drop ( n list -- list )
    #! Return the list with its first 'n' items removed.
    #! Recurses on the cdr, decrementing 'n', until 'n'
    #! drops below 1.
    over 1 < [
        nip
    ] [
        >r 1 - r> cdr list-drop
    ] ifte ;
|
|
|
|
GENERIC: pdrop

M: string pdrop ( n object -- object )
    #! Polymorphic drop for strings: the string without
    #! its first 'n' characters, as computed by string-drop.
    string-drop ;

M: list pdrop ( n object -- object )
    #! Polymorphic drop for lists: the list without its
    #! first 'n' items, as computed by list-drop.
    list-drop ;
|
|
|
|
: token-parser ( inp sequence -- llist )
    #! Parser for one specific sequence. Takes as many
    #! items from the front of 'inp' as 'sequence' is long
    #! and compares them with 'sequence'. On a match the
    #! result is a one-element lazy list holding the cons
    #! [[ remaining-input sequence ]]; otherwise lnil.
    2dup length swap ptake over = [
        ( inp sequence -- )
        dup length rot pdrop swons unit delay lunit
    ] [
        2drop lnil
    ] ifte ;
|
|
|
|
: token ( string -- parser )
    #! Build a parser quotation that applies token-parser
    #! with the given string as the sequence to match.
    [ token-parser ] cons ;
|
|
|
|
: satisfy-parser ( inp pred -- llist )
    #! Parser that succeeds when 'pred', called with the
    #! first item of the input, returns true. Empty input
    #! always fails. On success the result is a one-element
    #! lazy list holding [[ tail-of-input head-of-input ]].
    over pempty? [
        2drop lnil
    ] [
        >r dup phead r> call [
            ( inp -- )
            dup >r phead r> ptail swons unit delay lunit
        ] [
            drop lnil
        ] ifte
    ] ifte ;
|
|
|
|
: satisfy ( p -- parser )
    #! Build a parser quotation that applies satisfy-parser
    #! with predicate 'p'. The resulting parser succeeds
    #! when 'p' returns true for the first item of the input.
    [ satisfy-parser ] cons ;
|
|
|
|
: satisfy2-parser ( inp pred quot -- llist )
    #! Parser that succeeds when 'pred', called with the
    #! first item of the input, returns true. On success
    #! 'quot' is called with that item on the stack, and the
    #! value it leaves becomes the result portion of the
    #! successful parse; the tail of the input becomes the
    #! remaining input.
    #!
    #! Empty input fails with lnil, matching satisfy-parser.
    #! Without this guard phead would be applied to an empty
    #! collection.
    pick pempty? [
        2drop drop lnil
    ] [
        -rot over phead swap call [ ( quot inp -- )
            ph:t >r swap call r> swons unit delay lunit
        ] [
            2drop lnil
        ] ifte
    ] ifte ;
|
|
|
|
: satisfy2 ( pred quot -- parser )
    #! Build a parser quotation of the form
    #! [ pred quot satisfy2-parser ].
    [ satisfy2-parser ] cons cons ;
|
|
|
|
: epsilon-parser ( input -- llist )
    #! Parser for the empty string. Consumes nothing and
    #! always succeeds: the single result pairs the
    #! unmodified input with "" as the parse tree,
    #! i.e. [[ input "" ]].
    "" cons unit delay lunit ;
|
|
|
|
: epsilon ( -- parser )
    #! Push the parser quotation that runs epsilon-parser.
    [ epsilon-parser ] ;
|
|
|
|
: succeed-parser ( input result -- llist )
    #! Parser that always succeeds without consuming input.
    #! The single parse is the cons [[ input result ]].
    cons unit delay lunit ;
|
|
|
|
: succeed ( result -- parser )
    #! Build a parser quotation that applies succeed-parser
    #! with the given 'result'.
    [ succeed-parser ] cons ;
|
|
|
|
: fail-parser ( input -- llist )
    #! Parser that never succeeds: the input is discarded
    #! and lnil (no successes) is returned.
    drop lnil ;
|
|
|
|
: fail ( -- parser )
    #! Push the parser quotation that runs fail-parser.
    [ fail-parser ] ;
|
|
|
|
: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
    #! Called by <&>-do-parser2 on each result of the
    #! parse from parser2. Given the cons [[ x1 xs2 ]] and
    #! the value 'x', builds [[ x1 [[ x xs2 ]] ]]: the car
    #! is kept as is, and 'x' is paired in front of the cdr.
    >r uncons r> ( x1 xs2 x )
    swons cons ;
|
|
|
|
: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
    #! Called by <&>-parser on each successful parse of
    #! parser1. The input cons is split into its car and
    #! cdr; parser2 is called with the car on top of the
    #! stack, and every item of the lazy list it returns is
    #! combined with the cdr by <&>-do-parser3.
    >r uncons swap r> ( cdr car parser2 )
    call swap ( llist cdr )
    [ <&>-do-parser3 ] cons lmap ;
|
|
|
|
: <&>-parser ( input parser1 parser2 -- llist )
    #! Sequential composition. parser1 is applied to
    #! 'input'; each of its results is then passed, together
    #! with parser2, to <&>-do-parser2, and the resulting
    #! lazy lists are joined with lappend*.
    [ <&>-do-parser2 ] cons ( input parser1 quot )
    >r call r> lmap lappend* ;
|
|
|
|
: <&> ( parser1 parser2 -- parser )
    #! Sequentially combine two parsers. The returned
    #! parser first runs parser1, then runs parser2 on the
    #! remaining input of every result from parser1.
    #! The quotation built is [ parser1 parser2 <&>-parser ].
    [ <&>-parser ] cons cons ;
|
|
|
|
: <|>-parser ( input parser1 parser2 -- result )
    #! Choice. Both parsers are applied to the same input
    #! and their result lists are joined with lappend,
    #! parser1's results first.
    >r over >r call ( llist1 )
    r> r> call ( llist1 llist2 )
    lappend ;
|
|
|
|
: <|> ( p1 p2 -- parser )
    #! Choice operator for parsers. The returned parser
    #! yields the results of p1 followed by those of p2 for
    #! the same input. The quotation built is
    #! [ p1 p2 <|>-parser ].
    [ <|>-parser ] cons cons ;
|
|
|
|
: string-ltrim ( string -- string )
    #! Return the string with all leading whitespace
    #! removed. An empty string is returned unchanged; this
    #! also ends the recursion when the whole string is
    #! whitespace, so phead is never applied to an empty
    #! string.
    dup pempty? not [
        dup phead blank? [ ptail string-ltrim ] when
    ] when ;
|
|
|
|
: sp-parser ( input parser -- result )
    #! Strip leading whitespace from the input with
    #! string-ltrim, then call the parser on what is left.
    swap string-ltrim swap call ;
|
|
|
|
: sp ( parser -- parser )
    #! Wrap a parser so that leading whitespace is skipped
    #! before the original parser runs. The quotation built
    #! is [ parser sp-parser ].
    [ sp-parser ] cons ;
|
|
|
|
: just-parser ( input parser -- result )
    #! Call the parser on the input and keep only those
    #! results whose car (the remaining input) is empty.
    #! This restricts the results to parses that consumed
    #! the whole input.
    call
    [ car pempty? ] lsubset ;
|
|
|
|
: just ( parser -- parser )
    #! Wrap a parser in just-parser. The quotation built
    #! is [ parser just-parser ].
    [ just-parser ] cons ;
|
|
|
|
: (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
    #! Result replacement step of <@-parser. The parse
    #! result cons is split, the quotation is called with
    #! the result portion on top of the stack, and whatever
    #! it leaves is paired with the untouched remaining
    #! input again.
    >r uncons r> ( inp result quot )
    call cons ;
|
|
|
|
: <@-parser ( input parser quot -- result )
    #! Call the parser on the input. When there are no
    #! results, lnil is returned as is. Otherwise every
    #! successful parse has its result portion replaced by
    #! the value the quotation produces for it, via
    #! (<@-parser-replace). This allows parse tree results
    #! to be transformed, for example from strings to
    #! integers.
    -rot call dup lnil? [ ( quot lnil -- )
        nip
    ] [ ( quot result -- )
        swap [ (<@-parser-replace) ] cons lmap
    ] ifte ;
|
|
|
|
: <@ ( parser quot -- parser )
    #! Build a parser quotation of the form
    #! [ parser quot <@-parser ].
    [ <@-parser ] cons cons ;
|
|
|
|
: some-parser ( input parser -- result )
    #! Run the parser through just-parser so that only
    #! complete parses remain, take the first such result
    #! with lcar, and return its cdr: the parse tree
    #! without the (empty) remaining input.
    just-parser lcar cdr ;
|
|
|
|
: some ( parser -- deterministic-parser )
    #! Wrap a parser in some-parser. The quotation built
    #! is [ parser some-parser ].
    [ some-parser ] cons ;
|
|
|
|
: <&-parser ( input parser1 parser2 -- result )
    #! Same as <&> except that the result of the second
    #! parser is discarded: the combined result is mapped
    #! through phead.
    <&> [ phead ] <@-parser ;
|
|
|
|
: <& ( parser1 parser2 -- parser )
    #! Sequence two parsers as <&> does, discarding the
    #! result of the second. The quotation built is
    #! [ parser1 parser2 <&-parser ].
    [ <&-parser ] cons cons ;
|
|
|
|
: &>-parser ( input parser1 parser2 -- result )
    #! Same as <&> except that the result of the first
    #! parser is discarded: the combined result is mapped
    #! through ptail.
    <&> [ ptail ] <@-parser ;
|
|
|
|
: &> ( parser1 parser2 -- parser )
    #! Sequence two parsers as <&> does, discarding the
    #! result of the first. The quotation built is
    #! [ parser1 parser2 &>-parser ].
    [ &>-parser ] cons cons ;
|
|
|
|
: (a,(b,c))>((a,b,c)) ( list -- list )
    #! Flatten a cons whose car is a single value and whose
    #! cdr is a one-element list wrapping a list: the value
    #! is consed onto the inner list and the outcome is
    #! wrapped in a one-element list again.
    uncons car ( a inner-list )
    cons unit ;
|
|
|
|
: <:&>-parser ( input parser1 parser2 -- result )
    #! Same as <&> except that every result is
    #! postprocessed with (a,(b,c))>((a,b,c)).
    <&> [ (a,(b,c))>((a,b,c)) ] <@-parser ;
|
|
|
|
: <:&> ( parser1 parser2 -- parser )
    #! Sequence two parsers as <&> does, postprocessing
    #! each result with (a,(b,c))>((a,b,c)). The quotation
    #! built is [ parser1 parser2 <:&>-parser ].
    [ <:&>-parser ] cons cons ;
|
|
|
|
DEFER: <*>
|
|
|
|
: (<*>) ( parser -- parser )
    #! Non-delayed implementation of <*>: either the parser
    #! followed by <*> of the same parser (joined with
    #! <:&>), or a parser that succeeds with an empty list.
    dup <*> <:&>
    [ ] succeed <|> ;
|
|
|
|
: <*> ( parser -- parser )
    #! Return a parser that accepts zero or more
    #! occurrences of the original parser. The quotation
    #! built is [ parser (<*>) call ], so (<*>) only runs
    #! when the returned parser is called.
    [ (<*>) call ] cons ;
|
|
|
|
: (<+>) ( parser -- parser )
    #! Non-delayed implementation of <+>: the parser
    #! followed by <*> of the same parser, joined with <:&>.
    dup <*> <:&> ;
|
|
|
|
: <+> ( parser -- parser )
    #! Return a parser that accepts one or more
    #! occurrences of the original parser. The quotation
    #! built is [ parser (<+>) call ], so (<+>) only runs
    #! when the returned parser is called.
    [ (<+>) call ] cons ;
|
|
|
|
: (<?>) ( parser -- parser )
    #! Non-delayed implementation of <?>: either the parser
    #! with its result wrapped by 'unit', or a parser that
    #! succeeds with an empty list.
    [ unit ] <@
    [ ] succeed <|> ;
|
|
|
|
: <?> ( parser -- parser )
    #! Return a parser that applies the original parser
    #! optionally. The quotation built is
    #! [ parser (<?>) call ], so (<?>) only runs when the
    #! returned parser is called.
    [ (<?>) call ] cons ;
|
|
|
|
USE: prettyprint
|
|
USE: parser
|
|
USE: unparser
|
|
USE: stdio
|
|
|
|
! Testing <&>
|
|
: test1 ( -- )
    #! "a" followed by "b" on input that starts with "ab".
    "abcd" "a" token "b" token <&> call [ . ] leach ;

: test1a ( -- )
    #! Three tokens, with <&> nested to the left.
    "abcd" "a" token "b" token <&> "c" token <&> call [ . ] leach ;

: test1b ( -- )
    #! Three tokens, with <&> nested to the right.
    "abcd" "a" token "b" token "c" token <&> <&> call [ . ] leach ;

: test2 ( -- )
    #! Input containing neither "a" nor "b" at the front.
    "decd" "a" token "b" token <&> call [ . ] leach ;

: test3 ( -- )
    #! Input whose first character is not "a".
    "dbcd" "a" token "b" token <&> call [ . ] leach ;

: test4 ( -- )
    #! Input starting with "a" but not followed by "b".
    "adcd" "a" token "b" token <&> call [ . ] leach ;
|
|
|
|
! Testing <|>
|
|
: test5 ( -- )
    #! Choice of "a" or "b" on input starting with "a".
    "abcd" "a" token "b" token <|> call [ . ] leach ;

: test6 ( -- )
    #! Choice of "a" or "b" on input starting with "b".
    "bbcd" "a" token "b" token <|> call [ . ] leach ;

: test7 ( -- )
    #! Choice of "a" or "b" on input starting with "c".
    "cbcd" "a" token "b" token <|> call [ . ] leach ;
|
|
|
|
! Testing sp
|
|
: test8 ( -- )
    #! Token parser applied directly to input with a
    #! leading blank.
    " abcd" "a" token call [ . ] leach ;

: test9 ( -- )
    #! The same input, with the token parser wrapped in sp.
    " abcd" "a" token sp call [ . ] leach ;
|
|
|
|
! Testing just
|
|
: test10 ( -- )
    #! Choice of "abcd" or "abc" without just.
    "abcd" "abcd" token "abc" token <|> call [ . ] leach ;

: test11 ( -- )
    #! The same choice wrapped in just.
    "abcd" "abcd" token "abc" token <|> just call [ . ] leach ;
|
|
|
|
! Testing <@
|
|
: test12 ( -- )
    #! Plain satisfy with the digit? predicate.
    "01234" [ digit? ] satisfy call [ . ] leach ;

: test13 ( -- )
    #! satisfy with its result passed through digit> by <@.
    "01234" [ digit? ] satisfy [ digit> ] <@ call [ . ] leach ;
|
|
|
|
! Testing some
|
|
: test14 ( -- )
    #! Token "begin" on input with one trailing character.
    "begin1" "begin" token call [ . ] leach ;

: test15 ( -- )
    #! 'some' on input that the token does not fully
    #! consume; a notice is printed first because an error
    #! is the expected outcome.
    "This should fail with an error" print
    "begin1" "begin" token some call . ;

: test16 ( -- )
    #! 'some' on input that the token consumes entirely.
    "begin" "begin" token some call . ;
|
|
|
|
! parens test function
|
|
: parens ( -- parser )
    #! Return a parser for nested parentheses: either
    #! "(" parens ")" parens, or the empty string. The
    #! grammar is built inside the quotation, so the
    #! recursive use of 'parens' only happens when the
    #! parser is called.
    [
        "(" token parens <&>
        ")" token <&>
        parens <&>
        epsilon <|>
        call
    ] ;

: test17 ( -- )
    #! parens on the empty string.
    "" parens call [ . ] leach ;

: test18 ( -- )
    #! parens on a single pair.
    "()" parens call [ . ] leach ;

: test19 ( -- )
    #! parens on three nested pairs.
    "((()))" parens call [ . ] leach ;
|
|
|
|
! <& parser and &> parser
|
|
: test20 ( -- )
    #! Baseline: both results kept by <&>.
    "abcd" "a" token "b" token <&> call [ . ] leach ;

: test21 ( -- )
    #! <& on the same input and tokens.
    "abcd" "a" token "b" token <& call [ . ] leach ;

: test22 ( -- )
    #! &> on the same input and tokens.
    "abcd" "a" token "b" token &> call [ . ] leach ;
|
|
|
|
! nesting example
|
|
: parens-open ( -- parser )
    #! Parser for the token "(".
    "(" token ;

: parens-close ( -- parser )
    #! Parser for the token ")".
    ")" token ;

: nesting ( -- parser )
    #! Return a parser for parenthesised input whose result
    #! is a number: "(" nesting ")" nesting is reduced with
    #! [ unswons 1 + max ], and the alternative succeeds
    #! with 0 without consuming input. The grammar is built
    #! inside the quotation, so the recursive use of
    #! 'nesting' only happens when the parser is called.
    [
        parens-open nesting &>
        parens-close <&
        nesting <&>
        [ unswons 1 + max ] <@
        0 succeed <|>
        call
    ] ;
|
|
|
|
: test23 ( -- )
    #! nesting on the empty string.
    "" nesting just call [ . ] leach ;

: test24 ( -- )
    #! nesting on a single pair.
    "()" nesting just call [ . ] leach ;

: test25 ( -- )
    #! nesting on two nested pairs.
    "(())" nesting just call [ . ] leach ;

: test26 ( -- )
    #! nesting on a longer mix of adjacent and nested pairs.
    "()(()(()()))()" nesting just call [ . ] leach ;
|
|
|
|
! Testing <*> and <:&>
|
|
: test27 ( -- )
    #! <*> of "1" on input with a single leading "1".
    "1234" "1" token <*> call [ . ] leach ;

: test28 ( -- )
    #! <*> of "1" on input with four leading "1"s.
    "1111234" "1" token <*> call [ . ] leach ;

: test28a ( -- )
    #! As test28, with the result passed through
    #! [ car concat unit ].
    "1111234" "1" token <*> [ car concat unit ] <@ call [ . ] leach ;

: test29 ( -- )
    #! <*> of "1" on input with no leading "1".
    "234" "1" token <*> call [ . ] leach ;
|
|
: pdigit ( -- parser )
    #! Parser for one character satisfying digit?, with the
    #! result converted by digit>.
    [ digit? ] satisfy [ digit> ] <@ ;

: pnatural ( -- parser )
    #! Zero or more pdigit.
    pdigit <*> ;

: pnatural2 ( -- parser )
    #! pnatural with its result turned into a number: the
    #! digits are mapped through >digit and joined into a
    #! string; an empty string gives 0, anything else goes
    #! through str>number. The number is wrapped with 'unit'.
    pnatural [
        car [ >digit ] map >string
        dup pempty? [ drop 0 ] [ str>number ] ifte
        unit
    ] <@ ;

: test30 ( -- )
    #! pnatural2 on an all-digit string.
    "12345" pnatural2 call [ . ] leach ;
|
|
|
|
! Testing <+>
|
|
: test31 ( -- )
    #! <+> of "1" on input with a single leading "1".
    "1234" "1" token <+> call [ . ] leach ;

: test32 ( -- )
    #! <+> of "1" on input with four leading "1"s.
    "1111234" "1" token <+> call [ . ] leach ;

: test33 ( -- )
    #! <+> of "1" on input with no leading "1".
    "234" "1" token <+> call [ . ] leach ;
|
|
|
|
! Testing <?>
|
|
: test34 ( -- )
    #! "a", an optional digit, then "b": no digit present.
    "ab" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;

: test35 ( -- )
    #! Same parser on input where "b" is missing.
    "ac" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;

: test36 ( -- )
    #! Same parser on input with the digit present.
    "a5b" "a" token pdigit <?> <&> "b" token <&> call [ . ] leach ;
|
|
: pinteger ( -- parser )
    #! Parser for an optionally negated natural number:
    #! an optional "-" token followed by pnatural2.
    #! pnatural2 leaves its number wrapped in a one-element
    #! list. When the sign was parsed the number is negated
    #! and wrapped again, so both the signed and unsigned
    #! cases produce the same result shape. (Previously the
    #! negative case returned a bare number while the
    #! positive case returned a one-element list.)
    "-" token <?> pnatural2 <&>
    [ uncons swap [ car -1 * unit ] when ] <@ ;
|
|
: test37 ( -- )
    #! pinteger on an unsigned number.
    "123" pinteger call [ . ] leach ;

: test38 ( -- )
    #! pinteger on a number with a leading "-".
    "-123" pinteger call [ . ] leach ;
|
|
|