First cut at peg style packrat parser
							parent
							
								
									0714bb7a86
								
							
						
					
					
						commit
						aacf88a72d
					
				| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: help.markup help.syntax peg ;
 | 
			
		||||
 | 
			
		||||
! Help for the 'token' parser generator. The usage sample is a $code
! element because running it leaves a parse-result on the stack and
! prints nothing.
HELP: token
{ $values
  { "string" "a string" }
  { "parser" "a parser" } }
{ $description
    "A parser generator that returns a parser that matches the given string." }
{ $code "\"begin foo end\" 0 <parse-state> \"begin\" token parse" } ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,55 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
!
 | 
			
		||||
USING: kernel tools.test strings namespaces arrays peg ;
 | 
			
		||||
IN: temporary
 | 
			
		||||
 | 
			
		||||
! get-next-id hands out consecutive ids starting from the stored counter.
{ 0 1 2 } [
  0 next-id set-global
  get-next-id get-next-id get-next-id
] unit-test

! A state created at index 0 and not advanced still sees the whole input.
{ "0123456789" } [
  "0123456789" 0 <parse-state>
  0 state-tail
  parse-state-input >string
] unit-test

! Creating the state at index 5 skips the first five characters.
{ "56789" } [
  "0123456789" 5 <parse-state>
  0 state-tail
  parse-state-input >string
] unit-test

! state-tail advances relative to the current position of the state.
{ "789" } [
  "0123456789" 5 <parse-state>
  2 state-tail
  parse-state-input >string
] unit-test
 | 
			
		||||
 | 
			
		||||
! A token only matches at the current position of the input.
{ f } [
  "endbegin" 0 <parse-state>
  "begin" token parse
] unit-test

! On success the token is both the matched text and the ast, and the
! remaining state holds the rest of the input.
{ "begin" "begin" "end" } [
  "beginend" 0 <parse-state>
  "begin" token parse
  { parse-result-matched parse-result-ast parse-result-remaining } get-slots
  parse-state-input >string
] unit-test
 | 
			
		||||
 | 
			
		||||
! A range parser fails on empty input.
{ f } [
  "" 0 <parse-state>
  CHAR: a CHAR: z range parse
] unit-test

! A first character outside the range is rejected.
{ f } [
  "1bcd" 0 <parse-state>
  CHAR: a CHAR: z range parse
] unit-test

! The lower bound of the range is accepted.
{ CHAR: a } [
  "abcd" 0 <parse-state>
  CHAR: a CHAR: z range parse
  parse-result-ast
] unit-test

! The upper bound of the range is accepted.
{ CHAR: z } [
  "zbcd" 0 <parse-state>
  CHAR: a CHAR: z range parse
  parse-result-ast
] unit-test
 | 
			
		||||
 | 
			
		||||
! A sequence fails as soon as one of its parsers fails.
{ f } [
  "bad" 0 <parse-state>
  "a" token "b" token 2array seq parse
] unit-test

! The matched text of a sequence is the concatenation of its parts.
{ "go" } [
  "good" 0 <parse-state>
  "g" token "o" token 2array seq parse
  parse-result-matched
] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,92 @@
 | 
			
		|||
! Copyright (C) 2007 Chris Double.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel sequences strings namespaces math assocs combinators.lib ;
 | 
			
		||||
IN: peg
 | 
			
		||||
 | 
			
		||||
! State threaded through a parse. 'input' is the unconsumed part of the
! input sequence, 'cache' is the table consulted by get-cached.
TUPLE: parse-state input cache ;

! Create the initial state for parsing 'input' from position 'index'.
! The input slot holds a tail slice of the sequence; the cache slot
! starts out as f.
: <parse-state> ( input index -- state )
  tail-slice f parse-state construct-boa ;
 | 
			
		||||
 | 
			
		||||
! Look up the cache entry stored under 'pid' in the state's cache and
! return its element at the start offset of the state's input slice.
! Returns f when the cache has no entry for 'pid'.
! NOTE(review): 'pid' is presumably a parser id -- confirm with callers.
: get-cached ( pid state -- result )
  [ parse-state-cache at ] keep over [
    parse-state-input slice-from swap nth
  ] [
    2drop f
  ] if ;
 | 
			
		||||
 | 
			
		||||
! Build a new parse-state whose input skips the first 'n' elements of
! the given state's input. The cache of the original state is carried
! over to the new state unchanged.
: state-tail ( state n -- state )
  >r dup parse-state-cache swap parse-state-input r>
  tail-slice
  { set-parse-state-cache set-parse-state-input } parse-state construct ;
 | 
			
		||||
 | 
			
		||||
! Outcome of a successful parse: 'remaining' is the parse-state left
! after the match, 'matched' is what was consumed and 'ast' is the value
! produced by the parser.
TUPLE: parse-result remaining matched ast ;

! Build a parse-result from its three slot values, in slot order.
: <parse-result> ( remaining matched ast -- parse-result )
  { set-parse-result-remaining set-parse-result-matched set-parse-result-ast }
  parse-result construct ;
 | 
			
		||||
 | 
			
		||||
! Global variable holding the id that get-next-id will hand out next.
SYMBOL: next-id 
 | 
			
		||||
 | 
			
		||||
! Return the current value of the global next-id counter and store the
! incremented value back. An unset counter is treated as 0.
: get-next-id ( -- number )
  next-id get-global [ 0 ] unless*
  dup 1+ next-id set-global ;
 | 
			
		||||
 | 
			
		||||
! Common part of every parser: 'id' is the number obtained from
! get-next-id. init-parser installs an instance of this tuple as the
! delegate of each concrete parser.
TUPLE: parser id ;
 | 
			
		||||
 | 
			
		||||
! Give a freshly built concrete parser its delegate: a 'parser' tuple
! carrying a newly allocated id. The concrete parser itself is returned.
: init-parser ( parser -- parser )
  dup get-next-id parser construct-boa swap set-delegate ;
 | 
			
		||||
 | 
			
		||||
! Apply 'parser' to 'state'. The methods defined in this file return a
! parse-result on success and f on failure.
GENERIC: parse ( state parser -- result )
 | 
			
		||||
 | 
			
		||||
! Parser that matches the fixed sequence held in 'symbol' at the start
! of the input.
TUPLE: token-parser symbol ;
 | 
			
		||||
 | 
			
		||||
! Succeed when the input starts with the parser's symbol. The result
! has the state advanced past the symbol as 'remaining' and the symbol
! itself as both 'matched' and 'ast'. Otherwise return f.
M: token-parser parse ( state parser -- result )
  token-parser-symbol
  over parse-state-input over head? [
    [ length state-tail ] keep dup <parse-result>
  ] [
    2drop f
  ] if ;
 | 
			
		||||
 | 
			
		||||
! Create a parser that matches the given string at the current position.
: token ( string -- parser )
  { set-token-parser-symbol } token-parser construct
  init-parser ;
 | 
			
		||||
 | 
			
		||||
! Parser that matches one input element lying between 'min' and 'max'
! (tested with between?).
TUPLE: range-parser min max ;
 | 
			
		||||
 | 
			
		||||
! Match a single character between the parser's min and max (both
! bounds are accepted). Returns f on empty input or when the first
! character is out of range. On success 'remaining' is the state
! advanced by one character, 'matched' is the character as a
! one-character string and 'ast' is the character itself.
M: range-parser parse ( state parser -- result )
  over parse-state-input empty? [
    2drop f
  ] [
    0 pick parse-state-input nth dup rot
    { range-parser-min range-parser-max } get-slots between? [
      ! 'matched' has to be a string: do-seq-parser appends the matched
      ! text of each sub-parser, which fails for a bare character.
      [ 1 state-tail ] dip
      dup 1 swap <string> swap <parse-result>
    ] [
      2drop f
    ] if
  ] if ;
 | 
			
		||||
 | 
			
		||||
! Create a parser that matches one character between 'min' and 'max'.
: range ( min max -- parser )
  { set-range-parser-min set-range-parser-max } range-parser construct
  init-parser ;
 | 
			
		||||
 | 
			
		||||
! Parser that applies each of the parsers in 'parsers' in order, each
! one continuing where the previous one stopped.
TUPLE: seq-parser parsers ;
 | 
			
		||||
 | 
			
		||||
! Run 'parser' on the remaining state of the accumulated 'result'.
! On success the accumulated result is updated in place: its remaining
! state is replaced, the new ast is pushed onto its ast vector and the
! new matched text is appended to its matched text; the same result
! object is returned. On failure f is returned.
: do-seq-parser ( result parser -- result )
  over parse-result-remaining swap parse [
    ! Stack here: accumulated-result new-result
    2dup parse-result-remaining swap set-parse-result-remaining
    2dup parse-result-ast swap parse-result-ast push
    parse-result-matched over parse-result-matched swap append
    over set-parse-result-matched
  ] [
    drop f
  ] if* ;
 | 
			
		||||
 | 
			
		||||
! Thread 'result' through every parser in 'parsers' with do-seq-parser.
! Once the result has become f the remaining parsers are skipped and f
! is returned.
: (seq-parser) ( result parsers -- result )
  [
    over [ do-seq-parser ] [ drop ] if
  ] each ;
 | 
			
		||||
 | 
			
		||||
! Start from an empty result (no matched text, empty ast vector, the
! incoming state as 'remaining') and run all sub-parsers over it.
M: seq-parser parse ( state parser -- result )
  seq-parser-parsers >r
  "" V{ } clone <parse-result>
  r> (seq-parser) ;
 | 
			
		||||
 | 
			
		||||
! Create a parser that applies the given parsers one after another.
: seq ( seq -- parser )
  { set-seq-parser-parsers } seq-parser construct
  init-parser ;
 | 
			
		||||
		Loading…
	
		Reference in New Issue