372 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Factor
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			372 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Factor
		
	
	
		
			Executable File
		
	
! Copyright (C) 2007 Chris Double.
 | 
						|
! See http://factorcode.org/license.txt for BSD license.
 | 
						|
USING: kernel sequences strings namespaces math assocs shuffle 
 | 
						|
       vectors arrays combinators.lib memoize math.parser match
 | 
						|
       unicode.categories sequences.lib compiler.units parser
 | 
						|
       words ;
 | 
						|
IN: peg
 | 
						|
 | 
						|
TUPLE: parse-result remaining ast ;
 | 
						|
 | 
						|
GENERIC: compile ( parser -- quot )
 | 
						|
 | 
						|
: (parse) ( state parser -- result )
 | 
						|
  compile call ;
 | 
						|
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
SYMBOL: packrat-cache
 | 
						|
SYMBOL: ignore 
 | 
						|
SYMBOL: not-in-cache
 | 
						|
 | 
						|
: not-in-cache? ( result -- ? )
 | 
						|
  not-in-cache = ;
 | 
						|
 | 
						|
: <parse-result> ( remaining ast -- parse-result )
 | 
						|
  parse-result construct-boa ;
 | 
						|
 | 
						|
SYMBOL: next-id 
 | 
						|
 | 
						|
: get-next-id ( -- number )
 | 
						|
  next-id get-global 0 or dup 1+ next-id set-global ;
 | 
						|
 | 
						|
TUPLE: parser id ;
 | 
						|
 | 
						|
: init-parser ( parser -- parser )
 | 
						|
  get-next-id parser construct-boa over set-delegate ;
 | 
						|
 | 
						|
: from ( slice-or-string -- index )
 | 
						|
  dup slice? [ slice-from ] [ drop 0 ] if ;
 | 
						|
 | 
						|
: get-cached ( input parser -- result )
 | 
						|
  [ from ] dip parser-id packrat-cache get at at* [ 
 | 
						|
    drop not-in-cache 
 | 
						|
  ] unless ;
 | 
						|
 | 
						|
: put-cached ( result input parser -- )
 | 
						|
  parser-id dup packrat-cache get at [ 
 | 
						|
    nip
 | 
						|
  ] [ 
 | 
						|
    H{ } clone dup >r swap packrat-cache get set-at r>
 | 
						|
  ] if* 
 | 
						|
  [ from ] dip set-at ;
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
: parse ( input parser -- result )
 | 
						|
  packrat-cache get [
 | 
						|
    2dup get-cached dup not-in-cache? [ 
 | 
						|
!      "cache missed: " write over parser-id number>string write " - " write nl ! pick .
 | 
						|
      drop 
 | 
						|
      #! Protect against left recursion blowing the callstack
 | 
						|
      #! by storing a failed parse in the cache.
 | 
						|
      [ f ] dipd  [ put-cached ] 2keep
 | 
						|
      [ (parse) dup ] 2keep put-cached
 | 
						|
    ] [ 
 | 
						|
!      "cache hit: " write over parser-id number>string write " - " write nl ! pick . 
 | 
						|
      2nip
 | 
						|
    ] if
 | 
						|
  ] [
 | 
						|
    (parse)
 | 
						|
  ] if ;
 | 
						|
 | 
						|
: packrat-parse ( input parser -- result )
 | 
						|
  H{ } clone packrat-cache [ parse ] with-variable ;
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
TUPLE: token-parser symbol ;
 | 
						|
 | 
						|
MATCH-VARS: ?token ;
 | 
						|
 | 
						|
: token-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    ?token 2dup head? [
 | 
						|
      dup >r length tail-slice r> <parse-result>
 | 
						|
    ] [
 | 
						|
      2drop f
 | 
						|
    ] if 
 | 
						|
  ] ;
 | 
						|
  
 | 
						|
M: token-parser compile ( parser -- quot )
 | 
						|
  token-parser-symbol \ ?token token-pattern match-replace ;
 | 
						|
      
 | 
						|
TUPLE: satisfy-parser quot ;
 | 
						|
 | 
						|
MATCH-VARS: ?quot ;
 | 
						|
 | 
						|
: satisfy-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup empty? [
 | 
						|
      drop f 
 | 
						|
    ] [
 | 
						|
      unclip-slice dup ?quot call [  
 | 
						|
        <parse-result>
 | 
						|
      ] [
 | 
						|
        2drop f
 | 
						|
      ] if
 | 
						|
    ] if 
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: satisfy-parser compile ( parser -- quot )
 | 
						|
  satisfy-parser-quot \ ?quot satisfy-pattern match-replace ;
 | 
						|
 | 
						|
TUPLE: range-parser min max ;
 | 
						|
 | 
						|
MATCH-VARS: ?min ?max ;
 | 
						|
 | 
						|
: range-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup empty? [
 | 
						|
      drop f
 | 
						|
    ] [
 | 
						|
      0 over nth dup 
 | 
						|
      ?min ?max between? [
 | 
						|
         [ 1 tail-slice ] dip <parse-result>
 | 
						|
      ] [
 | 
						|
        2drop f
 | 
						|
      ] if
 | 
						|
    ] if 
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: range-parser compile ( parser -- quot )
 | 
						|
  T{ range-parser _ ?min ?max } range-pattern match-replace ;
 | 
						|
 | 
						|
TUPLE: seq-parser parsers ;
 | 
						|
 | 
						|
: seq-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup [
 | 
						|
      dup parse-result-remaining ?quot call [
 | 
						|
        [ parse-result-remaining swap set-parse-result-remaining ] 2keep
 | 
						|
        parse-result-ast dup ignore = [ 
 | 
						|
          drop  
 | 
						|
        ] [ 
 | 
						|
          swap [ parse-result-ast push ] keep 
 | 
						|
        ] if
 | 
						|
      ] [
 | 
						|
        drop f 
 | 
						|
      ] if*
 | 
						|
    ] [
 | 
						|
      drop f
 | 
						|
    ] if  
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: seq-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    [ V{ } clone <parse-result> ] %
 | 
						|
    seq-parser-parsers [ compile \ ?quot seq-pattern match-replace % ] each 
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
TUPLE: choice-parser parsers ;
 | 
						|
 | 
						|
: choice-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup [
 | 
						|
          
 | 
						|
    ] [
 | 
						|
      drop dup ?quot call   
 | 
						|
    ] if
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: choice-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    f ,
 | 
						|
    choice-parser-parsers [ compile \ ?quot choice-pattern match-replace % ] each
 | 
						|
    \ nip ,
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
TUPLE: repeat0-parser p1 ;
 | 
						|
 | 
						|
: (repeat0) ( quot result -- result )
 | 
						|
  2dup parse-result-remaining swap call [
 | 
						|
    [ parse-result-remaining swap set-parse-result-remaining ] 2keep 
 | 
						|
    parse-result-ast swap [ parse-result-ast push ] keep
 | 
						|
    (repeat0) 
 | 
						|
 ] [
 | 
						|
    nip
 | 
						|
  ] if* ; inline
 | 
						|
 | 
						|
: repeat0-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    ?quot swap (repeat0) 
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: repeat0-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    [ V{ } clone <parse-result> ] %
 | 
						|
    repeat0-parser-p1 compile \ ?quot repeat0-pattern match-replace %        
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
TUPLE: repeat1-parser p1 ;
 | 
						|
 | 
						|
: repeat1-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    ?quot swap (repeat0) [
 | 
						|
      dup parse-result-ast empty? [
 | 
						|
        drop f
 | 
						|
      ] when  
 | 
						|
    ] [
 | 
						|
      f 
 | 
						|
    ] if*
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: repeat1-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    [ V{ } clone <parse-result> ] %
 | 
						|
    repeat1-parser-p1 compile \ ?quot repeat1-pattern match-replace % 
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
TUPLE: optional-parser p1 ;
 | 
						|
 | 
						|
: optional-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup ?quot call swap f <parse-result> or 
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: optional-parser compile ( parser -- quot )
 | 
						|
  optional-parser-p1 compile \ ?quot optional-pattern match-replace ;
 | 
						|
 | 
						|
TUPLE: ensure-parser p1 ;
 | 
						|
 | 
						|
: ensure-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup ?quot call [
 | 
						|
      ignore <parse-result>
 | 
						|
    ] [
 | 
						|
      drop f
 | 
						|
    ] if
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: ensure-parser compile ( parser -- quot )
 | 
						|
  ensure-parser-p1 compile \ ?quot ensure-pattern match-replace ;
 | 
						|
 | 
						|
TUPLE: ensure-not-parser p1 ;
 | 
						|
 | 
						|
: ensure-not-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    dup ?quot call [
 | 
						|
      drop f
 | 
						|
    ] [
 | 
						|
      ignore <parse-result>
 | 
						|
    ] if
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: ensure-not-parser compile ( parser -- quot )
 | 
						|
  ensure-not-parser-p1 compile \ ?quot ensure-not-pattern match-replace ;
 | 
						|
 | 
						|
TUPLE: action-parser p1 quot ;
 | 
						|
 | 
						|
MATCH-VARS: ?action ;
 | 
						|
 | 
						|
: action-pattern ( -- quot )
 | 
						|
  [
 | 
						|
    ?quot call dup [ 
 | 
						|
      dup parse-result-ast ?action call
 | 
						|
      swap [ set-parse-result-ast ] keep
 | 
						|
    ] when 
 | 
						|
  ] ;
 | 
						|
 | 
						|
M: action-parser compile ( parser -- quot )
 | 
						|
  { action-parser-p1 action-parser-quot } get-slots [ compile ] dip 
 | 
						|
  2array { ?quot ?action } action-pattern match-replace ;
 | 
						|
 | 
						|
: left-trim-slice ( string -- string )
 | 
						|
  #! Return a new string without any leading whitespace
 | 
						|
  #! from the original string.
 | 
						|
  dup empty? [
 | 
						|
    dup first blank? [ 1 tail-slice left-trim-slice ] when
 | 
						|
  ] unless ;
 | 
						|
 | 
						|
TUPLE: sp-parser p1 ;
 | 
						|
 | 
						|
M: sp-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    \ left-trim-slice , sp-parser-p1 compile % 
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
TUPLE: delay-parser quot ;
 | 
						|
 | 
						|
M: delay-parser compile ( parser -- quot )
 | 
						|
  [
 | 
						|
    delay-parser-quot % \ compile , \ call ,
 | 
						|
  ] [ ] make ;
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
MEMO: token ( string -- parser )
 | 
						|
  token-parser construct-boa init-parser ;      
 | 
						|
 | 
						|
: satisfy ( quot -- parser )
 | 
						|
  satisfy-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: range ( min max -- parser )
 | 
						|
  range-parser construct-boa init-parser ;
 | 
						|
 | 
						|
: seq ( seq -- parser )
 | 
						|
  seq-parser construct-boa init-parser ;
 | 
						|
 | 
						|
: 2seq ( parser1 parser2 -- parser )
 | 
						|
  2array seq ;
 | 
						|
 | 
						|
: 3seq ( parser1 parser2 parser3 -- parser )
 | 
						|
  3array seq ;
 | 
						|
 | 
						|
: 4seq ( parser1 parser2 parser3 parser4 -- parser )
 | 
						|
  4array seq ;
 | 
						|
 | 
						|
: seq* ( quot -- paser )
 | 
						|
  { } make seq ; inline 
 | 
						|
 | 
						|
: choice ( seq -- parser )
 | 
						|
  choice-parser construct-boa init-parser ;
 | 
						|
 | 
						|
: 2choice ( parser1 parser2 -- parser )
 | 
						|
  2array choice ;
 | 
						|
 | 
						|
: 3choice ( parser1 parser2 parser3 -- parser )
 | 
						|
  3array choice ;
 | 
						|
 | 
						|
: 4choice ( parser1 parser2 parser3 parser4 -- parser )
 | 
						|
  4array choice ;
 | 
						|
 | 
						|
: choice* ( quot -- paser )
 | 
						|
  { } make choice ; inline 
 | 
						|
 | 
						|
MEMO: repeat0 ( parser -- parser )
 | 
						|
  repeat0-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: repeat1 ( parser -- parser )
 | 
						|
  repeat1-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: optional ( parser -- parser )
 | 
						|
  optional-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: ensure ( parser -- parser )
 | 
						|
  ensure-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: ensure-not ( parser -- parser )
 | 
						|
  ensure-not-parser construct-boa init-parser ;
 | 
						|
 | 
						|
: action ( parser quot -- parser )
 | 
						|
  action-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: sp ( parser -- parser )
 | 
						|
  sp-parser construct-boa init-parser ;
 | 
						|
 | 
						|
MEMO: hide ( parser -- parser )
 | 
						|
  [ drop ignore ] action ;
 | 
						|
 | 
						|
MEMO: delay ( quot -- parser )
 | 
						|
  delay-parser construct-boa init-parser ;
 | 
						|
 | 
						|
: PEG:
 | 
						|
  (:) [
 | 
						|
    [
 | 
						|
        call compile
 | 
						|
        [ dup [ parse-result-ast ] [ "Parse failed" throw ] if ]
 | 
						|
        append define
 | 
						|
    ] with-compilation-unit
 | 
						|
  ] 2curry over push-all ; parsing
 |