factor/extra/peg/peg.factor

! Copyright (C) 2007 Chris Double.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences strings namespaces math assocs shuffle
       vectors arrays combinators.lib math.parser match
       unicode.categories sequences.lib compiler.units parser
       words quotations effects memoize accessors
       combinators.cleave locals ;
IN: peg

USE: prettyprint

! Result of a successful parse: 'remaining' holds the input that is
! left to parse and 'ast' holds the value produced by the parser.
! A failed parse is represented by f rather than by this tuple.
TUPLE: parse-result remaining ast ;

! Marker used as an ast value. seq-pattern leaves results whose ast
! is this symbol out of the sequence it collects.
SYMBOL: ignore

! Boa constructor for parse-result: ( remaining ast -- parse-result ),
! taking the slot values in slot order.
C: <parse-result> parse-result

! Variables holding the state of a packrat parse. with-packrat
! binds packrat, pos and input in a fresh namespace.
!   packrat - cache mapping a parser id to a table of memo entries
!             keyed by input position
!   pos     - current parse position, an index into the input
!   input   - the sequence being parsed
!   fail    - value returned by eval-rule when a rule does not match
SYMBOL: packrat
SYMBOL: pos
SYMBOL: input
SYMBOL: fail

! Entry in the packrat cache. 'ans' is the ast (or fail) produced by
! a rule and 'pos' is the parse position recorded for that answer.
TUPLE: memo-entry ans pos ;
C: <memo-entry> memo-entry

: rule-parser ( rule -- parser )
  #! A rule is the parser compiled down to a word. It has
  #! a "peg" property containing the original parser, which
  #! is what this word returns.
  "peg" word-prop ;

: input-slice ( -- slice )
  #! The part of the input that is still to be parsed: a slice
  #! running from the current parse position to the end.
  pos get input get swap tail-slice ;

: input-from ( input -- n )
  #! Index in the underlying sequence at which the given input
  #! starts. Anything that is not a slice starts at index 0.
  dup slice? not [ drop 0 ] [ slice-from ] if ;

: input-cache ( parser -- cache )
  #! From the packrat cache, obtain the cache for the parser
  #! that maps the position to the parser result. The lookup is
  #! by the parser's id; an empty table is created and stored the
  #! first time an id is seen.
  id>> packrat get [ drop H{ } clone ] cache ;

: eval-rule ( rule -- ast )
  #! Run a rule (a compiled parser word) at the current position.
  #! The rule takes nothing from the stack and leaves either a
  #! parse-result or f.
  #! On success the ast is returned and pos is moved to where the
  #! remaining input starts. On failure pos is put back to the
  #! value it had before the rule ran and fail is returned.
  pos get swap execute dup [
    nip
    [ ast>> ] [ remaining>> input-from pos set ] bi
  ] [
    drop pos set fail
  ] if ;

: memo ( pos rule -- memo-entry )
  #! Return the result from the memo cache: the entry stored for
  #! the rule's parser at the given position. apply-rule treats a
  #! false result as "nothing stored".
  rule-parser input-cache at ;

: set-memo ( memo-entry pos rule -- )
  #! Store an entry in the cache of the rule's parser, keyed by
  #! the given position.
  rule-parser input-cache set-at ;

:: apply-non-memo-rule ( r p -- ast )
  #! Evaluate rule r at position p when no memo entry exists yet.
  #! An entry holding fail is stored before the rule runs, so a
  #! lookup of the same rule at the same position made while the
  #! rule is running finds that entry. Afterwards the entry is
  #! updated with the actual answer and the resulting position.
  [let* |
          m   [ fail p <memo-entry> dup p r set-memo ]
          ans [ r eval-rule ]
        |
    ans m (>>ans)
    pos get m (>>pos)
    ans
  ] ;

: apply-memo-rule ( m -- ast )
  #! Replay a memo entry: move pos to the position stored in the
  #! entry and return the stored answer.
  [ pos>> pos set ] [ ans>> ] bi ;

:: apply-rule ( r p -- ast )
  #! Apply rule r at position p. When the packrat cache already
  #! holds an entry for this rule and position it is replayed,
  #! otherwise the rule is evaluated and its result recorded.
  p r memo
  [ apply-memo-rule ]
  [ r p apply-non-memo-rule ]
  if* ;

: with-packrat ( input quot -- result )
  #! Call the quotation in a fresh namespace in which input is
  #! bound to the given input, pos starts at 0 and packrat holds
  #! an empty cache.
  swap [
    H{ } clone packrat set
    0 pos set
    input set
  ] H{ } make-assoc swap bind ;


: compiled-parsers ( -- cache )
  #! Global table of compiled parsers. If no table is installed
  #! yet, an empty one is created, stored globally and returned.
  \ compiled-parsers get-global dup [
    drop H{ } clone dup \ compiled-parsers set-global
  ] unless ;

: reset-compiled-parsers ( -- )
  #! Replace the global table of compiled parsers with an empty one.
  H{ } clone \ compiled-parsers set-global ;

! Start from an empty table each time this file is loaded.
reset-compiled-parsers

! Produce the quotation that implements a parser. Each parser class
! in this file defines a method; the quotations they build leave a
! parse-result on success, or f on failure.
GENERIC: (compile) ( parser -- quot )


:: parser-body ( parser -- quot )
  #! Return the body of the word that is the compiled version
  #! of the parser.
  #! The parser's own quotation, obtained from (compile), is
  #! defined as a temporary word (the rule) carrying the parser in
  #! its "peg" property. The returned body applies that rule at
  #! the current position through apply-rule, and turns the answer
  #! into f on failure or into a parse-result holding the
  #! remaining input and the ast on success.
  [let* | rule [ parser (compile) define-temp dup parser "peg" set-word-prop ]
        |
    [
      rule pos get apply-rule dup fail = [
        drop f
      ] [
        input-slice swap <parse-result>
      ] if
    ]
  ] ;

: compiled-parser ( parser -- word )
  #! Look to see if the given parser has been compiled.
  #! If not, compile it to a temporary word, cache it,
  #! and return it. Otherwise return the existing one.
  #! Circular parsers are supported by getting the word
  #! name and storing it in the cache, before compiling,
  #! so it is picked up when re-entered.
  #! The cache is the compiled-parsers table, keyed by the
  #! parser's id. The new word gets parser-body as its
  #! definition and the parser as its "peg" property.
  dup id>> compiled-parsers [
    drop dup gensym swap 2dup id>> compiled-parsers set-at
    2dup parser-body define
    dupd "peg" set-word-prop
  ] cache nip ;

: compile ( parser -- word )
  #! Compile the parser to a word (see compiled-parser), doing the
  #! word definitions inside a compilation unit.
  [ compiled-parser ] with-compilation-unit ;

: parse ( state parser -- result )
  #! Parse the input 'state' with the parser. The parser may be a
  #! parser object, which is compiled first, or an already
  #! compiled word. The word is executed with fresh packrat state
  #! set up by with-packrat. The result is a parse-result, or f
  #! if the parse failed.
  dup word? [ compile ] unless
  [ execute ] curry with-packrat ;

<PRIVATE

! Global counter holding the next parser id to hand out.
SYMBOL: id

: next-id ( -- n )
  #! Return the next unique id for a parser. Ids start at 0 when
  #! the counter has not been set, and the counter is advanced
  #! by one on every call.
  id get-global [ 0 ] unless*
  dup 1+ id set-global ;

! Base tuple carrying a parser's id. init-parser installs an
! instance of it as the delegate of every concrete parser.
TUPLE: parser id ;
! Two parsers compare equal when their ids are equal.
M: parser equal? [ id>> ] 2apply = ;
C: <parser> parser

: delegates ( -- cache )
  #! Global table of parser delegates. If no table is installed
  #! yet, an empty one is created, stored globally and returned.
  \ delegates get-global dup [
    drop H{ } clone dup \ delegates set-global
  ] unless ;

: reset-delegates ( -- )
  #! Replace the global table of parser delegates with an empty one.
  H{ } clone \ delegates set-global ;

! Start from an empty table each time this file is loaded.
reset-delegates

: init-parser ( parser -- parser )
  #! Give the parser its delegate, a 'parser' tuple holding an id.
  #! The delegates table is keyed by a copy of the parser taken
  #! before the delegate is set, so equivalent parsers share one
  #! delegate and therefore the same id.
  dup dup clone delegates [
    drop next-id <parser>
  ] cache swap set-delegate ;

! Parser matching a literal string, held in 'symbol'.
TUPLE: token-parser symbol ;

! Match variable; not referenced by the other definitions visible
! in this file.
MATCH-VARS: ?token ;

: parse-token ( input string -- result )
  #! Match a literal string at the start of the input. When the
  #! input begins with the string, the result has the string as
  #! its ast and the input after it as the remainder. Otherwise
  #! the result is f.
  2dup head? [
    [ length tail-slice ] keep <parse-result>
  ] [
    2drop f
  ] if ;

M: token-parser (compile) ( parser -- quot )
  #! Builds the quotation: input-slice <symbol> parse-token
  [ \ input-slice , symbol>> , \ parse-token , ] [ ] make ;

! Parser matching a single input element accepted by 'quot'.
TUPLE: satisfy-parser quot ;

! Placeholder substituted by match-replace in the pattern templates.
MATCH-VARS: ?quot ;

: satisfy-pattern ( -- quot )
  #! Template for satisfy-parser; ?quot is replaced by the
  #! parser's quotation. The result fails (f) on empty input.
  #! Otherwise the first element is passed to the quotation: if it
  #! answers true the element becomes the ast and the rest of the
  #! input the remainder, if not the result is f.
  [
    input-slice dup empty? [
      drop f
    ] [
      unclip-slice dup ?quot call [
        <parse-result>
      ] [
        2drop f
      ] if
    ] if
  ] ;

M: satisfy-parser (compile) ( parser -- quot )
  #! Substitute the parser's quotation for ?quot in satisfy-pattern.
  quot>> \ ?quot satisfy-pattern match-replace ;

! Parser matching a single input element lying between 'min' and 'max'.
TUPLE: range-parser min max ;

! Placeholders substituted by match-replace in range-pattern.
MATCH-VARS: ?min ?max ;

: range-pattern ( -- quot )
  #! Template for range-parser; ?min and ?max are replaced by the
  #! parser's bounds. The result fails (f) on empty input.
  #! Otherwise the first element is tested with between?: if it
  #! passes it becomes the ast and the rest of the input the
  #! remainder, if not the result is f.
  [
    input-slice dup empty? [
      drop f
    ] [
      unclip-slice dup ?min ?max between? [
        <parse-result>
      ] [
        2drop f
      ] if
    ] if
  ] ;

M: range-parser (compile) ( parser -- quot )
  #! Match the parser against a range-parser pattern to bind ?min
  #! and ?max, then substitute them into range-pattern.
  #! NOTE(review): the leading _ presumably matches the delegate
  #! slot of the tuple - confirm.
  T{ range-parser _ ?min ?max } range-pattern match-replace ;

! Parser applying a list of parsers one after another.
TUPLE: seq-parser parsers ;

: seq-pattern ( -- quot )
  #! Template for one step of a sequence; ?quot is replaced by the
  #! compiled word of one element parser. The step expects the
  #! accumulated parse-result, or f, on the stack.
  #! If the accumulator is f it stays f. Otherwise the element
  #! parser is run: on failure the step yields f; on success the
  #! accumulator takes over the new remaining input and the new
  #! ast is pushed onto the accumulator's ast, unless that ast is
  #! the ignore symbol.
  [
    dup [
      ?quot [
        [ remaining>> swap (>>remaining) ] 2keep
        ast>> dup ignore = [
          drop
        ] [
          swap [ ast>> push ] keep
        ] if
      ] [
        drop f
      ] if*
    ] [
      drop f
    ] if
  ] ;

M: seq-parser (compile) ( parser -- quot )
  #! The quotation starts with a parse-result whose ast is an empty
  #! vector, followed by one seq-pattern step per element parser.
  [
    [ input-slice V{ } clone <parse-result> ] %
    parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
  ] [ ] make ;

! Parser trying a list of alternative parsers in order.
TUPLE: choice-parser parsers ;

: choice-pattern ( -- quot )
  #! Template for one alternative; ?quot is replaced by the
  #! compiled word of that alternative. An earlier successful
  #! result on the stack is kept as is; when the value on the
  #! stack is f this alternative is run instead.
  [
    dup [ drop ?quot ] unless
  ] ;

M: choice-parser (compile) ( parser -- quot )
  #! The quotation starts from f and is followed by one
  #! choice-pattern step per alternative, so the result is that of
  #! the first alternative to succeed, or f when none does.
  [
    f ,
    parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
  ] [ ] make ;

! Parser applying parser 'p1' repeatedly, zero or more times.
TUPLE: repeat0-parser p1 ;

: (repeat0) ( quot result -- result )
  #! Call the quotation repeatedly for as long as it yields a
  #! parse-result. Each time, the accumulated result takes over
  #! the new remaining input and the new ast is pushed onto the
  #! accumulated ast. When the quotation yields f the accumulated
  #! result is returned, so the return value is always the result
  #! that was passed in.
  over call [
    [ remaining>> swap (>>remaining) ] 2keep
    ast>> swap [ ast>> push ] keep
    (repeat0)
 ] [
    nip
  ] if* ; inline

: repeat0-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1. It
  #! expects the initial accumulated result on the stack.
  [
    [ ?quot ] swap (repeat0)
  ] ;

M: repeat0-parser (compile) ( parser -- quot )
  #! The quotation starts with a parse-result whose ast is an empty
  #! vector and then runs repeat0-pattern on it.
  [
    [ input-slice V{ } clone <parse-result> ] %
    p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
  ] [ ] make ;

! Parser applying parser 'p1' repeatedly, requiring at least one match.
TUPLE: repeat1-parser p1 ;

: repeat1-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1. It
  #! expects the initial accumulated result on the stack and
  #! collects matches with (repeat0). The result is f when nothing
  #! was collected, otherwise the accumulated result.
  #! (repeat0) always returns the result it was given, never f,
  #! so no separate failure branch is needed after it.
  [
    [ ?quot ] swap (repeat0)
    dup ast>> empty? [ drop f ] when
  ] ;

M: repeat1-parser (compile) ( parser -- quot )
  #! The quotation starts with a parse-result whose ast is an empty
  #! vector and then runs repeat1-pattern on it.
  [
    [ input-slice V{ } clone <parse-result> ] %
    p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
  ] [ ] make ;

! Parser that applies parser 'p1' but succeeds even if p1 fails.
TUPLE: optional-parser p1 ;

: optional-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1. The
  #! result of p1 is used when it succeeds. When it fails, the
  #! result is a parse-result over the current input whose ast
  #! is f.
  [
    ?quot dup [ drop input-slice f <parse-result> ] unless
  ] ;

M: optional-parser (compile) ( parser -- quot )
  #! Substitute the compiled word of p1 into optional-pattern.
  p1>> compiled-parser \ ?quot optional-pattern match-replace ;

! Parser that succeeds only when parser 'p1' matches.
TUPLE: ensure-parser p1 ;

: ensure-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1. The
  #! input slice is taken before p1 runs. If p1 succeeds the
  #! result is a parse-result with that slice as its remainder and
  #! ignore as its ast; if p1 fails the result is f.
  [
    input-slice ?quot [
      ignore <parse-result>
    ] [
      drop f
    ] if
  ] ;

M: ensure-parser (compile) ( parser -- quot )
  #! Substitute the compiled word of p1 into ensure-pattern.
  p1>> compiled-parser \ ?quot ensure-pattern match-replace ;

! Parser that succeeds only when parser 'p1' does not match.
TUPLE: ensure-not-parser p1 ;

: ensure-not-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1. The
  #! input slice is taken before p1 runs. If p1 succeeds the
  #! result is f; if p1 fails the result is a parse-result with
  #! that slice as its remainder and ignore as its ast.
  [
    input-slice ?quot [
      drop f
    ] [
      ignore <parse-result>
    ] if
  ] ;

M: ensure-not-parser (compile) ( parser -- quot )
  #! Substitute the compiled word of p1 into ensure-not-pattern.
  p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ;

! Parser that applies parser 'p1' and transforms its ast with 'quot'.
TUPLE: action-parser p1 quot ;

! Placeholder for the action quotation in action-pattern.
MATCH-VARS: ?action ;

: action-pattern ( -- quot )
  #! Template; ?quot is replaced by the compiled word of p1 and
  #! ?action by the action quotation. When p1 succeeds, the
  #! action is called on the ast and its output is stored back as
  #! the ast of the same result. A failure (f) passes through.
  [
    ?quot dup [
      dup ast>> ?action call
      >>ast
    ] when
  ] ;

M: action-parser (compile) ( parser -- quot )
  #! Substitute the compiled word of p1 and the action quotation
  #! into action-pattern.
  [ p1>> compiled-parser ] [ quot>> ] bi
  2array { ?quot ?action } action-pattern match-replace ;

: left-trim-slice ( string -- string )
  #! Return the input without its leading whitespace. Leading
  #! blank elements are dropped one at a time by taking tail
  #! slices. An input that is empty or does not start with
  #! whitespace is returned as is.
  dup empty? [
    dup first blank? [ 1 tail-slice left-trim-slice ] when
  ] unless ;

! Parser that skips leading whitespace before applying parser 'p1'.
TUPLE: sp-parser p1 ;

M: sp-parser (compile) ( parser -- quot )
  #! Builds the quotation:
  #!   input-slice left-trim-slice input-from pos set <p1 word>
  #! It moves pos to the start of the whitespace-trimmed input and
  #! then runs the compiled word of p1.
  [
    \ input-slice , \ left-trim-slice , \ input-from , \ pos , \ set , p1>> compiled-parser ,
  ] [ ] make ;

! Parser whose actual parser is produced by calling 'quot' when the
! compiled quotation runs, rather than when it is built.
TUPLE: delay-parser quot ;

M: delay-parser (compile) ( parser -- quot )
  #! For efficiency we memoize the quotation.
  #! This way it is run only once and the
  #! parser constructed once at run time.
  #! The memoized quotation is the parser's quot followed by
  #! compile, so it yields a word; the final quotation runs the
  #! memoized part and then executes that word.
  [
    quot>> % \ compile ,
  ] [ ] make
  { } { "word" } <effect> memoize-quot
  [ % \ execute , ] [ ] make ;

PRIVATE>

: token ( string -- parser )
  #! Parser matching the given string at the start of the input.
  #! The ast of a match is the string.
  token-parser construct-boa init-parser ;

: satisfy ( quot -- parser )
  #! Parser matching a single input element for which the
  #! quotation answers true. The ast of a match is that element.
  satisfy-parser construct-boa init-parser ;

: range ( min max -- parser )
  #! Parser matching a single input element that between? accepts
  #! for min and max. The ast of a match is that element.
  range-parser construct-boa init-parser ;

: seq ( seq -- parser )
  #! Parser applying each parser of the sequence in turn; it fails
  #! as soon as one of them fails. The ast is a vector of the
  #! parsers' asts, leaving out any that are the ignore symbol.
  seq-parser construct-boa init-parser ;

: 2seq ( parser1 parser2 -- parser )
  #! seq of the two given parsers.
  2array seq ;

: 3seq ( parser1 parser2 parser3 -- parser )
  #! seq of the three given parsers.
  3array seq ;

: 4seq ( parser1 parser2 parser3 parser4 -- parser )
  #! seq of the four given parsers.
  4array seq ;

: seq* ( quot -- parser )
  #! Run the quotation inside make, collecting what it adds into
  #! an array, and build a seq parser from that array.
  { } make seq ; inline

: choice ( seq -- parser )
  #! Parser trying each parser of the sequence in order. The
  #! result is that of the first one to succeed; it fails when
  #! none of them succeeds.
  choice-parser construct-boa init-parser ;

: 2choice ( parser1 parser2 -- parser )
  #! choice between the two given parsers.
  2array choice ;

: 3choice ( parser1 parser2 parser3 -- parser )
  #! choice between the three given parsers.
  3array choice ;

: 4choice ( parser1 parser2 parser3 parser4 -- parser )
  #! choice between the four given parsers.
  4array choice ;

: choice* ( quot -- parser )
  #! Run the quotation inside make, collecting what it adds into
  #! an array, and build a choice parser from that array.
  { } make choice ; inline

: repeat0 ( parser -- parser )
  #! Parser applying the given parser for as long as it succeeds.
  #! The ast is a vector of the collected asts, which may be empty.
  repeat0-parser construct-boa init-parser ;

: repeat1 ( parser -- parser )
  #! Like repeat0, but fails when nothing was collected.
  repeat1-parser construct-boa init-parser ;

: optional ( parser -- parser )
  #! Parser that uses the given parser's result when it succeeds
  #! and otherwise succeeds with f as the ast.
  optional-parser construct-boa init-parser ;

: ensure ( parser -- parser )
  #! Parser that succeeds, with ignore as its ast and the input as
  #! it was before the check as its remainder, when the given
  #! parser succeeds. It fails when the given parser fails.
  ensure-parser construct-boa init-parser ;

: ensure-not ( parser -- parser )
  #! Parser that fails when the given parser succeeds. When the
  #! given parser fails it succeeds, with ignore as its ast and
  #! the input as it was before the check as its remainder.
  ensure-not-parser construct-boa init-parser ;

: action ( parser quot -- parser )
  #! Parser that applies the given parser and, on success,
  #! replaces the ast with the output of calling quot on it.
  action-parser construct-boa init-parser ;

: sp ( parser -- parser )
  #! Parser that skips leading whitespace in the input and then
  #! applies the given parser.
  sp-parser construct-boa init-parser ;

: hide ( parser -- parser )
  #! Parser that applies the given parser and replaces its ast
  #! with the ignore symbol.
  [ drop ignore ] action ;

: delay ( quot -- parser )
  #! Parser whose real parser is obtained by calling quot when the
  #! compiled parser runs; see the delay-parser (compile) method.
  delay-parser construct-boa init-parser ;

: PEG:
  #! Parsing word that defines a word from a parser-building body.
  #! The word name and body are read with (:). The code appended
  #! to the parse accumulator calls the body to obtain a parser,
  #! compiles it, and defines the new word.
  #! The defined word hands its input and the compiled word to
  #! parse, because compiled parser words take nothing from the
  #! stack and rely on the input, pos and packrat variables that
  #! with-packrat binds. It then returns the ast of the result, or
  #! throws "Parse failed" when the result is f.
  (:) [
    [
        call compile [ parse ] curry
        [ dup [ ast>> ] [ "Parse failed" throw ] if ]
        append define
    ] with-compilation-unit
  ] 2curry over push-all ; parsing