! factor/library/syntax/parser.factor

! Copyright (C) 2005 Slava Pestov.
! See http://factor.sf.net/license.txt for BSD license.
IN: parser
USING: errors kernel lists math namespaces streams strings words
unparser ;

! The parser uses a number of variables:
! line - the line being parsed
! col  - position in the line
! use  - list of vocabularies
! in   - vocabulary for new words
!
! When a token is scanned, it is searched for in the 'use' list
! of vocabularies. If it is a parsing word, it is executed
! immediately. Otherwise it is appended to the parse tree.

! Variable read by save-location, which stores its value in the
! "file" property of each newly defined word.
SYMBOL: file

: parsing? ( word -- ? )
    #! Outputs the "parsing" property of a word. Any object
    #! that is not a word gives f.
    dup word? not [ drop f ] [ "parsing" word-prop ] ifte ;

: skip ( n line quot -- n )
    #! Find the next character that satisfies the quotation,
    #! which should have stack effect ( ch -- ? ).
    #! The search starts at index n. The output is the index
    #! of the first matching character, or the length of the
    #! line if n is not a valid index or nothing matches.
    #! The quotation is parked on the return stack between
    #! tests, and the word recurses with n incremented.
    >r 2dup string-length < [
        2dup string-nth r> dup >r call [
            r> 2drop
        ] [
            >r 1 + r> r> skip
        ] ifte
    ] [
        r> drop nip string-length
    ] ifte ; inline

: skip-blank ( n line -- n )
    #! Index of the first character at or after n for which
    #! blank? is false, or the length of the line if there is
    #! no such character.
    [ blank? not ] skip ;

: denotation? ( ch -- ? )
    #! Hard-coded for now. Make this customizable later.
    #! A 'denotation' is a character that is treated as its
    #! own word, eg:
    #!
    #! "hello world"
    #!
    #! Will call the parsing word ".
    #! At present the double quote is the only character in
    #! the set that is tested against.
    "\"" string-contains? ;

: skip-word ( n line -- n )
    #! Given the index of the first character of a token,
    #! output the index just past that token. A denotation is
    #! a token all by itself; any other token extends to the
    #! next blank or to the end of the line.
    2dup string-nth denotation? not [
        [ blank? ] skip
    ] [
        drop 1 +
    ] ifte ;

: (scan) ( n line -- start end )
    #! Skip blanks beginning at n, then locate the end of the
    #! token that starts there. When nothing but blanks is
    #! left, start and end are both the length of the line.
    dup >r skip-blank dup r>
    2dup string-length < [ skip-word ] [ drop ] ifte ;

: scan ( -- token )
    #! Read the next token from the "line" variable, starting
    #! at "col", and store the index just past it back into
    #! "col". Outputs f when no token is left.
    "line" get dup >r "col" get swap (scan) dup "col" set
    2dup = r> swap [ 3drop f ] [ substring ] ifte ;

! If this variable is on, the parser does not internalize words;
! it just appends strings to the parse tree as they are read.
! The ";" token is the one exception: scan-word still looks it
! up even while string mode is on.
SYMBOL: string-mode
! String mode starts out off in the global namespace.
global [ string-mode off ] bind

: scan-word ( -- obj )
    #! Read the next token and convert it to an object.
    #! Outputs f when scan finds no more tokens. In string
    #! mode every token except ";" is left as a string.
    #! Otherwise the token is searched for in the "use" list,
    #! with str>number as the alternative branch of ?ifte.
    scan dup [
        string-mode get over ";" = not and [
            dup "use" get search [ ] [ str>number ] ?ifte
        ] unless
    ] when ;

: parse-loop ( -- )
    #! Consume objects from scan-word until it outputs f.
    #! A parsing word is executed on the spot; any other
    #! object is consed with swons onto the parse tree that
    #! the caller left on the stack.
    scan-word [
        dup parsing? [ execute ] [ swons ] ifte  parse-loop
    ] when* ;

: (parse) ( str -- )
    #! Parse one string: store it in "line", reset "col" to
    #! 0, run the parse loop, and switch both variables off
    #! again when the loop finishes.
    0 "col" set "line" set
    parse-loop
    "col" off "line" off ;

: parse ( str -- code )
    #! Parse the string into a parse tree that can be executed.
    #! The tree starts as f, is accumulated back to front by
    #! (parse), and is reversed once at the end.
    f swap (parse) reverse ;

: eval ( "X" -- X )
    #! Parse the string and immediately call the resulting
    #! parse tree.
    parse call ;

! Used by parsing words
: ch-search ( ch -- index )
    #! Search the "line" variable for ch, starting from the
    #! index held in "col", using index-of*.
    >r "col" get "line" get r> index-of* ;

: (until) ( index -- str )
    #! Output the part of "line" from "col" up to index, and
    #! move "col" to the position just after index.
    "col" get over 1 + "col" set swap "line" get substring ;

: until ( ch -- str )
    #! Output the text from "col" up to the next occurrence
    #! of ch, leaving "col" just after that character.
    #! NOTE(review): a failed search is not handled here; the
    #! index from ch-search goes straight to (until). Confirm
    #! callers only use this when ch is known to be present.
    ch-search (until) ;

: (until-eol) ( -- index )
    #! Index of the next newline found by ch-search. If the
    #! search gives -1, the length of "line" is used instead.
    "\n" ch-search dup -1 = [ drop "line" get string-length ] when ;

: until-eol ( -- str )
    #! Output the text from "col" to the next newline, or to
    #! the end of "line" if it contains no further newline.
    #! This is just a hack to get "eval" to work with multiline
    #! strings from jEdit with EOL comments. Normally, input to
    #! the parser is already line-tokenized.
    (until-eol) (until) ;

: save-location ( word -- )
    #! Remember where this word was defined.
    #! The word is first passed to set-word. Then the values
    #! of the line-number, "col" and file variables are stored
    #! in its "line", "col" and "file" properties.
    dup set-word
    dup line-number get "line" set-word-prop
    dup "col" get "col"  set-word-prop
    file get "file" set-word-prop ;

: create-in ( name -- word )
    #! Create a word with this name in the vocabulary held by
    #! the "in" variable, and save its source location.
    "in" get create dup save-location ;

: CREATE ( -- word )
    #! Scan the next token and create a word of that name
    #! with create-in.
    scan create-in ;

: escape ( ch -- esc )
    #! Translate the character that follows a backslash into
    #! the character it denotes. Throws "Bad escape" when the
    #! character has no entry in the table.
    [
        [[ CHAR: e  CHAR: \e ]]
        [[ CHAR: n  CHAR: \n ]]
        [[ CHAR: r  CHAR: \r ]]
        [[ CHAR: t  CHAR: \t ]]
        [[ CHAR: s  CHAR: \s ]]
        [[ CHAR: \s CHAR: \s ]]
        [[ CHAR: 0  CHAR: \0 ]]
        [[ CHAR: \\ CHAR: \\ ]]
        [[ CHAR: \" CHAR: \" ]]
    ] assoc dup not [ "Bad escape" throw ] when ;

: next-escape ( n str -- ch n )
    #! Decode the escape whose first character is at index n.
    #! For "u", the four characters after it are read as a
    #! hex number and n moves past them. Anything else is
    #! translated by escape and n moves forward by one.
    2dup string-nth CHAR: u = [
        swap 1 + dup 4 + dup >r rot substring hex> r>
    ] [
        over >r string-nth escape r> 1 +
    ] ifte ;

: next-char ( n str -- ch n )
    #! Read the character at index n. A backslash there hands
    #! the following index to next-escape for decoding; any
    #! other character is output as is. The n that is output
    #! is the index of the next unread character.
    2dup string-nth CHAR: \\ = [
        swap 1 + swap next-escape
    ] [
        over >r string-nth r> 1 +
    ] ifte ;

: doc-comment-here? ( parsed -- ? )
    #! True when nothing has been parsed so far (parsed is f)
    #! and the "in-definition" variable is set.
    not "in-definition" get and ;

: parsed-stack-effect ( parsed str -- parsed )
    #! Store str as the "stack-effect" property of the current
    #! word, but only when a doc comment is allowed here and
    #! the word has no stack effect yet. Otherwise str is
    #! dropped. The parse tree is passed through unchanged.
    over doc-comment-here? [
        word "stack-effect" word-prop not
    ] [
        f
    ] ifte [
        word swap "stack-effect" set-word-prop
    ] [
        drop
    ] ifte ;

: documentation+ ( word str -- )
    #! Add str to the "documentation" property of the word.
    #! If the word already has documentation, str is appended
    #! to it after a newline; otherwise str becomes the whole
    #! documentation.
    over "documentation" word-prop dup [
        "\n" rot cat3
    ] [
        drop
    ] ifte
    "documentation" set-word-prop ;

: parsed-documentation ( parsed str -- parsed )
    #! Append str to the documentation of the current word
    #! when a doc comment is allowed at this point; otherwise
    #! discard it. The parse tree is passed through unchanged.
    over doc-comment-here? [
        word swap documentation+
    ] [
        drop
    ] ifte ;
tuples gracefully handle changing shape 2005-02-09 22:35:11 -05:00			`! Copyright (C) 2005 Slava Pestov.`
			`! See http://factor.sf.net/license.txt for BSD license.`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`IN: parser`
inlining method body if type of object passed to generic is known 2005-02-24 20:52:17 -05:00			`USING: errors kernel lists math namespaces streams strings words`
tuples gracefully handle changing shape 2005-02-09 22:35:11 -05:00			`unparser ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
			`! The parser uses a number of variables:`
			`! line - the line being parsed`
			`! pos - position in the line`
			`! use - list of vocabularies`
			`! in - vocabulary for new words`
			`!`
			`! When a token is scanned, it is searched for in the 'use' list`
			`! of vocabularies. If it is a parsing word, it is executed`
			`! immediately. Otherwise it is appended to the parse tree.`

PowerPC backend functional 2005-03-21 20:53:26 -05:00			`SYMBOL: file`

work on native factor, httpd now uses catch 2004-07-21 19:26:41 -04:00			`: parsing? ( word -- ? )`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`dup word? [ "parsing" word-prop ] [ drop f ] ifte ;`
work on native factor, httpd now uses catch 2004-07-21 19:26:41 -04:00
guard pages 2004-08-12 02:13:43 -04:00			`: skip ( n line quot -- n )`
			`#! Find the next character that satisfies the quotation,`
			`#! which should have stack effect ( ch -- ? ).`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`>r 2dup string-length < [`
			`2dup string-nth r> dup >r call [`
guard pages 2004-08-12 02:13:43 -04:00			`r> 2drop`
			`] [`
type inference changes, comment out smart-terminal reference in win32-console, win32-compatible factor plugin 2004-12-29 03:35:46 -05:00			`>r 1 + r> r> skip`
guard pages 2004-08-12 02:13:43 -04:00			`] ifte`
			`] [`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`r> drop nip string-length`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`] ifte ; inline`
guard pages 2004-08-12 02:13:43 -04:00
			`: skip-blank ( n line -- n )`
			`[ blank? not ] skip ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
guard pages 2004-08-12 02:13:43 -04:00			`: denotation? ( ch -- ? )`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`#! Hard-coded for now. Make this customizable later.`
guard pages 2004-08-12 02:13:43 -04:00			`#! A 'denotation' is a character that is treated as its`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`#! own word, eg:`
			`#!`
			`#! "hello world"`
			`#!`
			`#! Will call the parsing word ".`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`"\"" string-contains? ;`
guard pages 2004-08-12 02:13:43 -04:00
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: skip-word ( n line -- n )`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`2dup string-nth denotation? [`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`drop 1 +`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`] [`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`[ blank? ] skip`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`] ifte ;`

more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: (scan) ( n line -- start end )`
			`[ skip-blank dup ] keep`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`2dup string-length < [ skip-word ] [ drop ] ifte ;`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00
guard pages 2004-08-12 02:13:43 -04:00			`: scan ( -- token )`
client socket works 2004-08-18 19:22:15 -04:00			`"col" get "line" get dup >r (scan) dup "col" set`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`2dup = [ r> 3drop f ] [ r> substring ] ifte ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
USING: parsing word more compact than multiple USE: 2005-01-29 14:18:28 -05:00			`! If this variable is on, the parser does not internalize words;`
			`! it just appends strings to the parse tree as they are read.`
			`SYMBOL: string-mode`
			`global [ string-mode off ] bind`

some parse-number cleanup, documentation work 2004-09-14 23:23:05 -04:00			`: scan-word ( -- obj )`
			`scan dup [`
USING: parsing word more compact than multiple USE: 2005-01-29 14:18:28 -05:00			`dup ";" = not string-mode get and [`
removed ?when and ?unless 2005-03-21 14:39:46 -05:00			`dup "use" get search [ ] [ str>number ] ?ifte`
USING: parsing word more compact than multiple USE: 2005-01-29 14:18:28 -05:00			`] unless`
some parse-number cleanup, documentation work 2004-09-14 23:23:05 -04:00			`] when ;`
work on native factor, httpd now uses catch 2004-07-21 19:26:41 -04:00
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: parse-loop ( -- )`
			`scan-word [`
			`dup parsing? [ execute ] [ swons ] ifte parse-loop`
			`] when* ;`

some parse-number cleanup, documentation work 2004-09-14 23:23:05 -04:00			`: (parse) ( str -- )`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`"line" set 0 "col" set`
			`parse-loop`
			`"line" off "col" off ;`
parse-stream and better catch 2004-07-18 19:52:01 -04:00
			`: parse ( str -- code )`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`#! Parse the string into a parse tree that can be executed.`
possibly controversial: removed destructive list manipulation; other cleanups 2004-10-12 01:11:35 -04:00			`f swap (parse) reverse ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
			`: eval ( "X" -- X )`
			`parse call ;`

CHAR: notation for literal chars, native parser work 2004-07-22 19:48:50 -04:00			`! Used by parsing words`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00			`: ch-search ( ch -- index )`
client socket works 2004-08-18 19:22:15 -04:00			`"col" get "line" get rot index-of* ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
			`: (until) ( index -- str )`
type inference changes, comment out smart-terminal reference in win32-console, win32-compatible factor plugin 2004-12-29 03:35:46 -05:00			`"col" get swap dup 1 + "col" set "line" get substring ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
			`: until ( ch -- str )`
			`ch-search (until) ;`

a few bug fixes 2004-10-27 23:13:00 -04:00			`: (until-eol) ( -- index )`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`"\n" ch-search dup -1 = [ drop "line" get string-length ] when ;`
a few bug fixes 2004-10-27 23:13:00 -04:00
CHAR: notation for literal chars, native parser work 2004-07-22 19:48:50 -04:00			`: until-eol ( -- str )`
a few bug fixes 2004-10-27 23:13:00 -04:00			`#! This is just a hack to get "eval" to work with multiline`
			`#! strings from jEdit with EOL comments. Normally, input to`
			`#! the parser is already line-tokenized.`
			`(until-eol) (until) ;`
First import of Factor 0.60 2004-07-16 02:26:21 -04:00
tuples gracefully handle changing shape 2005-02-09 22:35:11 -05:00			`: save-location ( word -- )`
			`#! Remember where this word was defined.`
			`dup set-word`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`dup line-number get "line" set-word-prop`
			`dup "col" get "col" set-word-prop`
			`file get "file" set-word-prop ;`
tuples gracefully handle changing shape 2005-02-09 22:35:11 -05:00
plugin improvements 2005-03-23 22:49:40 -05:00			`: create-in "in" get create dup save-location ;`
tuples gracefully handle changing shape 2005-02-09 22:35:11 -05:00
plugin improvements 2005-03-23 22:49:40 -05:00			`: CREATE ( -- word ) scan create-in ;`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: escape ( ch -- esc )`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`[`
[[ car cdr ]] syntax replaces [ car \| cdr ] 2005-01-13 19:49:47 -05:00			`[[ CHAR: e CHAR: \e ]]`
			`[[ CHAR: n CHAR: \n ]]`
			`[[ CHAR: r CHAR: \r ]]`
			`[[ CHAR: t CHAR: \t ]]`
			`[[ CHAR: s CHAR: \s ]]`
			`[[ CHAR: \s CHAR: \s ]]`
			`[[ CHAR: 0 CHAR: \0 ]]`
			`[[ CHAR: \\ CHAR: \\ ]]`
			`[[ CHAR: \" CHAR: \" ]]`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`] assoc dup [ "Bad escape" throw ] unless ;`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: next-escape ( n str -- ch n )`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`2dup string-nth CHAR: u = [`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`swap 1 + dup 4 + [ rot substring hex> ] keep`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`] [`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`over 1 + >r string-nth escape r>`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`] ifte ;`

more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`: next-char ( n str -- ch n )`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`2dup string-nth CHAR: \\ = [`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`>r 1 + r> next-escape`
			`] [`
hashtables were sized wrong; rename str- words to string- 2005-03-05 16:33:40 -05:00			`over 1 + >r string-nth r>`
more parser cleanups; stack inference cleanups 2005-01-14 14:56:19 -05:00			`] ifte ;`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00
			`: doc-comment-here? ( parsed -- ? )`
			`not "in-definition" get and ;`

			`: parsed-stack-effect ( parsed str -- parsed )`
			`over doc-comment-here? [`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`word "stack-effect" word-prop [`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`drop`
			`] [`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`word swap "stack-effect" set-word-prop`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`] ifte`
			`] [`
			`drop`
			`] ifte ;`
various cleanups, code primitive now shows code space usage, :get and &get, working on inferencer 2004-11-20 16:57:01 -05:00
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`: documentation+ ( word str -- )`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`over "documentation" word-prop [`
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`swap "\n" swap cat3`
			`] when*`
renaming word-parameter to word-def; renaming word-property to word-prop 2005-03-05 14:45:23 -05:00			`"documentation" set-word-prop ;`
various cleanups, code primitive now shows code space usage, :get and &get, working on inferencer 2004-11-20 16:57:01 -05:00
reworked bootstrap code, a lot of cleanups 2004-12-15 16:57:29 -05:00			`: parsed-documentation ( parsed str -- parsed )`
			`over doc-comment-here? [`
			`word swap documentation+`
			`] [`
			`drop`
			`] ifte ;`