260 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Factor
		
	
	
			
		
		
	
	
			260 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Factor
		
	
	
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
 | 
						|
! See http://factorcode.org/license.txt for BSD license.
 | 
						|
USING: namespaces math kernel sequences accessors fry circular
 | 
						|
unicode.case unicode.categories locals combinators.short-circuit
 | 
						|
make combinators io splitting math.parser math.ranges
 | 
						|
generalizations sorting.functor math.order sorting.slots ;
 | 
						|
IN: sequence-parser
 | 
						|
 | 
						|
TUPLE: sequence-parser sequence n ;
 | 
						|
 | 
						|
: <sequence-parser> ( sequence -- sequence-parser )
 | 
						|
    sequence-parser new
 | 
						|
        swap >>sequence
 | 
						|
        0 >>n ;
 | 
						|
 | 
						|
:: with-sequence-parser ( sequence-parser quot -- seq/f )
 | 
						|
    sequence-parser n>> :> n
 | 
						|
    sequence-parser quot call [
 | 
						|
        n sequence-parser (>>n) f
 | 
						|
    ] unless* ; inline
 | 
						|
 | 
						|
: offset  ( sequence-parser offset -- char/f )
 | 
						|
    swap
 | 
						|
    [ n>> + ] [ sequence>> ?nth ] bi ; inline
 | 
						|
 | 
						|
: current ( sequence-parser -- char/f ) 0 offset ; inline
 | 
						|
 | 
						|
: previous ( sequence-parser -- char/f ) -1 offset ; inline
 | 
						|
 | 
						|
: peek-next ( sequence-parser -- char/f ) 1 offset ; inline
 | 
						|
 | 
						|
: advance ( sequence-parser -- sequence-parser )
 | 
						|
    [ 1 + ] change-n ; inline
 | 
						|
 | 
						|
: advance* ( sequence-parser -- )
 | 
						|
    advance drop ; inline
 | 
						|
 | 
						|
: get+increment ( sequence-parser -- char/f )
 | 
						|
    [ current ] [ advance drop ] bi ; inline
 | 
						|
 | 
						|
:: skip-until ( sequence-parser quot: ( obj -- ? ) -- )
 | 
						|
    sequence-parser current [
 | 
						|
        sequence-parser quot call
 | 
						|
        [ sequence-parser advance quot skip-until ] unless
 | 
						|
    ] when ; inline recursive
 | 
						|
 | 
						|
: sequence-parse-end? ( sequence-parser -- ? ) current not ;
 | 
						|
 | 
						|
: take-until ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
 | 
						|
    over sequence-parse-end? [
 | 
						|
        2drop f
 | 
						|
    ] [
 | 
						|
        [ drop n>> ]
 | 
						|
        [ skip-until ]
 | 
						|
        [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq f like
 | 
						|
    ] if ; inline
 | 
						|
 | 
						|
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
 | 
						|
    [ not ] compose take-until ; inline
 | 
						|
 | 
						|
: <safe-slice> ( from to seq -- slice/f )
 | 
						|
    3dup {
 | 
						|
        [ 2drop 0 < ]
 | 
						|
        [ [ drop ] 2dip length > ]
 | 
						|
        [ drop > ]
 | 
						|
    } 3|| [ 3drop f ] [ slice boa ] if ; inline
 | 
						|
 | 
						|
:: take-sequence ( sequence-parser sequence -- obj/f )
 | 
						|
    sequence-parser [ n>> dup sequence length + ] [ sequence>> ] bi
 | 
						|
    <safe-slice> sequence sequence= [
 | 
						|
        sequence
 | 
						|
        sequence-parser [ sequence length + ] change-n drop
 | 
						|
    ] [
 | 
						|
        f
 | 
						|
    ] if ;
 | 
						|
 | 
						|
: take-sequence* ( sequence-parser sequence -- )
 | 
						|
    take-sequence drop ;
 | 
						|
 | 
						|
:: take-until-sequence ( sequence-parser sequence -- sequence'/f )
 | 
						|
    sequence-parser n>> :> saved
 | 
						|
    sequence length <growing-circular> :> growing
 | 
						|
    sequence-parser
 | 
						|
    [
 | 
						|
        current growing push-growing-circular
 | 
						|
        sequence growing sequence=
 | 
						|
    ] take-until :> found
 | 
						|
    growing sequence sequence= [
 | 
						|
        found dup length
 | 
						|
        growing length 1- - head
 | 
						|
        sequence-parser [ growing length - 1 + ] change-n drop
 | 
						|
        ! sequence-parser advance drop
 | 
						|
    ] [
 | 
						|
        saved sequence-parser (>>n)
 | 
						|
        f
 | 
						|
    ] if ;
 | 
						|
 | 
						|
:: take-until-sequence* ( sequence-parser sequence -- sequence'/f )
 | 
						|
    sequence-parser sequence take-until-sequence :> out
 | 
						|
    out [
 | 
						|
        sequence-parser [ sequence length + ] change-n drop
 | 
						|
    ] when out ;
 | 
						|
 | 
						|
: skip-whitespace ( sequence-parser -- sequence-parser )
 | 
						|
    [ [ current blank? not ] take-until drop ] keep ;
 | 
						|
 | 
						|
: skip-whitespace-eol ( sequence-parser -- sequence-parser )
 | 
						|
    [ [ current " \t\r" member? not ] take-until drop ] keep ;
 | 
						|
 | 
						|
: take-c-comment ( sequence-parser -- seq/f )
 | 
						|
    [
 | 
						|
        dup "/*" take-sequence [
 | 
						|
            "*/" take-until-sequence*
 | 
						|
        ] [
 | 
						|
            drop f
 | 
						|
        ] if
 | 
						|
    ] with-sequence-parser ;
 | 
						|
 | 
						|
: take-c++-comment ( sequence-parser -- seq/f )
 | 
						|
    [
 | 
						|
        dup "//" take-sequence [
 | 
						|
            [
 | 
						|
                [
 | 
						|
                    { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
 | 
						|
                ] take-until
 | 
						|
            ] [
 | 
						|
                advance drop
 | 
						|
            ] bi
 | 
						|
        ] [
 | 
						|
            drop f
 | 
						|
        ] if
 | 
						|
    ] with-sequence-parser ;
 | 
						|
 | 
						|
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
 | 
						|
    skip-whitespace-eol
 | 
						|
    {
 | 
						|
        { [ dup take-c-comment ] [ skip-whitespace/comments ] }
 | 
						|
        { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
 | 
						|
        [ ]
 | 
						|
    } cond ;
 | 
						|
 | 
						|
: take-define-identifier ( sequence-parser -- string )
 | 
						|
    skip-whitespace/comments
 | 
						|
    [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
 | 
						|
 | 
						|
: take-rest-slice ( sequence-parser -- sequence/f )
 | 
						|
    [ sequence>> ] [ n>> ] bi
 | 
						|
    2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
 | 
						|
 | 
						|
: take-rest ( sequence-parser -- sequence )
 | 
						|
    [ take-rest-slice ] [ sequence>> like ] bi ;
 | 
						|
 | 
						|
: take-until-object ( sequence-parser obj -- sequence )
 | 
						|
    '[ current _ = ] take-until ;
 | 
						|
 | 
						|
: parse-sequence ( sequence quot -- )
 | 
						|
    [ <sequence-parser> ] dip call ; inline
 | 
						|
 | 
						|
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
 | 
						|
    sequence-parser n>> :> start-n
 | 
						|
    sequence-parser advance
 | 
						|
    [
 | 
						|
        {
 | 
						|
            [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
 | 
						|
            [ current quote-char = not ]
 | 
						|
        } 1||
 | 
						|
    ] take-while :> string
 | 
						|
    sequence-parser current quote-char = [
 | 
						|
        sequence-parser advance* string
 | 
						|
    ] [
 | 
						|
        start-n sequence-parser (>>n) f
 | 
						|
    ] if ;
 | 
						|
 | 
						|
: (take-token) ( sequence-parser -- string )
 | 
						|
    skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
 | 
						|
 | 
						|
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
 | 
						|
    sequence-parser skip-whitespace
 | 
						|
    dup current {
 | 
						|
        { quote-char [ escape-char quote-char take-quoted-string ] }
 | 
						|
        { f [ drop f ] }
 | 
						|
        [ drop (take-token) ]
 | 
						|
    } case ;
 | 
						|
 | 
						|
: take-token ( sequence-parser -- string/f )
 | 
						|
    CHAR: \ CHAR: " take-token* ;
 | 
						|
 | 
						|
: take-integer ( sequence-parser -- n/f )
 | 
						|
    [ current digit? ] take-while ;
 | 
						|
 | 
						|
:: take-n ( sequence-parser n -- seq/f )
 | 
						|
    n sequence-parser [ n>> + ] [ sequence>> length ] bi > [
 | 
						|
        f
 | 
						|
    ] [
 | 
						|
        sequence-parser n>> dup n + sequence-parser sequence>> subseq
 | 
						|
        sequence-parser [ n + ] change-n drop
 | 
						|
    ] if ;
 | 
						|
 | 
						|
: c-identifier-begin? ( ch -- ? )
 | 
						|
    CHAR: a CHAR: z [a,b]
 | 
						|
    CHAR: A CHAR: Z [a,b]
 | 
						|
    { CHAR: _ } 3append member? ;
 | 
						|
 | 
						|
: c-identifier-ch? ( ch -- ? )
 | 
						|
    CHAR: a CHAR: z [a,b]
 | 
						|
    CHAR: A CHAR: Z [a,b]
 | 
						|
    CHAR: 0 CHAR: 9 [a,b]
 | 
						|
    { CHAR: _ } 4 nappend member? ;
 | 
						|
 | 
						|
: (take-c-identifier) ( sequence-parser -- string/f )
 | 
						|
    dup current c-identifier-begin? [
 | 
						|
        [ current c-identifier-ch? ] take-while
 | 
						|
    ] [
 | 
						|
        drop f
 | 
						|
    ] if ;
 | 
						|
 | 
						|
: take-c-identifier ( sequence-parser -- string/f )
 | 
						|
    [ (take-c-identifier) ] with-sequence-parser ;
 | 
						|
 | 
						|
<< "length" [ length ] define-sorting >>
 | 
						|
 | 
						|
: sort-tokens ( seq -- seq' )
 | 
						|
    { length>=< <=> } sort-by ;
 | 
						|
 | 
						|
: take-first-matching ( sequence-parser seq -- seq )
 | 
						|
    swap
 | 
						|
    '[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
 | 
						|
 | 
						|
 | 
						|
: take-longest ( sequence-parser seq -- seq )
 | 
						|
    sort-tokens take-first-matching ;
 | 
						|
 | 
						|
: take-c-integer ( sequence-parser -- string/f )
 | 
						|
    [
 | 
						|
        dup take-integer [
 | 
						|
            swap
 | 
						|
            { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
 | 
						|
            take-longest [ append ] when*
 | 
						|
        ] [
 | 
						|
            drop f
 | 
						|
        ] if*
 | 
						|
    ] with-sequence-parser ;
 | 
						|
 | 
						|
CONSTANT: c-punctuators
 | 
						|
    {
 | 
						|
        "[" "]" "(" ")" "{" "}" "." "->"
 | 
						|
        "++" "--" "&" "*" "+" "-" "~" "!"
 | 
						|
        "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
 | 
						|
        "?" ":" ";" "..."
 | 
						|
        "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
 | 
						|
        "," "#" "##"
 | 
						|
        "<:" ":>" "<%" "%>" "%:" "%:%:"
 | 
						|
    }
 | 
						|
 | 
						|
: take-c-punctuator ( sequence-parser -- string/f )
 | 
						|
    c-punctuators take-longest ;
 | 
						|
 | 
						|
: write-full ( sequence-parser -- ) sequence>> write ;
 | 
						|
: write-rest ( sequence-parser -- ) take-rest write ;
 |