better parsing for anchors
parent
73f6691f75
commit
46aa56730b
|
@ -233,15 +233,22 @@ ERROR: invalid-range a b ;
|
||||||
SINGLETON: beginning-of-input
|
SINGLETON: beginning-of-input
|
||||||
SINGLETON: end-of-input
|
SINGLETON: end-of-input
|
||||||
|
|
||||||
! : beginning-of-input ( -- obj )
|
: newlines ( -- obj1 obj2 obj3 )
|
||||||
: handle-front-anchor ( -- ) front-anchor push-stack ;
|
|
||||||
: end-of-line ( -- obj )
|
|
||||||
end-of-input
|
|
||||||
CHAR: \r <constant>
|
CHAR: \r <constant>
|
||||||
CHAR: \n <constant>
|
CHAR: \n <constant>
|
||||||
2dup 2array <concatenation> 4array <alternation> lookahead boa ;
|
2dup 2array <concatenation> ;
|
||||||
|
|
||||||
: handle-back-anchor ( -- ) end-of-line push-stack ;
|
! Builds the node used for a front anchor in multiline mode: a lookbehind
! tuple wrapping an alternation of four candidates -- beginning-of-input
! plus the three objects left on the stack by newlines.
: beginning-of-line ( -- obj )
|
||||||
|
beginning-of-input newlines 4array <alternation> lookbehind boa ;
|
||||||
|
|
||||||
|
! Builds the node used for a back anchor in multiline mode: a lookahead
! tuple wrapping an alternation of four candidates -- end-of-input plus
! the three objects left on the stack by newlines.
: end-of-line ( -- obj )
|
||||||
|
end-of-input newlines 4array <alternation> lookahead boa ;
|
||||||
|
|
||||||
|
! Pushes the node for a front anchor onto the parse stack.
! Both candidates are placed on the stack first (beginning-of-line is called,
! so its lookbehind node is built either way); `?` then keeps the
! beginning-of-line result when get-multiline is true and beginning-of-input
! otherwise.
! NOTE(review): get-multiline is defined outside this view -- presumably it
! reports whether the regexp's multiline option is set; confirm.
: handle-front-anchor ( -- )
|
||||||
|
get-multiline beginning-of-line beginning-of-input ? push-stack ;
|
||||||
|
|
||||||
|
! Pushes the node for a back anchor onto the parse stack.
! Both candidates are placed on the stack first (end-of-line is called, so
! its lookahead node is built either way); `?` then keeps the end-of-line
! result when get-multiline is true and end-of-input otherwise.
! NOTE(review): get-multiline is defined outside this view -- presumably it
! reports whether the regexp's multiline option is set; confirm.
: handle-back-anchor ( -- )
|
||||||
|
get-multiline end-of-line end-of-input ? push-stack ;
|
||||||
|
|
||||||
ERROR: bad-character-class obj ;
|
ERROR: bad-character-class obj ;
|
||||||
ERROR: expected-posix-class ;
|
ERROR: expected-posix-class ;
|
||||||
|
@ -412,16 +419,11 @@ DEFER: handle-left-bracket
|
||||||
[ [ push ] keep current-regexp get (>>stack) ]
|
[ [ push ] keep current-regexp get (>>stack) ]
|
||||||
[ finish-regexp-parse push-stack ] bi* ;
|
[ finish-regexp-parse push-stack ] bi* ;
|
||||||
|
|
||||||
|
|
||||||
: parse-regexp-token ( token -- ? )
|
: parse-regexp-token ( token -- ? )
|
||||||
{
|
{
|
||||||
! todo: only match these at beginning/end of regexp
|
{ CHAR: ( [ handle-left-parenthesis t ] } ! handle (?..) at beginning?
|
||||||
{ CHAR: ^ [ handle-front-anchor t ] }
|
|
||||||
{ CHAR: $ [ handle-back-anchor t ] }
|
|
||||||
|
|
||||||
{ CHAR: . [ handle-dot t ] }
|
|
||||||
{ CHAR: ( [ handle-left-parenthesis t ] }
|
|
||||||
{ CHAR: ) [ handle-right-parenthesis f ] }
|
{ CHAR: ) [ handle-right-parenthesis f ] }
|
||||||
|
{ CHAR: . [ handle-dot t ] }
|
||||||
{ CHAR: | [ handle-pipe t ] }
|
{ CHAR: | [ handle-pipe t ] }
|
||||||
{ CHAR: ? [ handle-question t ] }
|
{ CHAR: ? [ handle-question t ] }
|
||||||
{ CHAR: * [ handle-star t ] }
|
{ CHAR: * [ handle-star t ] }
|
||||||
|
@ -429,16 +431,28 @@ DEFER: handle-left-bracket
|
||||||
{ CHAR: { [ handle-left-brace t ] }
|
{ CHAR: { [ handle-left-brace t ] }
|
||||||
{ CHAR: [ [ handle-left-bracket t ] }
|
{ CHAR: [ [ handle-left-bracket t ] }
|
||||||
{ CHAR: \ [ handle-escape t ] }
|
{ CHAR: \ [ handle-escape t ] }
|
||||||
[ <constant> push-stack t ]
|
[
|
||||||
|
dup CHAR: $ = peek1 f = and [
|
||||||
|
drop
|
||||||
|
handle-back-anchor f
|
||||||
|
] [
|
||||||
|
<constant> push-stack t
|
||||||
|
] if
|
||||||
|
]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: (parse-regexp) ( -- )
|
: (parse-regexp) ( -- )
|
||||||
read1 [ parse-regexp-token [ (parse-regexp) ] when ] when* ;
|
read1 [ parse-regexp-token [ (parse-regexp) ] when ] when* ;
|
||||||
|
|
||||||
|
! Handles a leading ^ before the main token loop runs: when peek1 yields
! CHAR: ^, calls drop1 and then handle-front-anchor; otherwise does nothing.
! parse-regexp calls this once, immediately before (parse-regexp).
! NOTE(review): peek1 and drop1 are defined outside this view -- presumably
! peek1 inspects the next input character without consuming it and drop1
! consumes it; confirm.
: parse-regexp-beginning ( -- )
|
||||||
|
peek1 CHAR: ^ = [ drop1 handle-front-anchor ] when ;
|
||||||
|
|
||||||
: parse-regexp ( regexp -- )
|
: parse-regexp ( regexp -- )
|
||||||
dup current-regexp [
|
dup current-regexp [
|
||||||
raw>> [
|
raw>> [
|
||||||
<string-reader> [ (parse-regexp) ] with-input-stream
|
<string-reader> [
|
||||||
|
parse-regexp-beginning (parse-regexp)
|
||||||
|
] with-input-stream
|
||||||
] unless-empty
|
] unless-empty
|
||||||
current-regexp get
|
current-regexp get
|
||||||
stack finish-regexp-parse
|
stack finish-regexp-parse
|
||||||
|
|
Loading…
Reference in New Issue