Add support for positive, negative lookahead, word boundaries to regexp
parent
058e7f40a4
commit
aa55747647
|
@ -199,3 +199,26 @@ IN: regexp-tests
|
||||||
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
||||||
f <regexp> drop
|
f <regexp> drop
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ t ] [ "fxxbar" "(?!foo).{3}bar" f <regexp> matches? ] unit-test
|
||||||
|
[ f ] [ "foobar" "(?!foo).{3}bar" f <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ 3 ] [ "foobar" "foo(?=bar)" f <regexp> match-head ] unit-test
|
||||||
|
[ f ] [ "foobxr" "foo(?=bar)" f <regexp> match-head ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ "foobxr" "foo\\z" f <regexp> match-head ] unit-test
|
||||||
|
[ 3 ] [ "foo" "foo\\z" f <regexp> match-head ] unit-test
|
||||||
|
|
||||||
|
[ 3 ] [ "foo bar" "foo\\b" f <regexp> match-head ] unit-test
|
||||||
|
[ f ] [ "fooxbar" "foo\\b" f <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "foo" "foo\\b" f <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "foo bar" "foo\\b bar" f <regexp> matches? ] unit-test
|
||||||
|
[ f ] [ "fooxbar" "foo\\bxbar" f <regexp> matches? ] unit-test
|
||||||
|
[ f ] [ "foo" "foo\\bbar" f <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ "foo bar" "foo\\B" f <regexp> matches? ] unit-test
|
||||||
|
[ 3 ] [ "fooxbar" "foo\\B" f <regexp> match-head ] unit-test
|
||||||
|
[ t ] [ "foo" "foo\\B" f <regexp> matches? ] unit-test
|
||||||
|
[ f ] [ "foo bar" "foo\\B bar" f <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "fooxbar" "foo\\Bxbar" f <regexp> matches? ] unit-test
|
||||||
|
[ f ] [ "foo" "foo\\Bbar" f <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
USING: arrays combinators kernel lazy-lists math math.parser
|
USING: arrays combinators kernel lazy-lists math math.parser
|
||||||
namespaces parser parser-combinators parser-combinators.simple
|
namespaces parser parser-combinators parser-combinators.simple
|
||||||
promises quotations sequences combinators.lib strings
|
promises quotations sequences combinators.lib strings
|
||||||
assocs prettyprint.backend ;
|
assocs prettyprint.backend memoize ;
|
||||||
USE: io
|
USE: io
|
||||||
IN: regexp
|
IN: regexp
|
||||||
|
|
||||||
|
@ -148,10 +148,22 @@ TUPLE: group-result str ;
|
||||||
C: <group-result> group-result
|
C: <group-result> group-result
|
||||||
|
|
||||||
: 'non-capturing-group' ( -- parser )
|
: 'non-capturing-group' ( -- parser )
|
||||||
'regexp' "(?:" ")" surrounded-by ;
|
"?:" token 'regexp' &> ;
|
||||||
|
|
||||||
|
: 'positive-lookahead-group' ( -- parser )
|
||||||
|
"?=" token 'regexp' &> [ ensure ] <@ ;
|
||||||
|
|
||||||
|
: 'negative-lookahead-group' ( -- parser )
|
||||||
|
"?!" token 'regexp' &> [ ensure-not ] <@ ;
|
||||||
|
|
||||||
|
: 'simple-group' ( -- parser )
|
||||||
|
'regexp' [ [ <group-result> ] <@ ] <@ ;
|
||||||
|
|
||||||
: 'group' ( -- parser )
|
: 'group' ( -- parser )
|
||||||
'regexp' [ [ <group-result> ] <@ ] <@
|
'non-capturing-group'
|
||||||
|
'positive-lookahead-group'
|
||||||
|
'negative-lookahead-group'
|
||||||
|
'simple-group' <|> <|> <|>
|
||||||
"(" ")" surrounded-by ;
|
"(" ")" surrounded-by ;
|
||||||
|
|
||||||
: 'range' ( -- parser )
|
: 'range' ( -- parser )
|
||||||
|
@ -181,12 +193,21 @@ C: <group-result> group-result
|
||||||
[ ignore-case? get <token-parser> ] <@
|
[ ignore-case? get <token-parser> ] <@
|
||||||
"\\Q" "\\E" surrounded-by ;
|
"\\Q" "\\E" surrounded-by ;
|
||||||
|
|
||||||
|
: 'break' ( quot -- parser )
|
||||||
|
satisfy ensure epsilon just <|> ;
|
||||||
|
|
||||||
|
: 'break-escape' ( -- parser )
|
||||||
|
"$" token [ "\r\n" member? ] 'break' <@literal
|
||||||
|
"\\b" token [ blank? ] 'break' <@literal <|>
|
||||||
|
"\\B" token [ blank? not ] 'break' <@literal <|>
|
||||||
|
"\\z" token epsilon just <@literal <|> ;
|
||||||
|
|
||||||
: 'simple' ( -- parser )
|
: 'simple' ( -- parser )
|
||||||
'escaped-seq'
|
'escaped-seq'
|
||||||
'non-capturing-group' <|>
|
'break-escape' <|>
|
||||||
'group' <|>
|
'group' <|>
|
||||||
'char' <|>
|
'character-class' <|>
|
||||||
'character-class' <|> ;
|
'char' <|> ;
|
||||||
|
|
||||||
: 'exactly-n' ( -- parser )
|
: 'exactly-n' ( -- parser )
|
||||||
'integer' [ exactly-n ] <@delay ;
|
'integer' [ exactly-n ] <@delay ;
|
||||||
|
@ -226,7 +247,7 @@ C: <group-result> group-result
|
||||||
: 'dummy' ( -- parser )
|
: 'dummy' ( -- parser )
|
||||||
epsilon [ ] <@literal ;
|
epsilon [ ] <@literal ;
|
||||||
|
|
||||||
: 'term' ( -- parser )
|
MEMO: 'term' ( -- parser )
|
||||||
'simple'
|
'simple'
|
||||||
'repetition' 'interval' 'dummy' <|> <|> <&> [ first2 call ] <@
|
'repetition' 'interval' 'dummy' <|> <|> <&> [ first2 call ] <@
|
||||||
<!+> [ <and-parser> ] <@ ;
|
<!+> [ <and-parser> ] <@ ;
|
||||||
|
|
Loading…
Reference in New Issue