Merge branch 'master' of git://factorcode.org/git/factor
commit
a1b9d84849
|
@ -549,10 +549,10 @@ ERROR: custom-error ;
|
|||
{ 1 0 } [ [ ] map-children ] must-infer-as
|
||||
|
||||
! Corner case
|
||||
[ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail
|
||||
! [ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail
|
||||
|
||||
[ [ [ f dup ] [ ] [ ] while ] infer ] must-fail
|
||||
! [ [ [ f dup ] [ ] [ ] while ] infer ] must-fail
|
||||
|
||||
: erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline
|
||||
! : erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline
|
||||
|
||||
[ [ erg's-inference-bug ] infer ] must-fail
|
||||
! [ [ erg's-inference-bug ] infer ] must-fail
|
||||
|
|
|
@ -97,6 +97,7 @@ $nl
|
|||
"Certain shuffle words can also be expressed in terms of the spread combinators. Internalizing such identities can help with understanding and writing code using spread combinators:"
|
||||
{ $code
|
||||
": dip [ ] bi* ;"
|
||||
": 2dip [ ] [ ] tri* ;"
|
||||
""
|
||||
": slip [ call ] [ ] bi* ;"
|
||||
": 2slip [ call ] [ ] [ ] tri* ;"
|
||||
|
@ -164,8 +165,9 @@ ARTICLE: "slip-keep-combinators" "The slip and keep combinators"
|
|||
{ $subsection slip }
|
||||
{ $subsection 2slip }
|
||||
{ $subsection 3slip }
|
||||
"The dip combinator invokes the quotation at the top of the stack, hiding the value underneath:"
|
||||
"The dip combinators invoke the quotation at the top of the stack, hiding the values underneath:"
|
||||
{ $subsection dip }
|
||||
{ $subsection 2dip }
|
||||
"The keep combinators invoke a quotation which takes a number of values off the stack, and then they restore those values:"
|
||||
{ $subsection keep }
|
||||
{ $subsection 2keep }
|
||||
|
@ -876,6 +878,14 @@ HELP: dip
|
|||
{ $code "[ foo bar ] dip" }
|
||||
} ;
|
||||
|
||||
HELP: 2dip
|
||||
{ $values { "obj1" object } { "obj2" object } { "quot" quotation } }
|
||||
{ $description "Calls " { $snippet "quot" } " with " { $snippet "obj1" } " and " { $snippet "obj2" } " hidden on the retain stack." }
|
||||
{ $notes "The following are equivalent:"
|
||||
{ $code ">r >r foo bar r> r>" }
|
||||
{ $code "[ foo bar ] 2dip" }
|
||||
} ;
|
||||
|
||||
HELP: while
|
||||
{ $values { "pred" "a quotation with stack effect " { $snippet "( -- ? )" } } { "body" "a quotation" } { "tail" "a quotation" } }
|
||||
{ $description "Repeatedly calls " { $snippet "pred" } ". If it yields " { $link f } ", iteration stops, otherwise " { $snippet "body" } " is called. After iteration stops, " { $snippet "tail" } " is called." }
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
|
||||
USING: kernel system
|
||||
combinators
|
||||
vectors sequences assocs
|
||||
math math.functions
|
||||
prettyprint unicode.case
|
||||
accessors
|
||||
combinators.cleave
|
||||
newfx
|
||||
dns ;
|
||||
|
||||
IN: dns.cache
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: cache ( -- table ) H{ } ;
|
||||
|
||||
! key: 'name type class' (as string)
|
||||
! val: entry
|
||||
|
||||
TUPLE: entry time data ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: query->key ( query -- key )
|
||||
{ [ name>> >lower ] [ type>> unparse ] [ class>> unparse ] } <arr> " " join ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: table-get ( query -- result ) query->key cache of ;
|
||||
|
||||
: table-check ( query -- ? ) query->key cache key? ;
|
||||
|
||||
: table-add ( query value -- ) [ query->key ] [ ] bi* cache at-mutate ;
|
||||
|
||||
: table-rem ( query -- ) query->key cache delete-key-of drop ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: now ( -- seconds ) millis 1000.0 / round >integer ;
|
||||
|
||||
: ttl->time ( ttl -- seconds ) now + ;
|
||||
|
||||
: time->ttl ( time -- ttl ) now - ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYMBOL: NX
|
||||
|
||||
: cache-nx ( query ttl -- )
|
||||
ttl->time NX entry boa
|
||||
table-add ;
|
||||
|
||||
: nx? ( obj -- ? )
|
||||
dup entry?
|
||||
[ data>> NX = ]
|
||||
[ drop f ]
|
||||
if ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: query->rr ( query -- rr ) [ name>> ] [ type>> ] [ class>> ] tri f f rr boa ;
|
||||
|
||||
: query+entry->rrs ( query entry -- rrs )
|
||||
swap ! entry query
|
||||
query->rr ! entry rr
|
||||
over ! entry rr entry
|
||||
time>> time->ttl >>ttl ! entry rr
|
||||
swap ! rr entry
|
||||
data>> [ >r dup clone r> >>rdata ] map
|
||||
nip ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: entry-expired? ( entry -- ? ) time>> time->ttl 0 <= ;
|
||||
|
||||
: cache-get ( query -- result )
|
||||
dup table-get ! query result
|
||||
{
|
||||
{
|
||||
[ dup f = ] ! not in the cache
|
||||
[ 2drop f ]
|
||||
}
|
||||
{
|
||||
[ dup entry-expired? ] ! here but expired
|
||||
[ drop table-rem f ]
|
||||
}
|
||||
{
|
||||
[ dup nx? ] ! negative result has been cached
|
||||
[ 2drop NX ]
|
||||
}
|
||||
{
|
||||
[ t ]
|
||||
[ query+entry->rrs ]
|
||||
}
|
||||
}
|
||||
cond ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: rr->entry ( rr -- entry )
|
||||
[ ttl>> ttl->time ] [ rdata>> {1} >vector ] bi entry boa ;
|
||||
|
||||
: maybe-pushed-on ( obj seq -- )
|
||||
2dup member-of?
|
||||
[ 2drop ]
|
||||
[ pushed-on ]
|
||||
if ;
|
||||
|
||||
: add-rr-to-entry ( rr entry -- )
|
||||
over ttl>> ttl->time >>time
|
||||
[ rdata>> ] [ data>> ] bi* maybe-pushed-on ;
|
||||
|
||||
: cache-add ( query rr -- )
|
||||
over table-get ! query rr entry
|
||||
{
|
||||
{
|
||||
[ dup f = ] ! not in the cache
|
||||
[ drop rr->entry table-add ]
|
||||
}
|
||||
{
|
||||
[ dup nx? ]
|
||||
[ drop over table-rem rr->entry table-add ]
|
||||
}
|
||||
{
|
||||
[ dup entry-expired? ]
|
||||
[ drop rr->entry table-add ]
|
||||
}
|
||||
{
|
||||
[ t ]
|
||||
[ rot drop add-rr-to-entry ]
|
||||
}
|
||||
}
|
||||
cond ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: rr->query ( rr -- query ) [ name>> ] [ type>> ] [ class>> ] tri query boa ;
|
||||
|
||||
: cache-add-rr ( rr -- ) [ rr->query ] [ ] bi cache-add ;
|
||||
|
||||
: cache-add-rrs ( rrs -- ) [ cache-add-rr ] each ;
|
|
@ -0,0 +1,462 @@
|
|||
|
||||
USING: kernel byte-arrays combinators strings arrays sequences splitting
|
||||
math math.functions math.parser random
|
||||
destructors
|
||||
io io.binary io.sockets io.encodings.binary
|
||||
accessors
|
||||
combinators.cleave
|
||||
newfx
|
||||
symbols
|
||||
;
|
||||
|
||||
IN: dns
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
TUPLE: query name type class ;
|
||||
|
||||
TUPLE: rr name type class ttl rdata ;
|
||||
|
||||
TUPLE: hinfo cpu os ;
|
||||
|
||||
TUPLE: mx preference exchange ;
|
||||
|
||||
TUPLE: soa mname rname serial refresh retry expire minimum ;
|
||||
|
||||
TUPLE: message
|
||||
id qr opcode aa tc rd ra z rcode
|
||||
question-section
|
||||
answer-section
|
||||
authority-section
|
||||
additional-section ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: random-id ( -- id ) 2 16 ^ random ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! TYPE
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYMBOLS: A NS MD MF CNAME SOA MB MG MR NULL WKS PTR HINFO MINFO MX TXT ;
|
||||
|
||||
: type-table ( -- table )
|
||||
{
|
||||
{ A 1 }
|
||||
{ NS 2 }
|
||||
{ MD 3 }
|
||||
{ MF 4 }
|
||||
{ CNAME 5 }
|
||||
{ SOA 6 }
|
||||
{ MB 7 }
|
||||
{ MG 8 }
|
||||
{ MR 9 }
|
||||
{ NULL 10 }
|
||||
{ WKS 11 }
|
||||
{ PTR 12 }
|
||||
{ HINFO 13 }
|
||||
{ MINFO 14 }
|
||||
{ MX 15 }
|
||||
{ TXT 16 }
|
||||
} ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! CLASS
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYMBOLS: IN CS CH HS ;
|
||||
|
||||
: class-table ( -- table )
|
||||
{
|
||||
{ IN 1 }
|
||||
{ CS 2 }
|
||||
{ CH 3 }
|
||||
{ HS 4 }
|
||||
} ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! OPCODE
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYMBOLS: QUERY IQUERY STATUS ;
|
||||
|
||||
: opcode-table ( -- table )
|
||||
{
|
||||
{ QUERY 0 }
|
||||
{ IQUERY 1 }
|
||||
{ STATUS 2 }
|
||||
} ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! RCODE
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYMBOLS: NO-ERROR FORMAT-ERROR SERVER-FAILURE NAME-ERROR NOT-IMPLEMENTED
|
||||
REFUSED ;
|
||||
|
||||
: rcode-table ( -- table )
|
||||
{
|
||||
{ NO-ERROR 0 }
|
||||
{ FORMAT-ERROR 1 }
|
||||
{ SERVER-FAILURE 2 }
|
||||
{ NAME-ERROR 3 }
|
||||
{ NOT-IMPLEMENTED 4 }
|
||||
{ REFUSED 5 }
|
||||
} ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: <message> ( -- message )
|
||||
message new
|
||||
random-id >>id
|
||||
0 >>qr
|
||||
QUERY >>opcode
|
||||
0 >>aa
|
||||
0 >>tc
|
||||
1 >>rd
|
||||
0 >>ra
|
||||
0 >>z
|
||||
NO-ERROR >>rcode
|
||||
{ } >>question-section
|
||||
{ } >>answer-section
|
||||
{ } >>authority-section
|
||||
{ } >>additional-section ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: ip->ba ( ip -- ba ) "." split [ string>number ] map >byte-array ;
|
||||
|
||||
: label->ba ( label -- ba ) [ >byte-array ] [ length ] bi prefix ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: uint8->ba ( n -- ba ) 1 >be ;
|
||||
: uint16->ba ( n -- ba ) 2 >be ;
|
||||
: uint32->ba ( n -- ba ) 4 >be ;
|
||||
: uint64->ba ( n -- ba ) 8 >be ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: dn->ba ( dn -- ba ) "." split [ label->ba ] map concat ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: query->ba ( query -- ba )
|
||||
{
|
||||
[ name>> dn->ba ]
|
||||
[ type>> type-table of uint16->ba ]
|
||||
[ class>> class-table of uint16->ba ]
|
||||
}
|
||||
<arr> concat ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: hinfo->ba ( rdata -- ba )
|
||||
[ cpu>> label->ba ]
|
||||
[ os>> label->ba ]
|
||||
bi append ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: mx->ba ( rdata -- ba )
|
||||
[ preference>> uint16->ba ]
|
||||
[ exchange>> dn->ba ]
|
||||
bi append ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: soa->ba ( rdata -- ba )
|
||||
{
|
||||
[ mname>> dn->ba ]
|
||||
[ rname>> dn->ba ]
|
||||
[ serial>> uint32->ba ]
|
||||
[ refresh>> uint32->ba ]
|
||||
[ retry>> uint32->ba ]
|
||||
[ expire>> uint32->ba ]
|
||||
[ minimum>> uint32->ba ]
|
||||
}
|
||||
<arr> concat ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: rdata->ba ( type rdata -- ba )
|
||||
swap
|
||||
{
|
||||
{ CNAME [ dn->ba ] }
|
||||
{ HINFO [ hinfo->ba ] }
|
||||
{ MX [ mx->ba ] }
|
||||
{ NS [ dn->ba ] }
|
||||
{ PTR [ dn->ba ] }
|
||||
{ SOA [ soa->ba ] }
|
||||
{ A [ ip->ba ] }
|
||||
}
|
||||
case ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: rr->ba ( rr -- ba )
|
||||
{
|
||||
[ name>> dn->ba ]
|
||||
[ type>> type-table of uint16->ba ]
|
||||
[ class>> class-table of uint16->ba ]
|
||||
[ ttl>> uint32->ba ]
|
||||
[
|
||||
[ type>> ] [ rdata>> ] bi rdata->ba
|
||||
[ length uint16->ba ] [ ] bi append
|
||||
]
|
||||
}
|
||||
<arr> concat ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: header-bits-ba ( message -- ba )
|
||||
{
|
||||
[ qr>> 15 shift ]
|
||||
[ opcode>> opcode-table of 11 shift ]
|
||||
[ aa>> 10 shift ]
|
||||
[ tc>> 9 shift ]
|
||||
[ rd>> 8 shift ]
|
||||
[ ra>> 7 shift ]
|
||||
[ z>> 4 shift ]
|
||||
[ rcode>> rcode-table of 0 shift ]
|
||||
}
|
||||
<arr> sum uint16->ba ;
|
||||
|
||||
: message->ba ( message -- ba )
|
||||
{
|
||||
[ id>> uint16->ba ]
|
||||
[ header-bits-ba ]
|
||||
[ question-section>> length uint16->ba ]
|
||||
[ answer-section>> length uint16->ba ]
|
||||
[ authority-section>> length uint16->ba ]
|
||||
[ additional-section>> length uint16->ba ]
|
||||
[ question-section>> [ query->ba ] map concat ]
|
||||
[ answer-section>> [ rr->ba ] map concat ]
|
||||
[ authority-section>> [ rr->ba ] map concat ]
|
||||
[ additional-section>> [ rr->ba ] map concat ]
|
||||
}
|
||||
<arr> concat ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-single ( ba i -- n ) at ;
|
||||
: get-double ( ba i -- n ) dup 2 + subseq be> ;
|
||||
: get-quad ( ba i -- n ) dup 4 + subseq be> ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: label-length ( ba i -- length ) get-single ;
|
||||
|
||||
: skip-label ( ba i -- ba i ) 2dup label-length + 1 + ;
|
||||
|
||||
: null-label? ( ba i -- ? ) get-single 0 = ;
|
||||
|
||||
: get-label ( ba i -- label ) [ 1 + ] [ skip-label nip ] 2bi subseq >string ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: bit-test ( a b -- ? ) bitand 0 = not ;
|
||||
|
||||
: pointer? ( ba i -- ? ) get-single BIN: 11000000 bit-test ;
|
||||
|
||||
: pointer ( ba i -- val ) get-double BIN: 0011111111111111 bitand ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: skip-name ( ba i -- ba i )
|
||||
{
|
||||
{ [ 2dup null-label? ] [ 1 + ] }
|
||||
{ [ 2dup pointer? ] [ 2 + ] }
|
||||
{ [ t ] [ skip-label skip-name ] }
|
||||
}
|
||||
cond ;
|
||||
|
||||
: get-name ( ba i -- name )
|
||||
{
|
||||
{ [ 2dup null-label? ] [ 2drop "" ] }
|
||||
{ [ 2dup pointer? ] [ dupd pointer get-name ] }
|
||||
{
|
||||
[ t ]
|
||||
[
|
||||
[ get-label ]
|
||||
[ skip-label get-name ]
|
||||
2bi
|
||||
"." swap 3append
|
||||
]
|
||||
}
|
||||
}
|
||||
cond ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-query ( ba i -- query )
|
||||
[ get-name ]
|
||||
[
|
||||
skip-name
|
||||
[ 0 + get-double type-table key-of ]
|
||||
[ 2 + get-double class-table key-of ]
|
||||
2bi
|
||||
]
|
||||
2bi query boa ;
|
||||
|
||||
: skip-query ( ba i -- ba i ) skip-name 4 + ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-mx ( ba i -- mx ) [ get-double ] [ 2 + get-double ] 2bi mx boa ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-soa ( ba i -- soa )
|
||||
{
|
||||
[ get-name ]
|
||||
[ skip-name get-name ]
|
||||
[
|
||||
skip-name
|
||||
skip-name
|
||||
{
|
||||
[ 0 + get-quad ]
|
||||
[ 4 + get-quad ]
|
||||
[ 8 + get-quad ]
|
||||
[ 12 + get-quad ]
|
||||
[ 16 + get-quad ]
|
||||
}
|
||||
2cleave
|
||||
]
|
||||
}
|
||||
2cleave soa boa ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-ip ( ba i -- ip ) dup 4 + subseq >array [ number>string ] map "." join ;
|
||||
|
||||
: get-rdata ( ba i type -- rdata )
|
||||
{
|
||||
{ CNAME [ get-name ] }
|
||||
{ NS [ get-name ] }
|
||||
{ PTR [ get-name ] }
|
||||
{ MX [ get-mx ] }
|
||||
{ SOA [ get-soa ] }
|
||||
{ A [ get-ip ] }
|
||||
}
|
||||
case ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-rr ( ba i -- rr )
|
||||
[ get-name ]
|
||||
[
|
||||
skip-name
|
||||
{
|
||||
[ 0 + get-double type-table key-of ]
|
||||
[ 2 + get-double class-table key-of ]
|
||||
[ 4 + get-quad ]
|
||||
[ [ 10 + ] [ get-double type-table key-of ] 2bi get-rdata ]
|
||||
}
|
||||
2cleave
|
||||
]
|
||||
2bi rr boa ;
|
||||
|
||||
: skip-rr ( ba i -- ba i ) skip-name 8 + 2dup get-double + 2 + ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-question-section ( ba i count -- seq ba i )
|
||||
[ drop [ skip-query ] [ get-query ] 2bi ] map -rot ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: get-rr-section ( ba i count -- seq ba i )
|
||||
[ drop [ skip-rr ] [ get-rr ] 2bi ] map -rot ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: >> neg shift ;
|
||||
|
||||
: get-header-bits ( ba i -- qr opcode aa tc rd ra z rcode )
|
||||
get-double
|
||||
{
|
||||
[ 15 >> BIN: 1 bitand ]
|
||||
[ 11 >> BIN: 111 bitand opcode-table key-of ]
|
||||
[ 10 >> BIN: 1 bitand ]
|
||||
[ 9 >> BIN: 1 bitand ]
|
||||
[ 8 >> BIN: 1 bitand ]
|
||||
[ 7 >> BIN: 1 bitand ]
|
||||
[ 4 >> BIN: 111 bitand ]
|
||||
[ BIN: 1111 bitand rcode-table key-of ]
|
||||
}
|
||||
cleave ;
|
||||
|
||||
: parse-message ( ba -- message )
|
||||
0
|
||||
{
|
||||
[ get-double ]
|
||||
[ 2 + get-header-bits ]
|
||||
[
|
||||
4 +
|
||||
{
|
||||
[ 8 + ]
|
||||
[ 0 + get-double ]
|
||||
[ 2 + get-double ]
|
||||
[ 4 + get-double ]
|
||||
[ 6 + get-double ]
|
||||
}
|
||||
2cleave
|
||||
>r >r >r
|
||||
get-question-section r>
|
||||
get-rr-section r>
|
||||
get-rr-section r>
|
||||
get-rr-section
|
||||
2drop
|
||||
]
|
||||
}
|
||||
2cleave message boa ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: send-receive-udp ( ba server -- ba )
|
||||
f 0 <inet4> <datagram>
|
||||
[
|
||||
[ send ] [ receive drop ] bi
|
||||
]
|
||||
with-disposal ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: send-receive-tcp ( ba server -- ba )
|
||||
[ dup length 2 >be prepend ] [ ] bi*
|
||||
binary
|
||||
[
|
||||
write flush
|
||||
2 read be> read
|
||||
]
|
||||
with-client ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: >dns-inet4 ( obj -- inet4 )
|
||||
dup string?
|
||||
[ 53 <inet4> ]
|
||||
[ ]
|
||||
if ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: ask-server ( message server -- message )
|
||||
[ message->ba ] [ >dns-inet4 ] bi*
|
||||
2dup
|
||||
send-receive-udp parse-message
|
||||
dup tc>> 1 =
|
||||
[ drop send-receive-tcp parse-message ]
|
||||
[ nip nip ]
|
||||
if ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: dns-servers ( -- seq ) V{ } ;
|
||||
|
||||
: dns-server ( -- server ) dns-servers random ;
|
||||
|
||||
: ask ( message -- message ) dns-server ask-server ;
|
||||
|
||||
: <query-message> ( query -- message ) <message> swap {1} >>question-section ;
|
|
@ -160,6 +160,16 @@ METHOD: as-mutate { object object assoc } set-at ;
|
|||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: subseq ( seq from to -- subseq ) rot sequences:subseq ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: key ( table val -- key ) swap assocs:value-at ;
|
||||
|
||||
: key-of ( val table -- key ) assocs:value-at ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: 1st 0 at ;
|
||||
: 2nd 1 at ;
|
||||
: 3rd 2 at ;
|
||||
|
|
|
@ -155,6 +155,7 @@ IN: regexp4-tests
|
|||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
|
||||
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
|
||||
|
@ -236,12 +237,6 @@ IN: regexp4-tests
|
|||
matches?
|
||||
] unit-test
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
! ((A)(B(C)))
|
||||
! 1. ((A)(B(C)))
|
||||
! 2. (A)
|
||||
|
|
|
@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
|
|||
sequences namespaces locals combinators.lib state-tables
|
||||
math.parser state-parser sets dlists unicode.categories
|
||||
math.order quotations shuffle math.ranges splitting
|
||||
symbols fry parser ;
|
||||
symbols fry parser math.ranges inspector strings ;
|
||||
IN: regexp4
|
||||
|
||||
SYMBOLS: eps start-state final-state beginning-of-text
|
||||
|
@ -191,7 +191,7 @@ ERROR: unbalanced-brackets ;
|
|||
[ [ nip at-most-n ] [ at-least-n ] if* ] if
|
||||
] [ drop exactly-n ] if ;
|
||||
|
||||
:: make-nontoken-nfa ( regexp obj -- )
|
||||
:: push-single-nfa ( regexp obj -- )
|
||||
[let | s0 [ regexp next-state ]
|
||||
s1 [ regexp next-state ]
|
||||
stack [ regexp stack>> ]
|
||||
|
@ -213,18 +213,23 @@ ERROR: unbalanced-brackets ;
|
|||
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
|
||||
|
||||
: hex-digit? ( n -- ? )
|
||||
dup decimal-digit?
|
||||
over CHAR: a CHAR: f between? or
|
||||
swap CHAR: A CHAR: F between? or ;
|
||||
[
|
||||
[ dup decimal-digit? ]
|
||||
[ dup CHAR: a CHAR: f between? ]
|
||||
[ dup CHAR: A CHAR: F between? ]
|
||||
] || nip ;
|
||||
|
||||
: control-char? ( n -- ? )
|
||||
dup 0 HEX: 1f between? swap HEX: 7f = or ;
|
||||
[
|
||||
[ dup 0 HEX: 1f between? ]
|
||||
[ dup HEX: 7f = ]
|
||||
] || nip ;
|
||||
|
||||
: punct? ( n -- ? )
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
|
||||
|
||||
: c-identifier-char? ( ch -- ? )
|
||||
dup alpha? swap CHAR: _ = or ;
|
||||
[ [ dup alpha? ] [ dup CHAR: _ = ] ] || nip ;
|
||||
|
||||
: java-blank? ( n -- ? )
|
||||
{
|
||||
|
@ -233,7 +238,7 @@ ERROR: unbalanced-brackets ;
|
|||
} member? ;
|
||||
|
||||
: java-printable? ( n -- ? )
|
||||
dup alpha? swap punct? or ;
|
||||
[ [ dup alpha? ] [ dup punct? ] ] || nip ;
|
||||
|
||||
ERROR: bad-character-class obj ;
|
||||
|
||||
|
@ -261,32 +266,26 @@ ERROR: bad-character-class obj ;
|
|||
|
||||
ERROR: bad-octal number ;
|
||||
|
||||
: parse-octal ( regexp -- )
|
||||
: parse-octal ( -- n )
|
||||
next get-char drop
|
||||
3 take oct>
|
||||
dup 255 > [ bad-octal ] when
|
||||
make-nontoken-nfa ;
|
||||
dup 255 > [ bad-octal ] when ;
|
||||
|
||||
ERROR: bad-hex number ;
|
||||
|
||||
: parse-short-hex ( regexp -- )
|
||||
: parse-short-hex ( -- n )
|
||||
next 2 take hex>
|
||||
dup number? [ bad-hex ] unless
|
||||
make-nontoken-nfa ;
|
||||
dup number? [ bad-hex ] unless ;
|
||||
|
||||
: parse-long-hex ( regexp -- )
|
||||
next 4 take hex>
|
||||
dup number? [ bad-hex ] unless
|
||||
make-nontoken-nfa ;
|
||||
: parse-long-hex ( -- n )
|
||||
next 6 take hex>
|
||||
dup number? [ bad-hex ] unless ;
|
||||
|
||||
: parse-control-character ( regexp -- )
|
||||
next get-char make-nontoken-nfa ;
|
||||
|
||||
: parse-backreference ( regexp obj -- )
|
||||
2drop ;
|
||||
: parse-control-character ( -- n )
|
||||
next get-char ;
|
||||
|
||||
: dot-construction ( regexp -- )
|
||||
[ CHAR: \n = not ] make-nontoken-nfa ;
|
||||
[ CHAR: \n = not ] push-single-nfa ;
|
||||
|
||||
: front-anchor-construction ( regexp -- )
|
||||
drop ;
|
||||
|
@ -299,32 +298,50 @@ ERROR: bad-hex number ;
|
|||
[ get-char CHAR: } = ] take-until
|
||||
"," split1 [ [ string>number ] bi@ ] keep >boolean ;
|
||||
|
||||
: parse-escaped ( regexp -- )
|
||||
next get-char {
|
||||
{ CHAR: \ [ [ CHAR: \ = ] make-nontoken-nfa ] }
|
||||
{ CHAR: t [ [ CHAR: \t = ] make-nontoken-nfa ] }
|
||||
{ CHAR: n [ [ CHAR: \n = ] make-nontoken-nfa ] }
|
||||
{ CHAR: r [ [ CHAR: \r = ] make-nontoken-nfa ] }
|
||||
{ CHAR: f [ [ HEX: c = ] make-nontoken-nfa ] }
|
||||
{ CHAR: a [ [ HEX: 7 = ] make-nontoken-nfa ] }
|
||||
{ CHAR: e [ [ HEX: 1b = ] make-nontoken-nfa ] }
|
||||
TUPLE: character-class members ;
|
||||
TUPLE: character-class-range from to ;
|
||||
TUPLE: negated-character-class < character-class ;
|
||||
TUPLE: negated-character-class-range < character-class-range ;
|
||||
TUPLE: intersection-class < character-class ;
|
||||
TUPLE: negated-intersection-class < intersection-class ;
|
||||
|
||||
{ CHAR: d [ [ digit? ] make-nontoken-nfa ] }
|
||||
{ CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
|
||||
{ CHAR: s [ [ java-blank? ] make-nontoken-nfa ] }
|
||||
{ CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] }
|
||||
{ CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] }
|
||||
{ CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] }
|
||||
GENERIC: character-class-contains? ( obj character-class -- ? )
|
||||
|
||||
{ CHAR: p [ parse-posix-class make-nontoken-nfa ] }
|
||||
{ CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] }
|
||||
: parse-escaped-until ( -- seq )
|
||||
[ get-char CHAR: \ = get-next CHAR: E = and ] take-until
|
||||
next ;
|
||||
|
||||
: character-class-predicate ( seq -- quot )
|
||||
boa '[ , character-class-contains? ] ;
|
||||
|
||||
ERROR: unmatched-escape-sequence ;
|
||||
|
||||
: (parse-escaped) ( regexp ? ch -- obj )
|
||||
{
|
||||
{ CHAR: \ [ [ CHAR: \ = ] ] }
|
||||
{ CHAR: t [ [ CHAR: \t = ] ] }
|
||||
{ CHAR: n [ [ CHAR: \n = ] ] }
|
||||
{ CHAR: r [ [ CHAR: \r = ] ] }
|
||||
{ CHAR: f [ [ HEX: c = ] ] }
|
||||
{ CHAR: a [ [ HEX: 7 = ] ] }
|
||||
{ CHAR: e [ [ HEX: 1b = ] ] }
|
||||
|
||||
{ CHAR: d [ [ digit? ] ] }
|
||||
{ CHAR: D [ [ digit? not ] ] }
|
||||
{ CHAR: s [ [ java-blank? ] ] }
|
||||
{ CHAR: S [ [ java-blank? not ] ] }
|
||||
{ CHAR: w [ [ c-identifier-char? ] ] }
|
||||
{ CHAR: W [ [ c-identifier-char? not ] ] }
|
||||
|
||||
{ CHAR: p [ parse-posix-class ] }
|
||||
{ CHAR: P [ parse-posix-class [ not ] compose ] }
|
||||
{ CHAR: x [ parse-short-hex ] }
|
||||
{ CHAR: u [ parse-long-hex ] }
|
||||
{ CHAR: 0 [ parse-octal ] }
|
||||
{ CHAR: c [ parse-control-character ] }
|
||||
|
||||
! { CHAR: Q [ quot til \E ] }
|
||||
! { CHAR: E [ should be an error, parse this in the Q if exists ] }
|
||||
! { CHAR: Q [ next parse-escaped-until ] }
|
||||
! { CHAR: E [ unmatched-escape-sequence ] }
|
||||
|
||||
! { CHAR: b [ ] } ! a word boundary
|
||||
! { CHAR: B [ ] } ! a non-word boundary
|
||||
|
@ -332,34 +349,57 @@ ERROR: bad-hex number ;
|
|||
! { CHAR: G [ ] } ! end of previous match
|
||||
! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
|
||||
! { CHAR: z [ ] } ! end of the input
|
||||
[ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
|
||||
[ ]
|
||||
} case ;
|
||||
|
||||
: parse-escaped ( regexp -- )
|
||||
next get-char (parse-escaped) push-single-nfa ;
|
||||
|
||||
: handle-dash ( vector -- vector )
|
||||
[ dup dash eq? [ drop CHAR: - ] when ] map ;
|
||||
|
||||
M: object character-class-contains? ( obj1 obj2 -- ? )
|
||||
= ;
|
||||
|
||||
M: callable character-class-contains? ( obj1 callable -- ? )
|
||||
call ;
|
||||
|
||||
M: character-class character-class-contains? ( obj cc -- ? )
|
||||
members>> [ character-class-contains? ] with find drop >boolean ;
|
||||
|
||||
M: negated-character-class character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
M: character-class-range character-class-contains? ( obj cc -- ? )
|
||||
[ from>> ] [ to>> ] bi between? ;
|
||||
|
||||
M: negated-character-class-range character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
M: intersection-class character-class-contains? ( obj cc -- ? )
|
||||
members>> [ character-class-contains? not ] with find drop not ;
|
||||
|
||||
M: negated-intersection-class character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
ERROR: unmatched-negated-character-class class ;
|
||||
|
||||
: handle-caret ( vector -- vector ? )
|
||||
: handle-caret ( obj -- seq class )
|
||||
dup [ length 2 >= ] [ first caret eq? ] bi and [
|
||||
rest t
|
||||
rest negated-character-class
|
||||
] [
|
||||
f
|
||||
character-class
|
||||
] if ;
|
||||
|
||||
: make-character-class ( regexp -- )
|
||||
left-bracket over stack>> cut-stack
|
||||
pick (>>stack)
|
||||
handle-dash
|
||||
handle-caret
|
||||
>r [ dup number? [ '[ dup , = ] ] when ] map
|
||||
[ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r>
|
||||
[ [ not ] compose ] when
|
||||
make-nontoken-nfa ;
|
||||
handle-dash handle-caret
|
||||
character-class-predicate push-single-nfa ;
|
||||
|
||||
: apply-dash ( regexp -- )
|
||||
stack>> dup [ pop ] [ pop* ] [ pop ] tri
|
||||
swap '[ dup , , between? ] swap push ;
|
||||
swap character-class-range boa swap push ;
|
||||
|
||||
: apply-dash? ( regexp -- ? )
|
||||
stack>> dup length 3 >=
|
||||
|
@ -371,7 +411,7 @@ DEFER: parse-character-class
|
|||
next get-char
|
||||
{
|
||||
{ CHAR: [ [
|
||||
[ 1+ ] change-bracket-count left-bracket push-stack
|
||||
[ 1+ ] change-bracket-count dup left-bracket push-stack
|
||||
parse-character-class
|
||||
] }
|
||||
{ CHAR: ] [
|
||||
|
@ -381,7 +421,7 @@ DEFER: parse-character-class
|
|||
{ CHAR: - [ dash push-stack ] }
|
||||
! { CHAR: & [ ampersand push-stack ] }
|
||||
! { CHAR: : [ semicolon push-stack ] }
|
||||
{ CHAR: \ [ parse-escaped ] }
|
||||
{ CHAR: \ [ next get-char (parse-escaped) push-stack ] }
|
||||
{ f [ unbalanced-brackets ] }
|
||||
[ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
|
||||
} case
|
||||
|
@ -393,7 +433,7 @@ DEFER: parse-character-class
|
|||
: parse-character-class-second ( regexp -- )
|
||||
get-next
|
||||
{
|
||||
! { CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
|
@ -403,7 +443,7 @@ DEFER: parse-character-class
|
|||
get-next
|
||||
{
|
||||
{ CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
|
||||
! { CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
|
@ -442,7 +482,7 @@ ERROR: unsupported-token token ;
|
|||
[ set-start-state ]
|
||||
} cleave
|
||||
] }
|
||||
[ drop make-nontoken-nfa ]
|
||||
[ drop push-single-nfa ]
|
||||
} case ;
|
||||
|
||||
: (parse-raw-regexp) ( regexp -- )
|
||||
|
@ -639,7 +679,7 @@ TUPLE: dfa-traverser
|
|||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end )
|
||||
match length>> ;
|
||||
match length>> 1- ;
|
||||
|
||||
! character classes
|
||||
! TUPLE: range-class from to ;
|
||||
|
@ -647,5 +687,15 @@ TUPLE: dfa-traverser
|
|||
|
||||
! (?:a|b)* <- does not capture
|
||||
! (a|b)*\1 <- group captured
|
||||
! (?!abba) negative lookahead matches ababa but not abbaa
|
||||
! doesn't advance the current position:
|
||||
! (?=abba) positive lookahead matches abbaaa but not abaaa
|
||||
! (?!abba) negative lookahead matches ababa but not abbaa
|
||||
! look behind. "lookaround"
|
||||
|
||||
! : $ ( n -- obj ) groups get nth ;
|
||||
! [
|
||||
! groups bound to scope here
|
||||
! ] [
|
||||
! error or something
|
||||
! ] if-match
|
||||
! match in a string with .*foo.*
|
||||
|
|
|
@ -82,6 +82,7 @@
|
|||
("^!.*$" . font-lock-comment-face)
|
||||
(" !.*$" . font-lock-comment-face)
|
||||
("( .* )" . font-lock-comment-face)
|
||||
"BIN:"
|
||||
"MAIN:"
|
||||
"IN:" "USING:" "TUPLE:" "^C:" "^M:"
|
||||
"METHOD:"
|
||||
|
@ -89,7 +90,9 @@
|
|||
"REQUIRES:"
|
||||
"GENERIC:" "GENERIC#" "SYMBOL:" "PREDICATE:" "VAR:" "VARS:"
|
||||
"C-STRUCT:"
|
||||
"C-UNION:" "<PRIVATE" "PRIVATE>" "MACRO:" "MACRO::" "DEFER:" "TYPEDEF:"))
|
||||
"C-UNION:" "<PRIVATE" "PRIVATE>" "MACRO:" "MACRO::" "DEFER:" "TYPEDEF:"
|
||||
"SYMBOLS:"
|
||||
))
|
||||
|
||||
(defun factor-mode ()
|
||||
"A mode for editing programs written in the Factor programming language."
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
USING: regexp4 tools.test kernel ;
|
||||
IN: regexp4-tests
|
||||
|
||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "." <regexp> matches? ] unit-test
|
||||
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
|
||||
|
||||
|
||||
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "^" "[^]" <regexp> matches? ] must-fail
|
||||
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
|
||||
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "\\." <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
|
||||
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
|
||||
|
||||
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
|
||||
[ ] [
|
||||
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
||||
<regexp> drop
|
||||
] unit-test
|
||||
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
|
||||
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
|
||||
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
[ t ] [
|
||||
"a"
|
||||
R' a'
|
||||
matches?
|
||||
] unit-test
|
||||
|
||||
! ((A)(B(C)))
|
||||
! 1. ((A)(B(C)))
|
||||
! 2. (A)
|
||||
! 3. (B(C))
|
||||
! 4. (C)
|
|
@ -0,0 +1,712 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators kernel math
|
||||
sequences namespaces locals combinators.lib state-tables
|
||||
math.parser state-parser sets dlists unicode.categories
|
||||
math.order quotations shuffle math.ranges splitting
|
||||
symbols fry parser math.ranges inspector strings ;
|
||||
IN: regexp4
|
||||
|
||||
SYMBOLS: eps start-state final-state beginning-of-text
|
||||
end-of-text left-parenthesis alternation left-bracket
|
||||
caret dash ampersand colon ;
|
||||
|
||||
SYMBOL: runtime-epsilon
|
||||
|
||||
TUPLE: regexp raw parentheses-count bracket-count
|
||||
state stack nfa new-states dfa minimized-dfa
|
||||
dot-matches-newlines? capture-group captured-groups ;
|
||||
|
||||
TUPLE: capture-group n range ;
|
||||
|
||||
ERROR: parentheses-underflow ;
|
||||
ERROR: unbalanced-parentheses ;
|
||||
ERROR: unbalanced-brackets ;
|
||||
|
||||
: push-stack ( regexp token -- ) swap stack>> push ;
|
||||
: push-all-stack ( regexp seq -- ) swap stack>> push-all ;
|
||||
: next-state ( regexp -- n ) [ 1+ ] change-state state>> ;
|
||||
|
||||
: check-parentheses-underflow ( regexp -- )
|
||||
parentheses-count>> 0 < [ parentheses-underflow ] when ;
|
||||
|
||||
: check-unbalanced-parentheses ( regexp -- )
|
||||
parentheses-count>> 0 > [ unbalanced-parentheses ] when ;
|
||||
|
||||
:: (apply-alternation) ( stack regexp -- )
|
||||
[let | s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek alternation = [ stack pop* ] when stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s4 [ regexp next-state ]
|
||||
s5 [ regexp next-state ]
|
||||
table [ regexp nfa>> ] |
|
||||
s5 table add-row
|
||||
s4 eps s0 <entry> table add-entry
|
||||
s4 eps s2 <entry> table add-entry
|
||||
s1 eps s5 <entry> table add-entry
|
||||
s3 eps s5 <entry> table add-entry
|
||||
s1 table final-states>> delete-at
|
||||
s3 table final-states>> delete-at
|
||||
t s5 table final-states>> set-at
|
||||
s4 s5 2array stack push ] ;
|
||||
|
||||
: apply-alternation ( regexp -- )
|
||||
[ stack>> ] [ (apply-alternation) ] bi ;
|
||||
|
||||
: apply-alternation? ( stack -- ? )
|
||||
dup length dup 3 <
|
||||
[ 2drop f ] [ 2 - swap nth alternation = ] if ;
|
||||
|
||||
:: (apply-concatenation) ( stack regexp -- )
|
||||
[let* |
|
||||
s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
table [ regexp nfa>> ] |
|
||||
s1 eps s2 <entry> table set-entry
|
||||
s1 table final-states>> delete-at
|
||||
s3 table add-row
|
||||
s0 s3 2array stack push ] ;
|
||||
|
||||
: apply-concatenation ( regexp -- )
|
||||
[ stack>> ] [ (apply-concatenation) ] bi ;
|
||||
|
||||
: apply-concatenation? ( seq -- ? )
|
||||
dup length dup 2 <
|
||||
[ 2drop f ] [ 2 - swap nth array? ] if ;
|
||||
|
||||
: apply-loop ( seq regexp -- seq regexp )
|
||||
over length 1 > [
|
||||
2dup over apply-alternation?
|
||||
[ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
|
||||
] when ;
|
||||
|
||||
: cut-out ( vector n -- vector' vector ) cut rest ;
|
||||
|
||||
: cut-stack ( obj vector -- vector' vector )
|
||||
tuck last-index cut-out swap ;
|
||||
|
||||
: apply-til-last ( regexp token -- )
|
||||
swap [ cut-stack ] change-stack
|
||||
apply-loop stack>> push-all ;
|
||||
|
||||
: concatenation-loop ( regexp -- )
|
||||
dup stack>> dup apply-concatenation?
|
||||
[ over (apply-concatenation) concatenation-loop ] [ 2drop ] if ;
|
||||
|
||||
:: apply-kleene-closure ( regexp -- )
|
||||
[let* | stack [ regexp stack>> ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s2 [ regexp next-state ]
|
||||
s3 [ regexp next-state ]
|
||||
table [ regexp nfa>> ] |
|
||||
s1 table final-states>> delete-at
|
||||
t s3 table final-states>> set-at
|
||||
s3 table add-row
|
||||
s1 eps s0 <entry> table add-entry
|
||||
s2 eps s0 <entry> table add-entry
|
||||
s2 eps s3 <entry> table add-entry
|
||||
s1 eps s3 <entry> table add-entry
|
||||
s2 s3 2array stack push ] ;
|
||||
|
||||
: add-numbers ( n obj -- obj )
|
||||
2dup [ number? ] bi@ and
|
||||
[ + ] [ dup sequence? [ [ + ] with map ] [ nip ] if ] if ;
|
||||
|
||||
: increment-columns ( n assoc -- )
|
||||
dup [ >r swap >r add-numbers r> r> set-at ] curry with* assoc-each ;
|
||||
|
||||
:: copy-state-rows ( regexp range -- )
|
||||
[let* | len [ range range-length ]
|
||||
offset [ regexp state>> range range-min - 1+ ]
|
||||
state [ regexp [ len + ] change-state ] |
|
||||
regexp nfa>> rows>>
|
||||
[ drop range member? ] assoc-filter
|
||||
[
|
||||
[ offset + ] dip
|
||||
[ offset swap add-numbers ] assoc-map
|
||||
] assoc-map
|
||||
regexp nfa>> [ assoc-union ] change-rows drop
|
||||
range [ range-min ] [ range-max ] bi [ offset + ] bi@ 2array
|
||||
regexp stack>> push ] ;
|
||||
|
||||
: last-state ( regexp -- range )
|
||||
stack>> peek first2 [a,b] ;
|
||||
|
||||
: set-last-state-final ( ? regexp -- )
|
||||
[ stack>> peek second ] [ nfa>> final-states>> ] bi set-at ;
|
||||
|
||||
: apply-plus-closure ( regexp -- )
|
||||
[ dup last-state copy-state-rows ]
|
||||
[ apply-kleene-closure ]
|
||||
[ apply-concatenation ] tri ;
|
||||
|
||||
: apply-question-closure ( regexp -- )
|
||||
[ stack>> peek first2 eps swap <entry> ] [ nfa>> add-entry ] bi ;
|
||||
|
||||
: with0 ( obj n quot -- n quot' ) swapd curry ; inline
|
||||
|
||||
|
||||
: range>state ( range -- pair )
|
||||
[ from>> ] [ length>> ] bi over - 2array ;
|
||||
|
||||
: copy-state ( regexp range n -- )
|
||||
dup zero? [
|
||||
drop range>state over stack>> push apply-question-closure
|
||||
] [
|
||||
[ copy-state-rows ] with0 with0 times
|
||||
] if ;
|
||||
|
||||
:: (exactly-n) ( regexp state n -- )
|
||||
regexp state n copy-state
|
||||
t regexp set-last-state-final ;
|
||||
|
||||
: exactly-n ( regexp n -- )
|
||||
>r dup last-state r> 1- (exactly-n) ;
|
||||
|
||||
: exactly-n-concatenated ( regexp state n -- )
|
||||
B
|
||||
[ (exactly-n) ] 3keep
|
||||
nip 1- [ apply-concatenation ] with0 times ;
|
||||
|
||||
:: at-least-n ( regexp n -- )
|
||||
[let | state [ regexp stack>> pop first2 [a,b] ] |
|
||||
regexp state n copy-state
|
||||
state regexp stack>> push
|
||||
regexp apply-kleene-closure ] ;
|
||||
|
||||
: peek-last ( regexp -- range )
|
||||
stack>> peek first2 [a,b] ;
|
||||
|
||||
: pop-last ( regexp -- range )
|
||||
stack>> pop first2 [a,b] ;
|
||||
|
||||
:: at-most-n ( regexp n -- )
|
||||
[let | state [ regexp pop-last ] |
|
||||
regexp state n [ 1+ exactly-n-concatenated ] with with each
|
||||
regexp n 1- [ apply-alternation ] with0 times
|
||||
regexp apply-question-closure ] ;
|
||||
|
||||
:: from-m-to-n ( regexp m n -- )
|
||||
[let | state [ regexp pop-last ] |
|
||||
regexp state
|
||||
m n [a,b] [ exactly-n-concatenated ] with with each
|
||||
regexp n m - [ apply-alternation ] with0 times ] ;
|
||||
|
||||
: apply-brace-closure ( regexp from/f to/f comma? -- )
|
||||
[
|
||||
2dup and
|
||||
[ from-m-to-n ]
|
||||
[ [ nip at-most-n ] [ at-least-n ] if* ] if
|
||||
] [ drop exactly-n ] if ;
|
||||
|
||||
:: push-single-nfa ( regexp obj -- )
|
||||
[let | s0 [ regexp next-state ]
|
||||
s1 [ regexp next-state ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa>> ] |
|
||||
s0 obj s1 <entry> table set-entry
|
||||
s1 table add-row
|
||||
t s1 table final-states>> set-at
|
||||
s0 s1 2array stack push ] ;
|
||||
|
||||
: set-start-state ( regexp -- )
|
||||
dup stack>> dup empty? [
|
||||
2drop
|
||||
] [
|
||||
[ nfa>> ] [ pop first ] bi* >>start-state drop
|
||||
] if ;
|
||||
|
||||
: ascii? ( n -- ? ) 0 HEX: 7f between? ;
|
||||
: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
|
||||
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
|
||||
|
||||
: hex-digit? ( n -- ? )
|
||||
[
|
||||
[ dup decimal-digit? ]
|
||||
[ dup CHAR: a CHAR: f between? ]
|
||||
[ dup CHAR: A CHAR: F between? ]
|
||||
] || nip ;
|
||||
|
||||
: control-char? ( n -- ? )
|
||||
[
|
||||
[ dup 0 HEX: 1f between? ]
|
||||
[ dup HEX: 7f = ]
|
||||
] || nip ;
|
||||
|
||||
: punct? ( n -- ? )
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
|
||||
|
||||
: c-identifier-char? ( ch -- ? )
|
||||
[ [ dup alpha? ] [ dup CHAR: _ = ] ] || nip ;
|
||||
|
||||
: java-blank? ( n -- ? )
|
||||
{
|
||||
CHAR: \s CHAR: \t CHAR: \n
|
||||
HEX: b HEX: 7 CHAR: \r
|
||||
} member? ;
|
||||
|
||||
: java-printable? ( n -- ? )
|
||||
[ [ dup alpha? ] [ dup punct? ] ] || nip ;
|
||||
|
||||
ERROR: bad-character-class obj ;
|
||||
|
||||
: parse-posix-class ( -- quot )
|
||||
next
|
||||
CHAR: { expect
|
||||
[ get-char CHAR: } = ] take-until
|
||||
{
|
||||
{ "Lower" [ [ letter? ] ] }
|
||||
{ "Upper" [ [ LETTER? ] ] }
|
||||
{ "ASCII" [ [ ascii? ] ] }
|
||||
{ "Alpha" [ [ Letter? ] ] }
|
||||
{ "Digit" [ [ digit? ] ] }
|
||||
{ "Alnum" [ [ alpha? ] ] }
|
||||
{ "Punct" [ [ punct? ] ] }
|
||||
{ "Graph" [ [ java-printable? ] ] }
|
||||
{ "Print" [ [ java-printable? ] ] }
|
||||
{ "Blank" [ [ " \t" member? ] ] }
|
||||
{ "Cntrl" [ [ control-char? ] ] }
|
||||
{ "XDigit" [ [ hex-digit? ] ] }
|
||||
{ "Space" [ [ java-blank? ] ] }
|
||||
! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
|
||||
[ bad-character-class ]
|
||||
} case ;
|
||||
|
||||
ERROR: bad-octal number ;
|
||||
|
||||
: parse-octal ( -- n )
|
||||
next get-char drop
|
||||
3 take oct>
|
||||
dup 255 > [ bad-octal ] when ;
|
||||
|
||||
ERROR: bad-hex number ;
|
||||
|
||||
: parse-short-hex ( -- n )
|
||||
next 2 take hex>
|
||||
dup number? [ bad-hex ] unless ;
|
||||
|
||||
: parse-long-hex ( -- n )
|
||||
next 6 take hex>
|
||||
dup number? [ bad-hex ] unless ;
|
||||
|
||||
: parse-control-character ( -- n )
|
||||
next get-char ;
|
||||
|
||||
: dot-construction ( regexp -- )
|
||||
[ CHAR: \n = not ] push-single-nfa ;
|
||||
|
||||
: front-anchor-construction ( regexp -- )
|
||||
drop ;
|
||||
|
||||
: back-anchor-construction ( regexp -- )
|
||||
drop ;
|
||||
|
||||
: parse-brace ( -- from/f to/f comma? )
|
||||
next
|
||||
[ get-char CHAR: } = ] take-until
|
||||
"," split1 [ [ string>number ] bi@ ] keep >boolean ;
|
||||
|
||||
TUPLE: character-class members ;
|
||||
TUPLE: character-class-range from to ;
|
||||
TUPLE: negated-character-class < character-class ;
|
||||
TUPLE: negated-character-class-range < character-class-range ;
|
||||
TUPLE: intersection-class < character-class ;
|
||||
TUPLE: negated-intersection-class < intersection-class ;
|
||||
|
||||
GENERIC: character-class-contains? ( obj character-class -- ? )
|
||||
|
||||
: parse-escaped-until ( -- seq )
|
||||
[ get-char CHAR: \ = get-next CHAR: E = and ] take-until
|
||||
next ;
|
||||
|
||||
: character-class-predicate ( seq -- quot )
|
||||
boa '[ , character-class-contains? ] ;
|
||||
|
||||
ERROR: unmatched-escape-sequence ;
|
||||
|
||||
: (parse-escaped) ( regexp ? ch -- obj )
|
||||
{
|
||||
{ CHAR: \ [ [ CHAR: \ = ] ] }
|
||||
{ CHAR: t [ [ CHAR: \t = ] ] }
|
||||
{ CHAR: n [ [ CHAR: \n = ] ] }
|
||||
{ CHAR: r [ [ CHAR: \r = ] ] }
|
||||
{ CHAR: f [ [ HEX: c = ] ] }
|
||||
{ CHAR: a [ [ HEX: 7 = ] ] }
|
||||
{ CHAR: e [ [ HEX: 1b = ] ] }
|
||||
|
||||
{ CHAR: d [ [ digit? ] ] }
|
||||
{ CHAR: D [ [ digit? not ] ] }
|
||||
{ CHAR: s [ [ java-blank? ] ] }
|
||||
{ CHAR: S [ [ java-blank? not ] ] }
|
||||
{ CHAR: w [ [ c-identifier-char? ] ] }
|
||||
{ CHAR: W [ [ c-identifier-char? not ] ] }
|
||||
|
||||
{ CHAR: p [ parse-posix-class ] }
|
||||
{ CHAR: P [ parse-posix-class [ not ] compose ] }
|
||||
{ CHAR: x [ parse-short-hex ] }
|
||||
{ CHAR: u [ parse-long-hex ] }
|
||||
{ CHAR: 0 [ parse-octal ] }
|
||||
{ CHAR: c [ parse-control-character ] }
|
||||
|
||||
! { CHAR: Q [ next parse-escaped-until ] }
|
||||
! { CHAR: E [ unmatched-escape-sequence ] }
|
||||
|
||||
! { CHAR: b [ ] } ! a word boundary
|
||||
! { CHAR: B [ ] } ! a non-word boundary
|
||||
! { CHAR: A [ ] } ! beginning of input
|
||||
! { CHAR: G [ ] } ! end of previous match
|
||||
! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
|
||||
! { CHAR: z [ ] } ! end of the input
|
||||
[ ]
|
||||
} case ;
|
||||
|
||||
: parse-escaped ( regexp -- )
|
||||
next get-char (parse-escaped) push-single-nfa ;
|
||||
|
||||
: handle-dash ( vector -- vector )
|
||||
[ dup dash eq? [ drop CHAR: - ] when ] map ;
|
||||
|
||||
M: object character-class-contains? ( obj1 obj2 -- ? )
|
||||
= ;
|
||||
|
||||
M: callable character-class-contains? ( obj1 callable -- ? )
|
||||
call ;
|
||||
|
||||
M: character-class character-class-contains? ( obj cc -- ? )
|
||||
members>> [ character-class-contains? ] with find drop >boolean ;
|
||||
|
||||
M: negated-character-class character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
M: character-class-range character-class-contains? ( obj cc -- ? )
|
||||
[ from>> ] [ to>> ] bi between? ;
|
||||
|
||||
M: negated-character-class-range character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
M: intersection-class character-class-contains? ( obj cc -- ? )
|
||||
members>> [ character-class-contains? not ] with find drop not ;
|
||||
|
||||
M: negated-intersection-class character-class-contains? ( obj cc -- ? )
|
||||
call-next-method not ;
|
||||
|
||||
ERROR: unmatched-negated-character-class class ;
|
||||
|
||||
: handle-caret ( obj -- seq class )
|
||||
dup [ length 2 >= ] [ first caret eq? ] bi and [
|
||||
rest negated-character-class
|
||||
] [
|
||||
character-class
|
||||
] if ;
|
||||
|
||||
: make-character-class ( regexp -- )
|
||||
left-bracket over stack>> cut-stack
|
||||
pick (>>stack)
|
||||
handle-dash handle-caret
|
||||
character-class-predicate push-single-nfa ;
|
||||
|
||||
: apply-dash ( regexp -- )
|
||||
stack>> dup [ pop ] [ pop* ] [ pop ] tri
|
||||
swap character-class-range boa swap push ;
|
||||
|
||||
: apply-dash? ( regexp -- ? )
|
||||
stack>> dup length 3 >=
|
||||
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
|
||||
|
||||
DEFER: parse-character-class
|
||||
: (parse-character-class) ( regexp -- )
|
||||
[
|
||||
next get-char
|
||||
{
|
||||
{ CHAR: [ [
|
||||
[ 1+ ] change-bracket-count dup left-bracket push-stack
|
||||
parse-character-class
|
||||
] }
|
||||
{ CHAR: ] [
|
||||
[ 1- ] change-bracket-count
|
||||
make-character-class
|
||||
] }
|
||||
{ CHAR: - [ dash push-stack ] }
|
||||
! { CHAR: & [ ampersand push-stack ] }
|
||||
! { CHAR: : [ semicolon push-stack ] }
|
||||
{ CHAR: \ [ next get-char (parse-escaped) push-stack ] }
|
||||
{ f [ unbalanced-brackets ] }
|
||||
[ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
|
||||
} case
|
||||
] [
|
||||
dup bracket-count>> 0 >
|
||||
[ (parse-character-class) ] [ drop ] if
|
||||
] bi ;
|
||||
|
||||
: parse-character-class-second ( regexp -- )
|
||||
get-next
|
||||
{
|
||||
{ CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class-first ( regexp -- )
|
||||
get-next
|
||||
{
|
||||
{ CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
|
||||
{ CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class ( regexp -- )
|
||||
[ parse-character-class-first ] [ (parse-character-class) ] bi ;
|
||||
|
||||
ERROR: unsupported-token token ;
|
||||
: parse-token ( regexp token -- )
|
||||
dup {
|
||||
{ CHAR: ^ [ drop front-anchor-construction ] }
|
||||
{ CHAR: $ [ drop back-anchor-construction ] }
|
||||
{ CHAR: \ [ drop parse-escaped ] }
|
||||
{ CHAR: | [ drop dup concatenation-loop alternation push-stack ] }
|
||||
{ CHAR: ( [ drop [ 1+ ] change-parentheses-count left-parenthesis push-stack ] }
|
||||
{ CHAR: ) [ drop [ 1- ] change-parentheses-count left-parenthesis apply-til-last ] }
|
||||
{ CHAR: * [ drop apply-kleene-closure ] }
|
||||
{ CHAR: + [ drop apply-plus-closure ] }
|
||||
{ CHAR: ? [ drop apply-question-closure ] }
|
||||
{ CHAR: { [ drop parse-brace apply-brace-closure ] }
|
||||
{ CHAR: [ [
|
||||
drop
|
||||
dup left-bracket push-stack
|
||||
[ 1+ ] change-bracket-count parse-character-class
|
||||
] }
|
||||
! { CHAR: } [ drop drop "brace" ] }
|
||||
{ CHAR: . [ drop dot-construction ] }
|
||||
{ beginning-of-text [ push-stack ] }
|
||||
{ end-of-text [
|
||||
drop {
|
||||
[ check-unbalanced-parentheses ]
|
||||
[ concatenation-loop ]
|
||||
[ beginning-of-text apply-til-last ]
|
||||
[ set-start-state ]
|
||||
} cleave
|
||||
] }
|
||||
[ drop push-single-nfa ]
|
||||
} case ;
|
||||
|
||||
: (parse-raw-regexp) ( regexp -- )
|
||||
get-char [ dupd parse-token next (parse-raw-regexp) ] [ drop ] if* ;
|
||||
|
||||
: parse-raw-regexp ( regexp -- )
|
||||
[ beginning-of-text parse-token ]
|
||||
[
|
||||
dup raw>> dup empty? [
|
||||
2drop
|
||||
] [
|
||||
[ (parse-raw-regexp) ] string-parse
|
||||
] if
|
||||
]
|
||||
[ end-of-text parse-token ] tri ;
|
||||
|
||||
:: find-delta ( states obj table -- keys )
|
||||
obj states [
|
||||
table get-row at
|
||||
[ dup integer? [ 1array ] when unique ] [ H{ } ] if*
|
||||
] with map H{ } clone [ assoc-union ] reduce keys ;
|
||||
|
||||
:: (find-closure) ( states obj assoc table -- keys )
|
||||
[let | size [ assoc assoc-size ] |
|
||||
assoc states unique assoc-union
|
||||
dup assoc-size size > [
|
||||
obj states [
|
||||
table get-row at* [
|
||||
dup integer? [ 1array ] when
|
||||
obj rot table (find-closure)
|
||||
] [
|
||||
drop
|
||||
] if
|
||||
] with each
|
||||
] when ] ;
|
||||
|
||||
: find-closure ( states obj table -- states )
|
||||
>r H{ } r> (find-closure) keys ;
|
||||
|
||||
: find-epsilon-closure ( states table -- states )
|
||||
>r eps H{ } r> (find-closure) keys ;
|
||||
|
||||
: filter-special-transition ( vec -- vec' )
|
||||
[ drop eps = not ] assoc-filter ;
|
||||
|
||||
: initialize-subset-construction ( regexp -- )
|
||||
<vector-table> >>dfa
|
||||
[
|
||||
nfa>> [ start-state>> 1array ] keep
|
||||
find-epsilon-closure 1dlist
|
||||
] [
|
||||
swap >>new-states drop
|
||||
] [
|
||||
[ dfa>> ] [ nfa>> ] bi
|
||||
columns>> filter-special-transition >>columns drop
|
||||
] tri ;
|
||||
|
||||
:: (subset-construction) ( regexp -- )
|
||||
[let* | nfa [ regexp nfa>> ]
|
||||
dfa [ regexp dfa>> ]
|
||||
new-states [ regexp new-states>> ]
|
||||
columns [ dfa columns>> keys ] |
|
||||
|
||||
new-states dlist-empty? [
|
||||
new-states pop-front
|
||||
dup dfa add-row
|
||||
columns [
|
||||
2dup nfa [ find-delta ] [ find-epsilon-closure ] bi
|
||||
dup [ dfa rows>> key? ] [ empty? ] bi or [
|
||||
dup new-states push-back
|
||||
] unless
|
||||
dup empty? [ 3drop ] [ <entry> dfa set-entry ] if
|
||||
] with each
|
||||
regexp (subset-construction)
|
||||
] unless ] ;
|
||||
|
||||
: set-start/final-states ( regexp -- )
|
||||
dup [ nfa>> start-state>> ]
|
||||
[ dfa>> rows>> keys [ member? ] with filter first ] bi
|
||||
>r dup dfa>> r> >>start-state drop
|
||||
|
||||
dup [ nfa>> final-states>> ] [ dfa>> rows>> ] bi
|
||||
[ keys ] bi@
|
||||
[ intersect empty? not ] with filter
|
||||
>r dfa>> r> >>final-states drop ;
|
||||
|
||||
: subset-construction ( regexp -- )
|
||||
[ initialize-subset-construction ]
|
||||
[ (subset-construction) ]
|
||||
[ set-start/final-states ] tri ;
|
||||
|
||||
: <regexp> ( raw -- obj )
|
||||
regexp new
|
||||
swap >>raw
|
||||
0 >>parentheses-count
|
||||
0 >>bracket-count
|
||||
-1 >>state
|
||||
V{ } clone >>stack
|
||||
<vector-table> >>nfa
|
||||
dup [ parse-raw-regexp ] [ subset-construction ] bi ;
|
||||
|
||||
! Literal syntax for regexps
|
||||
: parse-options ( string -- ? )
|
||||
#! Lame
|
||||
{
|
||||
{ "" [ f ] }
|
||||
{ "i" [ t ] }
|
||||
} case ;
|
||||
|
||||
: parse-regexp ( accum end -- accum )
|
||||
lexer get dup skip-blank
|
||||
[ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
|
||||
! lexer get dup still-parsing-line?
|
||||
! [ (parse-token) parse-options ] [ drop f ] if
|
||||
<regexp> parsed ;
|
||||
|
||||
: R! CHAR: ! parse-regexp ; parsing
|
||||
: R" CHAR: " parse-regexp ; parsing
|
||||
: R# CHAR: # parse-regexp ; parsing
|
||||
: R' CHAR: ' parse-regexp ; parsing
|
||||
: R( CHAR: ) parse-regexp ; parsing
|
||||
: R/ CHAR: / parse-regexp ; parsing
|
||||
: R@ CHAR: @ parse-regexp ; parsing
|
||||
: R[ CHAR: ] parse-regexp ; parsing
|
||||
: R` CHAR: ` parse-regexp ; parsing
|
||||
: R{ CHAR: } parse-regexp ; parsing
|
||||
: R| CHAR: | parse-regexp ; parsing
|
||||
|
||||
TUPLE: dfa-traverser
|
||||
dfa
|
||||
last-state current-state
|
||||
text
|
||||
start-index current-index
|
||||
matches ;
|
||||
|
||||
: <dfa-traverser> ( text dfa -- match )
|
||||
dfa>>
|
||||
dfa-traverser new
|
||||
swap [ start-state>> >>current-state ] keep
|
||||
>>dfa
|
||||
swap >>text
|
||||
0 >>start-index
|
||||
0 >>current-index
|
||||
V{ } clone >>matches ;
|
||||
|
||||
: final-state? ( dfa-traverser -- ? )
|
||||
[ current-state>> ] [ dfa>> final-states>> ] bi
|
||||
member? ;
|
||||
|
||||
: text-finished? ( dfa-traverser -- ? )
|
||||
[ current-index>> ] [ text>> length ] bi >= ;
|
||||
|
||||
: save-final-state ( dfa-straverser -- )
|
||||
[ current-index>> ] [ matches>> ] bi push ;
|
||||
|
||||
: match-done? ( dfa-traverser -- ? )
|
||||
dup final-state? [
|
||||
dup save-final-state
|
||||
] when text-finished? ;
|
||||
|
||||
: increment-state ( dfa-traverser state -- dfa-traverser )
|
||||
>r [ 1+ ] change-current-index
|
||||
dup current-state>> >>last-state r>
|
||||
>>current-state ;
|
||||
|
||||
: match-transition ( obj hash -- state/f )
|
||||
2dup keys [ callable? ] filter predicates
|
||||
[ swap at nip ] [ at ] if* ;
|
||||
|
||||
: do-match ( dfa-traverser -- dfa-traverser )
|
||||
dup match-done? [
|
||||
dup {
|
||||
[ current-index>> ]
|
||||
[ text>> ]
|
||||
[ current-state>> ]
|
||||
[ dfa>> rows>> ]
|
||||
} cleave
|
||||
at >r nth r> match-transition [
|
||||
increment-state do-match
|
||||
] when*
|
||||
] unless ;
|
||||
|
||||
: return-match ( dfa-traverser -- interval/f )
|
||||
dup matches>> empty? [
|
||||
drop f
|
||||
] [
|
||||
[ start-index>> ] [ matches>> peek ] bi 1 <range>
|
||||
] if ;
|
||||
|
||||
: match ( string regexp -- pair )
|
||||
<dfa-traverser> do-match return-match ;
|
||||
|
||||
: matches? ( string regexp -- ? )
|
||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end )
|
||||
match length>> 1- ;
|
||||
|
||||
! character classes
|
||||
! TUPLE: range-class from to ;
|
||||
! TUPLE: or-class left right ;
|
||||
|
||||
! (?:a|b)* <- does not capture
|
||||
! (a|b)*\1 <- group captured
|
||||
! doesn't advance the current position:
|
||||
! (?=abba) positive lookahead matches abbaaa but not abaaa
|
||||
! (?!abba) negative lookahead matches ababa but not abbaa
|
||||
! look behind. "lookaround"
|
||||
|
||||
! : $ ( n -- obj ) groups get nth ;
|
||||
! [
|
||||
! groups bound to scope here
|
||||
! ] [
|
||||
! error or something
|
||||
! ] if-match
|
||||
! match in a string with .*foo.*
|
Loading…
Reference in New Issue