Merge branch 'master' of git://factorcode.org/git/factor
commit
15a005359d
|
@ -75,10 +75,6 @@ METHOD: mutate-as { sequence object number } rot set-nth ;
|
|||
METHOD: at-mutate { number object sequence } swapd set-nth ;
|
||||
METHOD: as-mutate { object number sequence } set-nth ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! assoc
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
@ -181,15 +177,15 @@ METHOD: as-mutate { object object assoc } set-at ;
|
|||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: 1st ( seq -- obj ) 0 at ;
|
||||
: 2nd ( seq -- obj ) 1 at ;
|
||||
: 3rd ( seq -- obj ) 2 at ;
|
||||
: 4th ( seq -- obj ) 3 at ;
|
||||
: 5th ( seq -- obj ) 4 at ;
|
||||
: 6th ( seq -- obj ) 5 at ;
|
||||
: 7th ( seq -- obj ) 6 at ;
|
||||
: 8th ( seq -- obj ) 7 at ;
|
||||
: 9th ( seq -- obj ) 8 at ;
|
||||
: 1st ( seq -- obj ) 0 swap nth ;
|
||||
: 2nd ( seq -- obj ) 1 swap nth ;
|
||||
: 3rd ( seq -- obj ) 2 swap nth ;
|
||||
: 4th ( seq -- obj ) 3 swap nth ;
|
||||
: 5th ( seq -- obj ) 4 swap nth ;
|
||||
: 6th ( seq -- obj ) 5 swap nth ;
|
||||
: 7th ( seq -- obj ) 6 swap nth ;
|
||||
: 8th ( seq -- obj ) 7 swap nth ;
|
||||
: 9th ( seq -- obj ) 8 swap nth ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
|
||||
USING: arrays sequences ;
|
||||
|
||||
IN: obj.alist
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
PREDICATE: alist < sequence [ pair? ] all? ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
USING: kernel words namespaces arrays vectors hashtables
|
||||
sequences assocs sets grouping
|
||||
combinators.conditional
|
||||
combinators.short-circuit
|
||||
obj.util obj.alist ;
|
||||
|
||||
IN: obj
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: properties ( -- properties ) V{ } ;
|
||||
|
||||
SYM: self properties adjoin
|
||||
SYM: type properties adjoin
|
||||
SYM: title properties adjoin
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: types ( -- types ) V{ } ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: >obj ( val -- obj ) [ symbol? ] [ get ] [ ] 1if ;
|
||||
|
||||
: -> ( obj pro -- val ) swap >obj at ;
|
||||
|
||||
PREDICATE: obj < alist { [ self -> ] [ type -> ] } 1&& ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: objects ( -- objects ) V{ } ;
|
||||
|
||||
: define-object ( symbol table -- )
|
||||
2 group >vector
|
||||
self rot 2array prefix
|
||||
dup dup self -> set-global
|
||||
self -> objects adjoin ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
PREDICATE: ptr < symbol get obj? ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
USING: sets meta.util obj ;
|
||||
|
||||
IN: obj.papers
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYM: title properties adjoin
|
||||
SYM: abstract properties adjoin
|
||||
SYM: authors properties adjoin
|
||||
SYM: file properties adjoin
|
||||
SYM: date properties adjoin
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYM: paper types adjoin
|
||||
SYM: person types adjoin
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYM: randall-b-smith { type person } define-object
|
||||
SYM: david-ungar { type person } define-object
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
SYM: programming-as-an-experience
|
||||
{
|
||||
type paper
|
||||
title "Programming as an Experience: The Inspiration for Self"
|
||||
abstract "The Self system attempts to integrate intellectual and non-intellectual aspects of programming to create an overall experience. The language semantics, user interface, and implementation each help create this integrated experience. The language semantics embed the programmer in a uniform world of simple ob jects that can be modified without appealing to definitions of abstractions. In a similar way, the graphical interface puts the user into a uniform world of tangible objects that can be directly manipulated and changed without switching modes. The implementation strives to support the world-of-objects illusion by minimiz ing perceptible pauses and by providing true source-level semantics without sac rificing performance. As a side benefit, it encourages factoring. Although we see areas that fall short of the vision, on the whole, the language, interface, and im plementation conspire so that the Self programmer lives and acts in a consistent and malleable world of objects."
|
||||
authors { randall-b-smith david-ungar }
|
||||
file "/storage/papers/programming-as-experience.ps"
|
||||
date 1995
|
||||
}
|
||||
define-object
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
USING: kernel arrays strings sequences assocs io io.styles prettyprint colors
|
||||
combinators.conditional ;
|
||||
|
||||
IN: obj.print
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: write-wrapped ( string -- ) H{ { wrap-margin 500 } } [ write ] with-nesting ;
|
||||
|
||||
: print-elt ( val -- )
|
||||
{
|
||||
{ [ string? ] [ write-wrapped ] }
|
||||
{ [ array? ] [ [ . ] each ] }
|
||||
{ [ drop t ] [ . ] }
|
||||
}
|
||||
1cond ;
|
||||
|
||||
: print-grid ( grid -- )
|
||||
H{ { table-gap { 10 10 } } { table-border T{ rgba f 0 0 0 1 } } }
|
||||
[ [ [ [ [ print-elt ] with-cell ] each ] with-row ] each ] tabular-output ;
|
||||
|
||||
: print-table ( assoc -- ) >alist print-grid ;
|
||||
|
||||
: print-seq ( seq -- ) [ 1array ] map print-grid ;
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
USING: kernel parser words ;
|
||||
|
||||
IN: obj.util
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: SYM: CREATE-WORD dup define-symbol parsed ; parsing
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
USING: kernel words namespaces arrays sequences prettyprint help.topics bake
|
||||
obj obj.print ;
|
||||
|
||||
IN: obj.view
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: $tab ( seq -- ) first print-table ;
|
||||
: $obj ( seq -- ) first print-table ;
|
||||
: $seq ( seq -- ) first print-seq ;
|
||||
: $ptr ( seq -- ) first get print-table ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
PREDICATE: obj-type < symbol types member? ;
|
||||
|
||||
M: obj-type article-title ( type -- title ) unparse ;
|
||||
|
||||
M: obj-type article-content ( type -- content )
|
||||
objects [ type -> = ] with filter
|
||||
{ $seq , } bake ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
M: ptr article-title ( ptr -- title ) [ title -> ] [ unparse ] bi or ;
|
||||
|
||||
M: ptr article-content ( ptr -- content ) get { $obj , } bake ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
PREDICATE: obj-list < word \ objects = ;
|
||||
|
||||
M: obj-list article-title ( objects -- title ) drop "Objects" ;
|
||||
|
||||
M: obj-list article-content ( objects -- title )
|
||||
execute
|
||||
[ [ type -> ] [ ] bi 2array ] map
|
||||
{ $tab , } bake ;
|
|
@ -1 +0,0 @@
|
|||
Doug Coleman
|
|
@ -1,25 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors hashtables kernel math state-tables vars vectors ;
|
||||
IN: regexp2.backend
|
||||
|
||||
TUPLE: regexp
|
||||
raw
|
||||
{ stack vector }
|
||||
parse-tree
|
||||
{ options hashtable }
|
||||
nfa-table
|
||||
dfa-table
|
||||
minimized-table
|
||||
{ state integer }
|
||||
{ new-states vector }
|
||||
{ visited-states hashtable } ;
|
||||
|
||||
: reset-regexp ( regexp -- regexp )
|
||||
0 >>state
|
||||
V{ } clone >>stack
|
||||
V{ } clone >>new-states
|
||||
H{ } clone >>options
|
||||
H{ } clone >>visited-states ;
|
||||
|
||||
SYMBOL: current-regexp
|
|
@ -1,55 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order symbols regexp2.parser
|
||||
words regexp2.utils unicode.categories combinators.short-circuit ;
|
||||
IN: regexp2.classes
|
||||
|
||||
GENERIC: class-member? ( obj class -- ? )
|
||||
|
||||
M: word class-member? ( obj class -- ? ) 2drop f ;
|
||||
M: integer class-member? ( obj class -- ? ) 2drop f ;
|
||||
|
||||
M: character-class-range class-member? ( obj class -- ? )
|
||||
[ from>> ] [ to>> ] bi between? ;
|
||||
|
||||
M: any-char class-member? ( obj class -- ? )
|
||||
2drop t ;
|
||||
|
||||
M: letter-class class-member? ( obj class -- ? )
|
||||
drop letter? ;
|
||||
|
||||
M: LETTER-class class-member? ( obj class -- ? )
|
||||
drop LETTER? ;
|
||||
|
||||
M: Letter-class class-member? ( obj class -- ? )
|
||||
drop Letter? ;
|
||||
|
||||
M: ascii-class class-member? ( obj class -- ? )
|
||||
drop ascii? ;
|
||||
|
||||
M: digit-class class-member? ( obj class -- ? )
|
||||
drop digit? ;
|
||||
|
||||
M: alpha-class class-member? ( obj class -- ? )
|
||||
drop alpha? ;
|
||||
|
||||
M: punctuation-class class-member? ( obj class -- ? )
|
||||
drop punct? ;
|
||||
|
||||
M: java-printable-class class-member? ( obj class -- ? )
|
||||
drop java-printable? ;
|
||||
|
||||
M: non-newline-blank-class class-member? ( obj class -- ? )
|
||||
drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
|
||||
|
||||
M: control-character-class class-member? ( obj class -- ? )
|
||||
drop control-char? ;
|
||||
|
||||
M: hex-digit-class class-member? ( obj class -- ? )
|
||||
drop hex-digit? ;
|
||||
|
||||
M: java-blank-class class-member? ( obj class -- ? )
|
||||
drop java-blank? ;
|
||||
|
||||
M: unmatchable-class class-member? ( obj class -- ? )
|
||||
2drop f ;
|
|
@ -1,70 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators fry kernel locals
|
||||
math math.order regexp2.nfa regexp2.transition-tables sequences
|
||||
sets sorting vectors regexp2.utils sequences.lib ;
|
||||
USING: io prettyprint threads ;
|
||||
IN: regexp2.dfa
|
||||
|
||||
: find-delta ( states transition regexp -- new-states )
|
||||
nfa-table>> transitions>>
|
||||
rot [ swap at at ] with with map sift concat prune ;
|
||||
|
||||
: (find-epsilon-closure) ( states regexp -- new-states )
|
||||
eps swap find-delta ;
|
||||
|
||||
: find-epsilon-closure ( states regexp -- new-states )
|
||||
'[ dup , (find-epsilon-closure) union ] [ length ] while-changes
|
||||
natural-sort ;
|
||||
|
||||
: find-closure ( states transition regexp -- new-states )
|
||||
[ find-delta ] 2keep nip find-epsilon-closure ;
|
||||
|
||||
: find-start-state ( regexp -- state )
|
||||
[ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
|
||||
|
||||
: find-transitions ( seq1 regexp -- seq2 )
|
||||
nfa-table>> transitions>>
|
||||
[ at keys ] curry map concat eps swap remove ;
|
||||
|
||||
: add-todo-state ( state regexp -- )
|
||||
2dup visited-states>> key? [
|
||||
2drop
|
||||
] [
|
||||
[ visited-states>> conjoin ]
|
||||
[ new-states>> push ] 2bi
|
||||
] if ;
|
||||
|
||||
: new-transitions ( regexp -- )
|
||||
dup new-states>> [
|
||||
drop
|
||||
] [
|
||||
dupd pop dup pick find-transitions rot
|
||||
[
|
||||
[ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
|
||||
>r swapd transition boa r> dfa-table>> add-transition
|
||||
] curry with each
|
||||
new-transitions
|
||||
] if-empty ;
|
||||
|
||||
: states ( hashtable -- array )
|
||||
[ keys ]
|
||||
[ values [ values concat ] map concat append ] bi ;
|
||||
|
||||
: set-final-states ( regexp -- )
|
||||
dup
|
||||
[ nfa-table>> final-states>> keys ]
|
||||
[ dfa-table>> transitions>> states ] bi
|
||||
[ intersect empty? not ] with filter
|
||||
|
||||
swap dfa-table>> final-states>>
|
||||
[ conjoin ] curry each ;
|
||||
|
||||
: set-initial-state ( regexp -- )
|
||||
dup
|
||||
[ dfa-table>> ] [ find-start-state ] bi
|
||||
[ >>start-state drop ] keep
|
||||
1vector >>new-states drop ;
|
||||
|
||||
: construct-dfa ( regexp -- )
|
||||
[ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;
|
|
@ -1,126 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs grouping kernel regexp2.backend
|
||||
locals math namespaces regexp2.parser sequences state-tables fry
|
||||
quotations math.order math.ranges vectors unicode.categories
|
||||
regexp2.utils regexp2.transition-tables words sequences.lib ;
|
||||
IN: regexp2.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
: negated? ( -- ? ) negation-mode get 0 or odd? ;
|
||||
|
||||
SINGLETON: eps
|
||||
|
||||
: next-state ( regexp -- state )
|
||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||
|
||||
: set-start-state ( regexp -- )
|
||||
dup stack>> [
|
||||
drop
|
||||
] [
|
||||
[ nfa-table>> ] [ pop first ] bi* >>start-state drop
|
||||
] if-empty ;
|
||||
|
||||
GENERIC: nfa-node ( node -- )
|
||||
|
||||
:: add-simple-entry ( obj class -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
s0 [ regexp next-state ]
|
||||
s1 [ regexp next-state ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ] |
|
||||
negated? [
|
||||
s0 f obj class boa table add-transition
|
||||
s0 s1 <default-transition> table add-transition
|
||||
] [
|
||||
s0 s1 obj class boa table add-transition
|
||||
] if
|
||||
s0 s1 2array stack push
|
||||
t s1 table final-states>> set-at ] ;
|
||||
|
||||
:: concatenate-nodes ( -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ]
|
||||
s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ] |
|
||||
s1 s2 eps <literal-transition> table add-transition
|
||||
s1 table final-states>> delete-at
|
||||
s0 s3 2array stack push ] ;
|
||||
|
||||
:: alternate-nodes ( -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ]
|
||||
s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s4 [ regexp next-state ]
|
||||
s5 [ regexp next-state ] |
|
||||
s4 s0 eps <literal-transition> table add-transition
|
||||
s4 s2 eps <literal-transition> table add-transition
|
||||
s1 s5 eps <literal-transition> table add-transition
|
||||
s3 s5 eps <literal-transition> table add-transition
|
||||
s1 table final-states>> delete-at
|
||||
s3 table final-states>> delete-at
|
||||
t s5 table final-states>> set-at
|
||||
s4 s5 2array stack push ] ;
|
||||
|
||||
M: kleene-star nfa-node ( node -- )
|
||||
term>> nfa-node
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s2 [ regexp next-state ]
|
||||
s3 [ regexp next-state ]
|
||||
table [ regexp nfa-table>> ] |
|
||||
s1 table final-states>> delete-at
|
||||
t s3 table final-states>> set-at
|
||||
s1 s0 eps <literal-transition> table add-transition
|
||||
s2 s0 eps <literal-transition> table add-transition
|
||||
s2 s3 eps <literal-transition> table add-transition
|
||||
s1 s3 eps <literal-transition> table add-transition
|
||||
s2 s3 2array stack push ] ;
|
||||
|
||||
M: concatenation nfa-node ( node -- )
|
||||
seq>>
|
||||
[ [ nfa-node ] each ]
|
||||
[ length 1- [ concatenate-nodes ] times ] bi ;
|
||||
|
||||
M: alternation nfa-node ( node -- )
|
||||
seq>>
|
||||
[ [ nfa-node ] each ]
|
||||
[ length 1- [ alternate-nodes ] times ] bi ;
|
||||
|
||||
M: constant nfa-node ( node -- )
|
||||
char>> literal-transition add-simple-entry ;
|
||||
|
||||
M: epsilon nfa-node ( node -- )
|
||||
drop eps literal-transition add-simple-entry ;
|
||||
|
||||
M: word nfa-node ( node -- )
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: character-class-range nfa-node ( node -- )
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: capture-group nfa-node ( node -- )
|
||||
term>> nfa-node ;
|
||||
|
||||
M: negation nfa-node ( node -- )
|
||||
negation-mode inc
|
||||
term>> nfa-node
|
||||
negation-mode dec ;
|
||||
|
||||
: construct-nfa ( regexp -- )
|
||||
[
|
||||
reset-regexp
|
||||
negation-mode off
|
||||
[ current-regexp set ]
|
||||
[ parse-tree>> nfa-node ]
|
||||
[ set-start-state ] tri
|
||||
] with-scope ;
|
|
@ -1,33 +0,0 @@
|
|||
USING: kernel tools.test regexp2.backend regexp2 ;
|
||||
IN: regexp2.parser
|
||||
|
||||
: test-regexp ( string -- )
|
||||
default-regexp parse-regexp ;
|
||||
|
||||
: test-regexp2 ( string -- regexp )
|
||||
default-regexp dup parse-regexp ;
|
||||
|
||||
[ "(" ] [ unmatched-parentheses? ] must-fail-with
|
||||
|
||||
[ ] [ "a|b" test-regexp ] unit-test
|
||||
[ ] [ "a.b" test-regexp ] unit-test
|
||||
[ ] [ "a|b|c" test-regexp ] unit-test
|
||||
[ ] [ "abc|b" test-regexp ] unit-test
|
||||
[ ] [ "a|bcd" test-regexp ] unit-test
|
||||
[ ] [ "a|(b)" test-regexp ] unit-test
|
||||
[ ] [ "(a)|b" test-regexp ] unit-test
|
||||
[ ] [ "(a|b)" test-regexp ] unit-test
|
||||
[ ] [ "((a)|(b))" test-regexp ] unit-test
|
||||
|
||||
[ ] [ "(?:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?i:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?-i:a)" test-regexp ] unit-test
|
||||
[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
|
||||
[ ] [ "(?=a)" test-regexp ] unit-test
|
||||
|
||||
[ ] [ "[abc]" test-regexp ] unit-test
|
||||
[ ] [ "[a-c]" test-regexp ] unit-test
|
||||
[ ] [ "[^a-c]" test-regexp ] unit-test
|
||||
[ "[^]" test-regexp ] must-fail
|
|
@ -1,391 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators io io.streams.string
|
||||
kernel math math.parser multi-methods namespaces qualified sets
|
||||
quotations sequences sequences.lib splitting symbols vectors
|
||||
dlists math.order combinators.lib unicode.categories strings
|
||||
sequences.lib regexp2.backend regexp2.utils unicode.case ;
|
||||
IN: regexp2.parser
|
||||
|
||||
FROM: math.ranges => [a,b] ;
|
||||
|
||||
MIXIN: node
|
||||
TUPLE: concatenation seq ; INSTANCE: concatenation node
|
||||
TUPLE: alternation seq ; INSTANCE: alternation node
|
||||
TUPLE: kleene-star term ; INSTANCE: kleene-star node
|
||||
TUPLE: question term ; INSTANCE: question node
|
||||
TUPLE: negation term ; INSTANCE: negation node
|
||||
TUPLE: constant char ; INSTANCE: constant node
|
||||
TUPLE: range from to ; INSTANCE: range node
|
||||
TUPLE: lookahead term ; INSTANCE: lookahead node
|
||||
TUPLE: lookbehind term ; INSTANCE: lookbehind node
|
||||
TUPLE: capture-group term ; INSTANCE: capture-group node
|
||||
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
|
||||
TUPLE: independent-group term ; INSTANCE: independent-group node
|
||||
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
||||
SINGLETON: epsilon INSTANCE: epsilon node
|
||||
SINGLETON: any-char INSTANCE: any-char node
|
||||
SINGLETON: front-anchor INSTANCE: front-anchor node
|
||||
SINGLETON: back-anchor INSTANCE: back-anchor node
|
||||
|
||||
TUPLE: option-on option ; INSTANCE: option-on node
|
||||
TUPLE: option-off option ; INSTANCE: option-off node
|
||||
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ;
|
||||
|
||||
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
unmatchable-class ;
|
||||
|
||||
SINGLETONS: beginning-of-group end-of-group
|
||||
beginning-of-character-class end-of-character-class
|
||||
left-parenthesis pipe caret dash ;
|
||||
|
||||
: get-option ( option -- ? ) current-regexp get options>> at ;
|
||||
: get-unix-lines ( -- ? ) unix-lines get-option ;
|
||||
: get-dotall ( -- ? ) dotall get-option ;
|
||||
: get-multiline ( -- ? ) multiline get-option ;
|
||||
: get-comments ( -- ? ) comments get-option ;
|
||||
: get-case-insensitive ( -- ? ) case-insensitive get-option ;
|
||||
: get-unicode-case ( -- ? ) unicode-case get-option ;
|
||||
: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
|
||||
|
||||
: <negation> ( obj -- negation ) negation boa ;
|
||||
: <concatenation> ( seq -- concatenation )
|
||||
>vector get-reversed-regexp [ reverse ] when
|
||||
concatenation boa ;
|
||||
: <alternation> ( seq -- alternation ) >vector alternation boa ;
|
||||
: <capture-group> ( obj -- capture-group ) capture-group boa ;
|
||||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||
: <constant> ( obj -- constant )
|
||||
dup Letter? get-case-insensitive and [
|
||||
[ ch>lower constant boa ]
|
||||
[ ch>upper constant boa ] bi 2array <alternation>
|
||||
] [
|
||||
constant boa
|
||||
] if ;
|
||||
|
||||
: first|concatenation ( seq -- first/concatenation )
|
||||
dup length 1 = [ first ] [ <concatenation> ] if ;
|
||||
|
||||
: first|alternation ( seq -- first/alternation )
|
||||
dup length 1 = [ first ] [ <alternation> ] if ;
|
||||
|
||||
: <character-class-range> ( from to -- obj )
|
||||
2dup [ Letter? ] bi@ or get-case-insensitive and [
|
||||
[ [ ch>lower ] bi@ character-class-range boa ]
|
||||
[ [ ch>upper ] bi@ character-class-range boa ] 2bi
|
||||
2array [ [ from>> ] [ to>> ] bi < ] filter
|
||||
[ unmatchable-class ] [ first|alternation ] if-empty
|
||||
] [
|
||||
2dup <
|
||||
[ character-class-range boa ] [ 2drop unmatchable-class ] if
|
||||
] if ;
|
||||
|
||||
ERROR: unmatched-parentheses ;
|
||||
|
||||
: make-positive-lookahead ( string -- )
|
||||
lookahead boa push-stack ;
|
||||
|
||||
: make-negative-lookahead ( string -- )
|
||||
<negation> lookahead boa push-stack ;
|
||||
|
||||
: make-independent-group ( string -- )
|
||||
#! no backtracking
|
||||
independent-group boa push-stack ;
|
||||
|
||||
: make-positive-lookbehind ( string -- )
|
||||
lookbehind boa push-stack ;
|
||||
|
||||
: make-negative-lookbehind ( string -- )
|
||||
<negation> lookbehind boa push-stack ;
|
||||
|
||||
DEFER: nested-parse-regexp
|
||||
: make-non-capturing-group ( string -- )
|
||||
non-capture-group boa push-stack ;
|
||||
|
||||
ERROR: bad-option ch ;
|
||||
|
||||
: option ( ch -- singleton )
|
||||
{
|
||||
{ CHAR: i [ case-insensitive ] }
|
||||
{ CHAR: d [ unix-lines ] }
|
||||
{ CHAR: m [ multiline ] }
|
||||
{ CHAR: r [ reversed-regexp ] }
|
||||
{ CHAR: s [ dotall ] }
|
||||
{ CHAR: u [ unicode-case ] }
|
||||
{ CHAR: x [ comments ] }
|
||||
[ bad-option ]
|
||||
} case ;
|
||||
|
||||
: option-on ( option -- ) current-regexp get options>> conjoin ;
|
||||
: option-off ( option -- ) current-regexp get options>> delete-at ;
|
||||
|
||||
: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
|
||||
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
|
||||
|
||||
: parse-options ( string -- )
|
||||
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
|
||||
|
||||
DEFER: (parse-regexp)
|
||||
: parse-special-group ( -- )
|
||||
beginning-of-group push-stack
|
||||
(parse-regexp) pop-stack make-non-capturing-group ;
|
||||
|
||||
ERROR: bad-special-group string ;
|
||||
|
||||
: (parse-special-group) ( -- )
|
||||
read1 {
|
||||
{ [ dup CHAR: : = ]
|
||||
[ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
|
||||
{ [ dup CHAR: = = ]
|
||||
[ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
|
||||
{ [ dup CHAR: = = ]
|
||||
[ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
|
||||
{ [ dup CHAR: > = ]
|
||||
[ drop nested-parse-regexp pop-stack make-independent-group ] }
|
||||
{ [ dup CHAR: < = peek1 CHAR: = = and ]
|
||||
[ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
|
||||
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
||||
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
|
||||
[
|
||||
":)" read-until
|
||||
[ swap prefix ] dip
|
||||
{
|
||||
{ CHAR: : [ parse-options parse-special-group ] }
|
||||
{ CHAR: ) [ parse-options ] }
|
||||
[ drop bad-special-group ]
|
||||
} case
|
||||
]
|
||||
} cond ;
|
||||
|
||||
: handle-left-parenthesis ( -- )
|
||||
peek1 CHAR: ? =
|
||||
[ read1 drop (parse-special-group) ]
|
||||
[ nested-parse-regexp ] if ;
|
||||
|
||||
: handle-dot ( -- ) any-char push-stack ;
|
||||
: handle-pipe ( -- ) pipe push-stack ;
|
||||
: handle-star ( -- ) stack pop <kleene-star> push-stack ;
|
||||
: handle-question ( -- )
|
||||
stack pop epsilon 2array <alternation> push-stack ;
|
||||
: handle-plus ( -- )
|
||||
stack pop dup <kleene-star> 2array <concatenation> push-stack ;
|
||||
|
||||
ERROR: unmatched-brace ;
|
||||
: parse-repetition ( -- start finish ? )
|
||||
"}" read-until [ unmatched-brace ] unless
|
||||
[ "," split1 [ string>number ] bi@ ]
|
||||
[ CHAR: , swap index >boolean ] bi ;
|
||||
|
||||
: replicate/concatenate ( n obj -- obj' )
|
||||
over zero? [ 2drop epsilon ]
|
||||
[ <repetition> first|concatenation ] if ;
|
||||
|
||||
: exactly-n ( n -- )
|
||||
stack pop replicate/concatenate push-stack ;
|
||||
|
||||
: at-least-n ( n -- )
|
||||
stack pop
|
||||
[ replicate/concatenate ] keep
|
||||
<kleene-star> 2array <concatenation> push-stack ;
|
||||
|
||||
: at-most-n ( n -- )
|
||||
1+
|
||||
stack pop
|
||||
[ replicate/concatenate ] curry map <alternation> push-stack ;
|
||||
|
||||
: from-m-to-n ( m n -- )
|
||||
[a,b]
|
||||
stack pop
|
||||
[ replicate/concatenate ] curry map
|
||||
<alternation> push-stack ;
|
||||
|
||||
ERROR: invalid-range a b ;
|
||||
|
||||
: handle-left-brace ( -- )
|
||||
parse-repetition
|
||||
>r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
|
||||
[
|
||||
2dup and [ from-m-to-n ]
|
||||
[ [ nip at-most-n ] [ at-least-n ] if* ] if
|
||||
] [ drop 0 max exactly-n ] if ;
|
||||
|
||||
: handle-front-anchor ( -- ) front-anchor push-stack ;
|
||||
: handle-back-anchor ( -- ) back-anchor push-stack ;
|
||||
|
||||
ERROR: bad-character-class obj ;
|
||||
ERROR: expected-posix-class ;
|
||||
|
||||
: parse-posix-class ( -- obj )
|
||||
read1 CHAR: { = [ expected-posix-class ] unless
|
||||
"}" read-until [ bad-character-class ] unless
|
||||
{
|
||||
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
|
||||
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
|
||||
{ "Alpha" [ Letter-class ] }
|
||||
{ "ASCII" [ ascii-class ] }
|
||||
{ "Digit" [ digit-class ] }
|
||||
{ "Alnum" [ alpha-class ] }
|
||||
{ "Punct" [ punctuation-class ] }
|
||||
{ "Graph" [ java-printable-class ] }
|
||||
{ "Print" [ java-printable-class ] }
|
||||
{ "Blank" [ non-newline-blank-class ] }
|
||||
{ "Cntrl" [ control-character-class ] }
|
||||
{ "XDigit" [ hex-digit-class ] }
|
||||
{ "Space" [ java-blank-class ] }
|
||||
! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
|
||||
[ bad-character-class ]
|
||||
} case ;
|
||||
|
||||
: parse-octal ( -- n ) 3 read oct> check-octal ;
|
||||
: parse-short-hex ( -- n ) 2 read hex> check-hex ;
|
||||
: parse-long-hex ( -- n ) 6 read hex> check-hex ;
|
||||
: parse-control-character ( -- n ) read1 ;
|
||||
|
||||
ERROR: bad-escaped-literals seq ;
|
||||
: parse-escaped-literals ( -- obj )
|
||||
"\\E" read-until [ bad-escaped-literals ] unless
|
||||
read1 drop
|
||||
[ epsilon ] [
|
||||
[ <constant> ] V{ } map-as
|
||||
first|concatenation
|
||||
] if-empty ;
|
||||
|
||||
: parse-escaped ( -- obj )
|
||||
read1
|
||||
{
|
||||
{ CHAR: \ [ CHAR: \ <constant> ] }
|
||||
{ CHAR: . [ CHAR: . <constant> ] }
|
||||
{ CHAR: t [ CHAR: \t <constant> ] }
|
||||
{ CHAR: n [ CHAR: \n <constant> ] }
|
||||
{ CHAR: r [ CHAR: \r <constant> ] }
|
||||
{ CHAR: f [ HEX: c <constant> ] }
|
||||
{ CHAR: a [ HEX: 7 <constant> ] }
|
||||
{ CHAR: e [ HEX: 1b <constant> ] }
|
||||
|
||||
{ CHAR: d [ digit-class ] }
|
||||
{ CHAR: D [ digit-class <negation> ] }
|
||||
{ CHAR: s [ java-blank-class ] }
|
||||
{ CHAR: S [ java-blank-class <negation> ] }
|
||||
{ CHAR: w [ c-identifier-class ] }
|
||||
{ CHAR: W [ c-identifier-class <negation> ] }
|
||||
|
||||
{ CHAR: p [ parse-posix-class ] }
|
||||
{ CHAR: P [ parse-posix-class <negation> ] }
|
||||
{ CHAR: x [ parse-short-hex <constant> ] }
|
||||
{ CHAR: u [ parse-long-hex <constant> ] }
|
||||
{ CHAR: 0 [ parse-octal <constant> ] }
|
||||
{ CHAR: c [ parse-control-character ] }
|
||||
|
||||
! { CHAR: b [ handle-word-boundary ] }
|
||||
! { CHAR: B [ handle-word-boundary <negation> ] }
|
||||
! { CHAR: A [ handle-beginning-of-input ] }
|
||||
! { CHAR: G [ end of previous match ] }
|
||||
! { CHAR: Z [ handle-end-of-input ] }
|
||||
! { CHAR: z [ handle-end-of-input ] } ! except for terminator
|
||||
|
||||
{ CHAR: Q [ parse-escaped-literals ] }
|
||||
} case ;
|
||||
|
||||
: handle-escape ( -- ) parse-escaped push-stack ;
|
||||
|
||||
: handle-dash ( vector -- vector' )
|
||||
H{ { dash CHAR: - } } substitute ;
|
||||
|
||||
: character-class>alternation ( seq -- alternation )
|
||||
[ dup number? [ <constant> ] when ] map first|alternation ;
|
||||
|
||||
: handle-caret ( vector -- vector' )
|
||||
dup [ length 2 >= ] [ first caret eq? ] bi and [
|
||||
rest-slice character-class>alternation <negation>
|
||||
] [
|
||||
character-class>alternation
|
||||
] if ;
|
||||
|
||||
: make-character-class ( -- character-class )
|
||||
[ beginning-of-character-class swap cut-stack ] change-whole-stack
|
||||
handle-dash handle-caret ;
|
||||
|
||||
: apply-dash ( -- )
|
||||
stack [ pop3 nip <character-class-range> ] keep push ;
|
||||
|
||||
: apply-dash? ( -- ? )
|
||||
stack dup length 3 >=
|
||||
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
|
||||
|
||||
ERROR: empty-negated-character-class ;
|
||||
DEFER: handle-left-bracket
|
||||
: (parse-character-class) ( -- )
|
||||
read1 [ empty-negated-character-class ] unless* {
|
||||
{ CHAR: [ [ handle-left-bracket t ] }
|
||||
{ CHAR: ] [ make-character-class push-stack f ] }
|
||||
{ CHAR: - [ dash push-stack t ] }
|
||||
{ CHAR: \ [ parse-escaped push-stack t ] }
|
||||
[ push-stack apply-dash? [ apply-dash ] when t ]
|
||||
} case
|
||||
[ (parse-character-class) ] when ;
|
||||
|
||||
: parse-character-class-second ( -- )
|
||||
read1 {
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class-first ( -- )
|
||||
read1 {
|
||||
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
: handle-left-bracket ( -- )
|
||||
beginning-of-character-class push-stack
|
||||
parse-character-class-first (parse-character-class) ;
|
||||
|
||||
: finish-regexp-parse ( stack -- obj )
|
||||
{ pipe } split
|
||||
[ first|concatenation ] map first|alternation ;
|
||||
|
||||
: handle-right-parenthesis ( -- )
|
||||
stack beginning-of-group over last-index cut rest
|
||||
[ current-regexp get swap >>stack drop ]
|
||||
[ finish-regexp-parse <capture-group> push-stack ] bi* ;
|
||||
|
||||
: nested-parse-regexp ( -- )
|
||||
beginning-of-group push-stack (parse-regexp) ;
|
||||
|
||||
: ((parse-regexp)) ( token -- )
|
||||
{
|
||||
{ CHAR: . [ handle-dot ] }
|
||||
{ CHAR: ( [ handle-left-parenthesis ] }
|
||||
{ CHAR: ) [ handle-right-parenthesis ] }
|
||||
{ CHAR: | [ handle-pipe ] }
|
||||
{ CHAR: ? [ handle-question ] }
|
||||
{ CHAR: * [ handle-star ] }
|
||||
{ CHAR: + [ handle-plus ] }
|
||||
{ CHAR: { [ handle-left-brace ] }
|
||||
{ CHAR: [ [ handle-left-bracket ] }
|
||||
{ CHAR: ^ [ handle-front-anchor ] }
|
||||
{ CHAR: $ [ handle-back-anchor ] }
|
||||
{ CHAR: \ [ handle-escape ] }
|
||||
[ <constant> push-stack ]
|
||||
} case ;
|
||||
|
||||
: (parse-regexp) ( -- )
|
||||
read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
|
||||
|
||||
: parse-regexp ( regexp -- )
|
||||
dup current-regexp [
|
||||
raw>> [
|
||||
<string-reader> [ (parse-regexp) ] with-input-stream
|
||||
] unless-empty
|
||||
current-regexp get
|
||||
stack finish-regexp-parse
|
||||
>>parse-tree drop
|
||||
] with-variable ;
|
|
@ -1,14 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel strings help.markup help.syntax regexp2.backend ;
|
||||
IN: regexp2
|
||||
|
||||
HELP: <regexp>
|
||||
{ $values { "string" string } { "regexp" regexp } }
|
||||
{ $description "Compiles a regular expression into a DFA and returns this object. Regular expressions only have to be compiled once and can then be used multiple times to match input strings." } ;
|
||||
|
||||
HELP: <iregexp>
|
||||
{ $values { "string" string } { "regexp" regexp } }
|
||||
{ $description "Compiles a case-insensitive regular expression into a DFA and returns this object. Otherwise, exactly the same as " { $link <regexp> } } ;
|
||||
|
||||
{ <regexp> <iregexp> } related-words
|
|
@ -1,263 +0,0 @@
|
|||
USING: regexp2 tools.test kernel sequences regexp2.parser
|
||||
regexp2.traversal ;
|
||||
IN: regexp2-tests
|
||||
|
||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "." <regexp> matches? ] unit-test
|
||||
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
|
||||
|
||||
|
||||
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
|
||||
|
||||
[ "^" "[^]" <regexp> matches? ] must-fail
|
||||
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
!
|
||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
|
||||
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "\\." <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
|
||||
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
|
||||
|
||||
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
|
||||
[ ] [
|
||||
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
||||
<regexp> drop
|
||||
] unit-test
|
||||
|
||||
[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
|
||||
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
|
||||
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
|
||||
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" <reversed> "abc" <rregexp> matches? ] unit-test
|
||||
[ t ] [ "abc" <reversed> "a[bB][cC]" <rregexp> matches? ] unit-test
|
||||
[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" <rregexp> matches? ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
! [ t ] [ "a" R' a' matches? ] unit-test
|
||||
|
||||
! ((A)(B(C)))
|
||||
! 1. ((A)(B(C)))
|
||||
! 2. (A)
|
||||
! 3. (B(C))
|
||||
! 4. (C)
|
|
@ -1,59 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators kernel math math.ranges
|
||||
sequences regexp2.backend regexp2.utils memoize sets
|
||||
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
|
||||
regexp2.transition-tables ;
|
||||
IN: regexp2
|
||||
|
||||
: default-regexp ( string -- regexp )
|
||||
regexp new
|
||||
swap >>raw
|
||||
<transition-table> >>nfa-table
|
||||
<transition-table> >>dfa-table
|
||||
<transition-table> >>minimized-table
|
||||
reset-regexp ;
|
||||
|
||||
: construct-regexp ( regexp -- regexp' )
|
||||
{
|
||||
[ parse-regexp ]
|
||||
[ construct-nfa ]
|
||||
[ construct-dfa ]
|
||||
[ ]
|
||||
} cleave ;
|
||||
|
||||
: match ( string regexp -- pair )
|
||||
<dfa-traverser> do-match return-match ;
|
||||
|
||||
: matches? ( string regexp -- ? )
|
||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end ) match length>> 1- ;
|
||||
|
||||
: initial-option ( regexp option -- regexp' )
|
||||
over options>> conjoin ;
|
||||
|
||||
: <regexp> ( string -- regexp )
|
||||
default-regexp construct-regexp ;
|
||||
|
||||
: <iregexp> ( string -- regexp )
|
||||
default-regexp
|
||||
case-insensitive initial-option
|
||||
construct-regexp ;
|
||||
|
||||
: <rregexp> ( string -- regexp )
|
||||
default-regexp
|
||||
reversed-regexp initial-option
|
||||
construct-regexp ;
|
||||
|
||||
: R! CHAR: ! <regexp> ; parsing
|
||||
: R" CHAR: " <regexp> ; parsing
|
||||
: R# CHAR: # <regexp> ; parsing
|
||||
: R' CHAR: ' <regexp> ; parsing
|
||||
: R( CHAR: ) <regexp> ; parsing
|
||||
: R/ CHAR: / <regexp> ; parsing
|
||||
: R@ CHAR: @ <regexp> ; parsing
|
||||
: R[ CHAR: ] <regexp> ; parsing
|
||||
: R` CHAR: ` <regexp> ; parsing
|
||||
: R{ CHAR: } <regexp> ; parsing
|
||||
: R| CHAR: | <regexp> ; parsing
|
|
@ -1 +0,0 @@
|
|||
Regular expressions
|
|
@ -1,2 +0,0 @@
|
|||
parsing
|
||||
text
|
|
@ -1,44 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||
vectors ;
|
||||
IN: regexp2.transition-tables
|
||||
|
||||
: insert-at ( value key hash -- )
|
||||
2dup at* [
|
||||
2nip push
|
||||
] [
|
||||
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
|
||||
] if ;
|
||||
|
||||
: ?insert-at ( value key hash/f -- hash )
|
||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||
|
||||
TUPLE: transition from to obj ;
|
||||
TUPLE: literal-transition < transition ;
|
||||
TUPLE: class-transition < transition ;
|
||||
TUPLE: default-transition < transition ;
|
||||
|
||||
TUPLE: literal obj ;
|
||||
TUPLE: class obj ;
|
||||
TUPLE: default ;
|
||||
: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
|
||||
: <class-transition> ( from to obj -- transition ) class-transition boa ;
|
||||
: <default-transition> ( from to -- transition ) t default-transition boa ;
|
||||
|
||||
TUPLE: transition-table transitions
|
||||
literals classes defaults
|
||||
start-state final-states ;
|
||||
|
||||
: <transition-table> ( -- transition-table )
|
||||
transition-table new
|
||||
H{ } clone >>transitions
|
||||
H{ } clone >>final-states ;
|
||||
|
||||
: set-transition ( transition hash -- )
|
||||
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
|
||||
2dup at* [ 2nip insert-at ]
|
||||
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
|
||||
|
||||
: add-transition ( transition transition-table -- )
|
||||
transitions>> set-transition ;
|
|
@ -1,80 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs combinators combinators.lib kernel
|
||||
math math.ranges quotations sequences regexp2.parser
|
||||
regexp2.classes combinators.short-circuit assocs.lib
|
||||
sequences.lib ;
|
||||
IN: regexp2.traversal
|
||||
|
||||
TUPLE: dfa-traverser
|
||||
dfa-table
|
||||
last-state current-state
|
||||
text
|
||||
start-index current-index
|
||||
matches ;
|
||||
|
||||
: <dfa-traverser> ( text regexp -- match )
|
||||
dfa-table>>
|
||||
dfa-traverser new
|
||||
swap [ start-state>> >>current-state ] keep
|
||||
>>dfa-table
|
||||
swap >>text
|
||||
0 >>start-index
|
||||
0 >>current-index
|
||||
V{ } clone >>matches ;
|
||||
|
||||
: final-state? ( dfa-traverser -- ? )
|
||||
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
||||
key? ;
|
||||
|
||||
: text-finished? ( dfa-traverser -- ? )
|
||||
[ current-index>> ] [ text>> length ] bi >= ;
|
||||
|
||||
: save-final-state ( dfa-straverser -- )
|
||||
[ current-index>> ] [ matches>> ] bi push ;
|
||||
|
||||
: match-done? ( dfa-traverser -- ? )
|
||||
dup final-state? [
|
||||
dup save-final-state
|
||||
] when text-finished? ;
|
||||
|
||||
: increment-state ( dfa-traverser state -- dfa-traverser )
|
||||
>r [ 1+ ] change-current-index
|
||||
dup current-state>> >>last-state r>
|
||||
first >>current-state ;
|
||||
|
||||
: match-failed ( dfa-traverser -- dfa-traverser )
|
||||
V{ } clone >>matches ;
|
||||
|
||||
: match-literal ( transition from-state table -- to-state/f )
|
||||
transitions>> [ at ] [ 2drop f ] if-at ;
|
||||
|
||||
: assoc-with ( param assoc quot -- assoc curry )
|
||||
swapd [ [ -rot ] dip call ] 2curry ; inline
|
||||
|
||||
: match-class ( transition from-state table -- to-state/f )
|
||||
transitions>> at* [
|
||||
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
||||
] [ drop ] if ;
|
||||
|
||||
: match-default ( transition from-state table -- to-state/f )
|
||||
[ nip ] dip transitions>>
|
||||
[ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
|
||||
|
||||
: match-transition ( obj from-state dfa -- to-state/f )
|
||||
{ [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
|
||||
|
||||
: setup-match ( match -- obj state dfa-table )
|
||||
{ current-index>> text>> current-state>> dfa-table>> } get-slots
|
||||
[ nth ] 2dip ;
|
||||
|
||||
: do-match ( dfa-traverser -- dfa-traverser )
|
||||
dup match-done? [
|
||||
dup setup-match match-transition
|
||||
[ increment-state do-match ] when*
|
||||
] unless ;
|
||||
|
||||
: return-match ( dfa-traverser -- interval/f )
|
||||
dup matches>>
|
||||
[ drop f ]
|
||||
[ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
|
|
@ -1,69 +0,0 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators.lib io kernel
|
||||
math math.order namespaces regexp2.backend sequences
|
||||
sequences.lib unicode.categories math.ranges fry
|
||||
combinators.short-circuit ;
|
||||
IN: regexp2.utils
|
||||
|
||||
: (while-changes) ( obj quot pred pred-ret -- obj )
|
||||
! quot: ( obj -- obj' )
|
||||
! pred: ( obj -- <=> )
|
||||
>r >r dup slip r> pick over call r> dupd =
|
||||
[ 3drop ] [ (while-changes) ] if ; inline
|
||||
|
||||
: while-changes ( obj quot pred -- obj' )
|
||||
pick over call (while-changes) ; inline
|
||||
|
||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
|
||||
|
||||
: stack ( -- obj ) current-regexp get stack>> ;
|
||||
: change-whole-stack ( quot -- )
|
||||
current-regexp get
|
||||
[ stack>> swap call ] keep (>>stack) ; inline
|
||||
: push-stack ( obj -- ) stack push ;
|
||||
: pop-stack ( -- obj ) stack pop ;
|
||||
: cut-out ( vector n -- vector' vector ) cut rest ;
|
||||
ERROR: cut-stack-error ;
|
||||
: cut-stack ( obj vector -- vector' vector )
|
||||
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
|
||||
|
||||
ERROR: bad-octal number ;
|
||||
ERROR: bad-hex number ;
|
||||
: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
|
||||
: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
|
||||
|
||||
: ascii? ( n -- ? ) 0 HEX: 7f between? ;
|
||||
: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
|
||||
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
|
||||
|
||||
: hex-digit? ( n -- ? )
|
||||
[
|
||||
[ decimal-digit? ]
|
||||
[ CHAR: a CHAR: f between? ]
|
||||
[ CHAR: A CHAR: F between? ]
|
||||
] 1|| ;
|
||||
|
||||
: control-char? ( n -- ? )
|
||||
[
|
||||
[ 0 HEX: 1f between? ]
|
||||
[ HEX: 7f = ]
|
||||
] 1|| ;
|
||||
|
||||
: punct? ( n -- ? )
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
|
||||
|
||||
: c-identifier-char? ( ch -- ? )
|
||||
[ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
|
||||
|
||||
: java-blank? ( n -- ? )
|
||||
{
|
||||
CHAR: \s CHAR: \t CHAR: \n
|
||||
HEX: b HEX: 7 CHAR: \r
|
||||
} member? ;
|
||||
|
||||
: java-printable? ( n -- ? )
|
||||
[ [ alpha? ] [ punct? ] ] 1|| ;
|
Loading…
Reference in New Issue