Merge branch 'master' of git://factorcode.org/git/factor

db4
Slava Pestov 2008-08-27 05:43:17 -05:00
commit 15a005359d
22 changed files with 173 additions and 1246 deletions

View File

@ -75,10 +75,6 @@ METHOD: mutate-as { sequence object number } rot set-nth ;
METHOD: at-mutate { number object sequence } swapd set-nth ;
METHOD: as-mutate { object number sequence } set-nth ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! assoc
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@ -181,15 +177,15 @@ METHOD: as-mutate { object object assoc } set-at ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: 1st ( seq -- obj ) 0 at ;
: 2nd ( seq -- obj ) 1 at ;
: 3rd ( seq -- obj ) 2 at ;
: 4th ( seq -- obj ) 3 at ;
: 5th ( seq -- obj ) 4 at ;
: 6th ( seq -- obj ) 5 at ;
: 7th ( seq -- obj ) 6 at ;
: 8th ( seq -- obj ) 7 at ;
: 9th ( seq -- obj ) 8 at ;
: 1st ( seq -- obj ) 0 swap nth ;
: 2nd ( seq -- obj ) 1 swap nth ;
: 3rd ( seq -- obj ) 2 swap nth ;
: 4th ( seq -- obj ) 3 swap nth ;
: 5th ( seq -- obj ) 4 swap nth ;
: 6th ( seq -- obj ) 5 swap nth ;
: 7th ( seq -- obj ) 6 swap nth ;
: 8th ( seq -- obj ) 7 swap nth ;
: 9th ( seq -- obj ) 8 swap nth ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

View File

@ -0,0 +1,11 @@
USING: arrays sequences ;
IN: obj.alist
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
PREDICATE: alist < sequence [ pair? ] all? ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

45
extra/obj/obj.factor Normal file
View File

@ -0,0 +1,45 @@
USING: kernel words namespaces arrays vectors hashtables
sequences assocs sets grouping
combinators.conditional
combinators.short-circuit
obj.util obj.alist ;
IN: obj
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: properties ( -- properties ) V{ } ;
SYM: self properties adjoin
SYM: type properties adjoin
SYM: title properties adjoin
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: types ( -- types ) V{ } ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: >obj ( val -- obj ) [ symbol? ] [ get ] [ ] 1if ;
: -> ( obj pro -- val ) swap >obj at ;
PREDICATE: obj < alist { [ self -> ] [ type -> ] } 1&& ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: objects ( -- objects ) V{ } ;
: define-object ( symbol table -- )
2 group >vector
self rot 2array prefix
dup dup self -> set-global
self -> objects adjoin ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
PREDICATE: ptr < symbol get obj? ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

View File

@ -0,0 +1,35 @@
USING: sets meta.util obj ;
IN: obj.papers
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SYM: title properties adjoin
SYM: abstract properties adjoin
SYM: authors properties adjoin
SYM: file properties adjoin
SYM: date properties adjoin
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SYM: paper types adjoin
SYM: person types adjoin
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SYM: randall-b-smith { type person } define-object
SYM: david-ungar { type person } define-object
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SYM: programming-as-an-experience
{
type paper
title "Programming as an Experience: The Inspiration for Self"
abstract "The Self system attempts to integrate intellectual and non-intellectual aspects of programming to create an overall experience. The language semantics, user interface, and implementation each help create this integrated experience. The language semantics embed the programmer in a uniform world of simple ob jects that can be modified without appealing to definitions of abstractions. In a similar way, the graphical interface puts the user into a uniform world of tangible objects that can be directly manipulated and changed without switching modes. The implementation strives to support the world-of-objects illusion by minimiz ing perceptible pauses and by providing true source-level semantics without sac rificing performance. As a side benefit, it encourages factoring. Although we see areas that fall short of the vision, on the whole, the language, interface, and im plementation conspire so that the Self programmer lives and acts in a consistent and malleable world of objects."
authors { randall-b-smith david-ungar }
file "/storage/papers/programming-as-experience.ps"
date 1995
}
define-object

View File

@ -0,0 +1,26 @@
USING: kernel arrays strings sequences assocs io io.styles prettyprint colors
combinators.conditional ;
IN: obj.print
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: write-wrapped ( string -- ) H{ { wrap-margin 500 } } [ write ] with-nesting ;
: print-elt ( val -- )
{
{ [ string? ] [ write-wrapped ] }
{ [ array? ] [ [ . ] each ] }
{ [ drop t ] [ . ] }
}
1cond ;
: print-grid ( grid -- )
H{ { table-gap { 10 10 } } { table-border T{ rgba f 0 0 0 1 } } }
[ [ [ [ [ print-elt ] with-cell ] each ] with-row ] each ] tabular-output ;
: print-table ( assoc -- ) >alist print-grid ;
: print-seq ( seq -- ) [ 1array ] map print-grid ;

View File

@ -0,0 +1,8 @@
USING: kernel parser words ;
IN: obj.util
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: SYM: CREATE-WORD dup define-symbol parsed ; parsing

View File

@ -0,0 +1,39 @@
USING: kernel words namespaces arrays sequences prettyprint help.topics bake
obj obj.print ;
IN: obj.view
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: $tab ( seq -- ) first print-table ;
: $obj ( seq -- ) first print-table ;
: $seq ( seq -- ) first print-seq ;
: $ptr ( seq -- ) first get print-table ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
PREDICATE: obj-type < symbol types member? ;
M: obj-type article-title ( type -- title ) unparse ;
M: obj-type article-content ( type -- content )
objects [ type -> = ] with filter
{ $seq , } bake ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
M: ptr article-title ( ptr -- title ) [ title -> ] [ unparse ] bi or ;
M: ptr article-content ( ptr -- content ) get { $obj , } bake ;
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
PREDICATE: obj-list < word \ objects = ;
M: obj-list article-title ( objects -- title ) drop "Objects" ;
M: obj-list article-content ( objects -- title )
execute
[ [ type -> ] [ ] bi 2array ] map
{ $tab , } bake ;

View File

@ -1 +0,0 @@
Doug Coleman

View File

@ -1,25 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors hashtables kernel math state-tables vars vectors ;
IN: regexp2.backend
TUPLE: regexp
raw
{ stack vector }
parse-tree
{ options hashtable }
nfa-table
dfa-table
minimized-table
{ state integer }
{ new-states vector }
{ visited-states hashtable } ;
: reset-regexp ( regexp -- regexp )
0 >>state
V{ } clone >>stack
V{ } clone >>new-states
H{ } clone >>options
H{ } clone >>visited-states ;
SYMBOL: current-regexp

View File

@ -1,55 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order symbols regexp2.parser
words regexp2.utils unicode.categories combinators.short-circuit ;
IN: regexp2.classes
GENERIC: class-member? ( obj class -- ? )
M: word class-member? ( obj class -- ? ) 2drop f ;
M: integer class-member? ( obj class -- ? ) 2drop f ;
M: character-class-range class-member? ( obj class -- ? )
[ from>> ] [ to>> ] bi between? ;
M: any-char class-member? ( obj class -- ? )
2drop t ;
M: letter-class class-member? ( obj class -- ? )
drop letter? ;
M: LETTER-class class-member? ( obj class -- ? )
drop LETTER? ;
M: Letter-class class-member? ( obj class -- ? )
drop Letter? ;
M: ascii-class class-member? ( obj class -- ? )
drop ascii? ;
M: digit-class class-member? ( obj class -- ? )
drop digit? ;
M: alpha-class class-member? ( obj class -- ? )
drop alpha? ;
M: punctuation-class class-member? ( obj class -- ? )
drop punct? ;
M: java-printable-class class-member? ( obj class -- ? )
drop java-printable? ;
M: non-newline-blank-class class-member? ( obj class -- ? )
drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
M: control-character-class class-member? ( obj class -- ? )
drop control-char? ;
M: hex-digit-class class-member? ( obj class -- ? )
drop hex-digit? ;
M: java-blank-class class-member? ( obj class -- ? )
drop java-blank? ;
M: unmatchable-class class-member? ( obj class -- ? )
2drop f ;

View File

@ -1,70 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp2.nfa regexp2.transition-tables sequences
sets sorting vectors regexp2.utils sequences.lib ;
USING: io prettyprint threads ;
IN: regexp2.dfa
: find-delta ( states transition regexp -- new-states )
nfa-table>> transitions>>
rot [ swap at at ] with with map sift concat prune ;
: (find-epsilon-closure) ( states regexp -- new-states )
eps swap find-delta ;
: find-epsilon-closure ( states regexp -- new-states )
'[ dup , (find-epsilon-closure) union ] [ length ] while-changes
natural-sort ;
: find-closure ( states transition regexp -- new-states )
[ find-delta ] 2keep nip find-epsilon-closure ;
: find-start-state ( regexp -- state )
[ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
: find-transitions ( seq1 regexp -- seq2 )
nfa-table>> transitions>>
[ at keys ] curry map concat eps swap remove ;
: add-todo-state ( state regexp -- )
2dup visited-states>> key? [
2drop
] [
[ visited-states>> conjoin ]
[ new-states>> push ] 2bi
] if ;
: new-transitions ( regexp -- )
dup new-states>> [
drop
] [
dupd pop dup pick find-transitions rot
[
[ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
>r swapd transition boa r> dfa-table>> add-transition
] curry with each
new-transitions
] if-empty ;
: states ( hashtable -- array )
[ keys ]
[ values [ values concat ] map concat append ] bi ;
: set-final-states ( regexp -- )
dup
[ nfa-table>> final-states>> keys ]
[ dfa-table>> transitions>> states ] bi
[ intersect empty? not ] with filter
swap dfa-table>> final-states>>
[ conjoin ] curry each ;
: set-initial-state ( regexp -- )
dup
[ dfa-table>> ] [ find-start-state ] bi
[ >>start-state drop ] keep
1vector >>new-states drop ;
: construct-dfa ( regexp -- )
[ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;

View File

@ -1,126 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs grouping kernel regexp2.backend
locals math namespaces regexp2.parser sequences state-tables fry
quotations math.order math.ranges vectors unicode.categories
regexp2.utils regexp2.transition-tables words sequences.lib ;
IN: regexp2.nfa
SYMBOL: negation-mode
: negated? ( -- ? ) negation-mode get 0 or odd? ;
SINGLETON: eps
: next-state ( regexp -- state )
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
: set-start-state ( regexp -- )
dup stack>> [
drop
] [
[ nfa-table>> ] [ pop first ] bi* >>start-state drop
] if-empty ;
GENERIC: nfa-node ( node -- )
:: add-simple-entry ( obj class -- )
[let* | regexp [ current-regexp get ]
s0 [ regexp next-state ]
s1 [ regexp next-state ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ] |
negated? [
s0 f obj class boa table add-transition
s0 s1 <default-transition> table add-transition
] [
s0 s1 obj class boa table add-transition
] if
s0 s1 2array stack push
t s1 table final-states>> set-at ] ;
:: concatenate-nodes ( -- )
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ]
s2 [ stack peek first ]
s3 [ stack pop second ]
s0 [ stack peek first ]
s1 [ stack pop second ] |
s1 s2 eps <literal-transition> table add-transition
s1 table final-states>> delete-at
s0 s3 2array stack push ] ;
:: alternate-nodes ( -- )
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ]
s2 [ stack peek first ]
s3 [ stack pop second ]
s0 [ stack peek first ]
s1 [ stack pop second ]
s4 [ regexp next-state ]
s5 [ regexp next-state ] |
s4 s0 eps <literal-transition> table add-transition
s4 s2 eps <literal-transition> table add-transition
s1 s5 eps <literal-transition> table add-transition
s3 s5 eps <literal-transition> table add-transition
s1 table final-states>> delete-at
s3 table final-states>> delete-at
t s5 table final-states>> set-at
s4 s5 2array stack push ] ;
M: kleene-star nfa-node ( node -- )
term>> nfa-node
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
s0 [ stack peek first ]
s1 [ stack pop second ]
s2 [ regexp next-state ]
s3 [ regexp next-state ]
table [ regexp nfa-table>> ] |
s1 table final-states>> delete-at
t s3 table final-states>> set-at
s1 s0 eps <literal-transition> table add-transition
s2 s0 eps <literal-transition> table add-transition
s2 s3 eps <literal-transition> table add-transition
s1 s3 eps <literal-transition> table add-transition
s2 s3 2array stack push ] ;
M: concatenation nfa-node ( node -- )
seq>>
[ [ nfa-node ] each ]
[ length 1- [ concatenate-nodes ] times ] bi ;
M: alternation nfa-node ( node -- )
seq>>
[ [ nfa-node ] each ]
[ length 1- [ alternate-nodes ] times ] bi ;
M: constant nfa-node ( node -- )
char>> literal-transition add-simple-entry ;
M: epsilon nfa-node ( node -- )
drop eps literal-transition add-simple-entry ;
M: word nfa-node ( node -- )
class-transition add-simple-entry ;
M: character-class-range nfa-node ( node -- )
class-transition add-simple-entry ;
M: capture-group nfa-node ( node -- )
term>> nfa-node ;
M: negation nfa-node ( node -- )
negation-mode inc
term>> nfa-node
negation-mode dec ;
: construct-nfa ( regexp -- )
[
reset-regexp
negation-mode off
[ current-regexp set ]
[ parse-tree>> nfa-node ]
[ set-start-state ] tri
] with-scope ;

View File

@ -1,33 +0,0 @@
USING: kernel tools.test regexp2.backend regexp2 ;
IN: regexp2.parser
: test-regexp ( string -- )
default-regexp parse-regexp ;
: test-regexp2 ( string -- regexp )
default-regexp dup parse-regexp ;
[ "(" ] [ unmatched-parentheses? ] must-fail-with
[ ] [ "a|b" test-regexp ] unit-test
[ ] [ "a.b" test-regexp ] unit-test
[ ] [ "a|b|c" test-regexp ] unit-test
[ ] [ "abc|b" test-regexp ] unit-test
[ ] [ "a|bcd" test-regexp ] unit-test
[ ] [ "a|(b)" test-regexp ] unit-test
[ ] [ "(a)|b" test-regexp ] unit-test
[ ] [ "(a|b)" test-regexp ] unit-test
[ ] [ "((a)|(b))" test-regexp ] unit-test
[ ] [ "(?:a)" test-regexp ] unit-test
[ ] [ "(?i:a)" test-regexp ] unit-test
[ ] [ "(?-i:a)" test-regexp ] unit-test
[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
[ ] [ "(?=a)" test-regexp ] unit-test
[ ] [ "[abc]" test-regexp ] unit-test
[ ] [ "[a-c]" test-regexp ] unit-test
[ ] [ "[^a-c]" test-regexp ] unit-test
[ "[^]" test-regexp ] must-fail

View File

@ -1,391 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators io io.streams.string
kernel math math.parser multi-methods namespaces qualified sets
quotations sequences sequences.lib splitting symbols vectors
dlists math.order combinators.lib unicode.categories strings
sequences.lib regexp2.backend regexp2.utils unicode.case ;
IN: regexp2.parser
FROM: math.ranges => [a,b] ;
MIXIN: node
TUPLE: concatenation seq ; INSTANCE: concatenation node
TUPLE: alternation seq ; INSTANCE: alternation node
TUPLE: kleene-star term ; INSTANCE: kleene-star node
TUPLE: question term ; INSTANCE: question node
TUPLE: negation term ; INSTANCE: negation node
TUPLE: constant char ; INSTANCE: constant node
TUPLE: range from to ; INSTANCE: range node
TUPLE: lookahead term ; INSTANCE: lookahead node
TUPLE: lookbehind term ; INSTANCE: lookbehind node
TUPLE: capture-group term ; INSTANCE: capture-group node
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
TUPLE: independent-group term ; INSTANCE: independent-group node
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
SINGLETON: epsilon INSTANCE: epsilon node
SINGLETON: any-char INSTANCE: any-char node
SINGLETON: front-anchor INSTANCE: front-anchor node
SINGLETON: back-anchor INSTANCE: back-anchor node
TUPLE: option-on option ; INSTANCE: option-on node
TUPLE: option-off option ; INSTANCE: option-off node
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ;
SINGLETONS: letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class
unmatchable-class ;
SINGLETONS: beginning-of-group end-of-group
beginning-of-character-class end-of-character-class
left-parenthesis pipe caret dash ;
: get-option ( option -- ? ) current-regexp get options>> at ;
: get-unix-lines ( -- ? ) unix-lines get-option ;
: get-dotall ( -- ? ) dotall get-option ;
: get-multiline ( -- ? ) multiline get-option ;
: get-comments ( -- ? ) comments get-option ;
: get-case-insensitive ( -- ? ) case-insensitive get-option ;
: get-unicode-case ( -- ? ) unicode-case get-option ;
: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
: <negation> ( obj -- negation ) negation boa ;
: <concatenation> ( seq -- concatenation )
>vector get-reversed-regexp [ reverse ] when
concatenation boa ;
: <alternation> ( seq -- alternation ) >vector alternation boa ;
: <capture-group> ( obj -- capture-group ) capture-group boa ;
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
: <constant> ( obj -- constant )
dup Letter? get-case-insensitive and [
[ ch>lower constant boa ]
[ ch>upper constant boa ] bi 2array <alternation>
] [
constant boa
] if ;
: first|concatenation ( seq -- first/concatenation )
dup length 1 = [ first ] [ <concatenation> ] if ;
: first|alternation ( seq -- first/alternation )
dup length 1 = [ first ] [ <alternation> ] if ;
: <character-class-range> ( from to -- obj )
2dup [ Letter? ] bi@ or get-case-insensitive and [
[ [ ch>lower ] bi@ character-class-range boa ]
[ [ ch>upper ] bi@ character-class-range boa ] 2bi
2array [ [ from>> ] [ to>> ] bi < ] filter
[ unmatchable-class ] [ first|alternation ] if-empty
] [
2dup <
[ character-class-range boa ] [ 2drop unmatchable-class ] if
] if ;
ERROR: unmatched-parentheses ;
: make-positive-lookahead ( string -- )
lookahead boa push-stack ;
: make-negative-lookahead ( string -- )
<negation> lookahead boa push-stack ;
: make-independent-group ( string -- )
#! no backtracking
independent-group boa push-stack ;
: make-positive-lookbehind ( string -- )
lookbehind boa push-stack ;
: make-negative-lookbehind ( string -- )
<negation> lookbehind boa push-stack ;
DEFER: nested-parse-regexp
: make-non-capturing-group ( string -- )
non-capture-group boa push-stack ;
ERROR: bad-option ch ;
: option ( ch -- singleton )
{
{ CHAR: i [ case-insensitive ] }
{ CHAR: d [ unix-lines ] }
{ CHAR: m [ multiline ] }
{ CHAR: r [ reversed-regexp ] }
{ CHAR: s [ dotall ] }
{ CHAR: u [ unicode-case ] }
{ CHAR: x [ comments ] }
[ bad-option ]
} case ;
: option-on ( option -- ) current-regexp get options>> conjoin ;
: option-off ( option -- ) current-regexp get options>> delete-at ;
: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
: parse-options ( string -- )
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
DEFER: (parse-regexp)
: parse-special-group ( -- )
beginning-of-group push-stack
(parse-regexp) pop-stack make-non-capturing-group ;
ERROR: bad-special-group string ;
: (parse-special-group) ( -- )
read1 {
{ [ dup CHAR: : = ]
[ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
{ [ dup CHAR: = = ]
[ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
{ [ dup CHAR: = = ]
[ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
{ [ dup CHAR: > = ]
[ drop nested-parse-regexp pop-stack make-independent-group ] }
{ [ dup CHAR: < = peek1 CHAR: = = and ]
[ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
[
":)" read-until
[ swap prefix ] dip
{
{ CHAR: : [ parse-options parse-special-group ] }
{ CHAR: ) [ parse-options ] }
[ drop bad-special-group ]
} case
]
} cond ;
: handle-left-parenthesis ( -- )
peek1 CHAR: ? =
[ read1 drop (parse-special-group) ]
[ nested-parse-regexp ] if ;
: handle-dot ( -- ) any-char push-stack ;
: handle-pipe ( -- ) pipe push-stack ;
: handle-star ( -- ) stack pop <kleene-star> push-stack ;
: handle-question ( -- )
stack pop epsilon 2array <alternation> push-stack ;
: handle-plus ( -- )
stack pop dup <kleene-star> 2array <concatenation> push-stack ;
ERROR: unmatched-brace ;
: parse-repetition ( -- start finish ? )
"}" read-until [ unmatched-brace ] unless
[ "," split1 [ string>number ] bi@ ]
[ CHAR: , swap index >boolean ] bi ;
: replicate/concatenate ( n obj -- obj' )
over zero? [ 2drop epsilon ]
[ <repetition> first|concatenation ] if ;
: exactly-n ( n -- )
stack pop replicate/concatenate push-stack ;
: at-least-n ( n -- )
stack pop
[ replicate/concatenate ] keep
<kleene-star> 2array <concatenation> push-stack ;
: at-most-n ( n -- )
1+
stack pop
[ replicate/concatenate ] curry map <alternation> push-stack ;
: from-m-to-n ( m n -- )
[a,b]
stack pop
[ replicate/concatenate ] curry map
<alternation> push-stack ;
ERROR: invalid-range a b ;
: handle-left-brace ( -- )
parse-repetition
>r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
[
2dup and [ from-m-to-n ]
[ [ nip at-most-n ] [ at-least-n ] if* ] if
] [ drop 0 max exactly-n ] if ;
: handle-front-anchor ( -- ) front-anchor push-stack ;
: handle-back-anchor ( -- ) back-anchor push-stack ;
ERROR: bad-character-class obj ;
ERROR: expected-posix-class ;
: parse-posix-class ( -- obj )
read1 CHAR: { = [ expected-posix-class ] unless
"}" read-until [ bad-character-class ] unless
{
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
{ "Alpha" [ Letter-class ] }
{ "ASCII" [ ascii-class ] }
{ "Digit" [ digit-class ] }
{ "Alnum" [ alpha-class ] }
{ "Punct" [ punctuation-class ] }
{ "Graph" [ java-printable-class ] }
{ "Print" [ java-printable-class ] }
{ "Blank" [ non-newline-blank-class ] }
{ "Cntrl" [ control-character-class ] }
{ "XDigit" [ hex-digit-class ] }
{ "Space" [ java-blank-class ] }
! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
[ bad-character-class ]
} case ;
: parse-octal ( -- n ) 3 read oct> check-octal ;
: parse-short-hex ( -- n ) 2 read hex> check-hex ;
: parse-long-hex ( -- n ) 6 read hex> check-hex ;
: parse-control-character ( -- n ) read1 ;
ERROR: bad-escaped-literals seq ;
: parse-escaped-literals ( -- obj )
"\\E" read-until [ bad-escaped-literals ] unless
read1 drop
[ epsilon ] [
[ <constant> ] V{ } map-as
first|concatenation
] if-empty ;
: parse-escaped ( -- obj )
read1
{
{ CHAR: \ [ CHAR: \ <constant> ] }
{ CHAR: . [ CHAR: . <constant> ] }
{ CHAR: t [ CHAR: \t <constant> ] }
{ CHAR: n [ CHAR: \n <constant> ] }
{ CHAR: r [ CHAR: \r <constant> ] }
{ CHAR: f [ HEX: c <constant> ] }
{ CHAR: a [ HEX: 7 <constant> ] }
{ CHAR: e [ HEX: 1b <constant> ] }
{ CHAR: d [ digit-class ] }
{ CHAR: D [ digit-class <negation> ] }
{ CHAR: s [ java-blank-class ] }
{ CHAR: S [ java-blank-class <negation> ] }
{ CHAR: w [ c-identifier-class ] }
{ CHAR: W [ c-identifier-class <negation> ] }
{ CHAR: p [ parse-posix-class ] }
{ CHAR: P [ parse-posix-class <negation> ] }
{ CHAR: x [ parse-short-hex <constant> ] }
{ CHAR: u [ parse-long-hex <constant> ] }
{ CHAR: 0 [ parse-octal <constant> ] }
{ CHAR: c [ parse-control-character ] }
! { CHAR: b [ handle-word-boundary ] }
! { CHAR: B [ handle-word-boundary <negation> ] }
! { CHAR: A [ handle-beginning-of-input ] }
! { CHAR: G [ end of previous match ] }
! { CHAR: Z [ handle-end-of-input ] }
! { CHAR: z [ handle-end-of-input ] } ! except for terminator
{ CHAR: Q [ parse-escaped-literals ] }
} case ;
: handle-escape ( -- ) parse-escaped push-stack ;
: handle-dash ( vector -- vector' )
H{ { dash CHAR: - } } substitute ;
: character-class>alternation ( seq -- alternation )
[ dup number? [ <constant> ] when ] map first|alternation ;
: handle-caret ( vector -- vector' )
dup [ length 2 >= ] [ first caret eq? ] bi and [
rest-slice character-class>alternation <negation>
] [
character-class>alternation
] if ;
: make-character-class ( -- character-class )
[ beginning-of-character-class swap cut-stack ] change-whole-stack
handle-dash handle-caret ;
: apply-dash ( -- )
stack [ pop3 nip <character-class-range> ] keep push ;
: apply-dash? ( -- ? )
stack dup length 3 >=
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
ERROR: empty-negated-character-class ;
DEFER: handle-left-bracket
: (parse-character-class) ( -- )
read1 [ empty-negated-character-class ] unless* {
{ CHAR: [ [ handle-left-bracket t ] }
{ CHAR: ] [ make-character-class push-stack f ] }
{ CHAR: - [ dash push-stack t ] }
{ CHAR: \ [ parse-escaped push-stack t ] }
[ push-stack apply-dash? [ apply-dash ] when t ]
} case
[ (parse-character-class) ] when ;
: parse-character-class-second ( -- )
read1 {
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
[ push1 ]
} case ;
: parse-character-class-first ( -- )
read1 {
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
[ push1 ]
} case ;
: handle-left-bracket ( -- )
beginning-of-character-class push-stack
parse-character-class-first (parse-character-class) ;
: finish-regexp-parse ( stack -- obj )
{ pipe } split
[ first|concatenation ] map first|alternation ;
: handle-right-parenthesis ( -- )
stack beginning-of-group over last-index cut rest
[ current-regexp get swap >>stack drop ]
[ finish-regexp-parse <capture-group> push-stack ] bi* ;
: nested-parse-regexp ( -- )
beginning-of-group push-stack (parse-regexp) ;
: ((parse-regexp)) ( token -- )
{
{ CHAR: . [ handle-dot ] }
{ CHAR: ( [ handle-left-parenthesis ] }
{ CHAR: ) [ handle-right-parenthesis ] }
{ CHAR: | [ handle-pipe ] }
{ CHAR: ? [ handle-question ] }
{ CHAR: * [ handle-star ] }
{ CHAR: + [ handle-plus ] }
{ CHAR: { [ handle-left-brace ] }
{ CHAR: [ [ handle-left-bracket ] }
{ CHAR: ^ [ handle-front-anchor ] }
{ CHAR: $ [ handle-back-anchor ] }
{ CHAR: \ [ handle-escape ] }
[ <constant> push-stack ]
} case ;
: (parse-regexp) ( -- )
read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
: parse-regexp ( regexp -- )
dup current-regexp [
raw>> [
<string-reader> [ (parse-regexp) ] with-input-stream
] unless-empty
current-regexp get
stack finish-regexp-parse
>>parse-tree drop
] with-variable ;

View File

@ -1,14 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings help.markup help.syntax regexp2.backend ;
IN: regexp2
HELP: <regexp>
{ $values { "string" string } { "regexp" regexp } }
{ $description "Compiles a regular expression into a DFA and returns this object. Regular expressions only have to be compiled once and can then be used multiple times to match input strings." } ;
HELP: <iregexp>
{ $values { "string" string } { "regexp" regexp } }
{ $description "Compiles a case-insensitive regular expression into a DFA and returns this object. Otherwise, exactly the same as " { $link <regexp> } } ;
{ <regexp> <iregexp> } related-words

View File

@ -1,263 +0,0 @@
USING: regexp2 tools.test kernel sequences regexp2.parser
regexp2.traversal ;
IN: regexp2-tests
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
[ t ] [ "" "a*" <regexp> matches? ] unit-test
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
[ f ] [ "" "a+" <regexp> matches? ] unit-test
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
[ t ] [ "" "a?" <regexp> matches? ] unit-test
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
[ f ] [ "" "." <regexp> matches? ] unit-test
[ t ] [ "a" "." <regexp> matches? ] unit-test
[ t ] [ "." "." <regexp> matches? ] unit-test
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
[ f ] [ "" ".+" <regexp> matches? ] unit-test
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
[ "^" "[^]" <regexp> matches? ] must-fail
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
!
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
[ t ] [ "." "\\." <regexp> matches? ] unit-test
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
[ ] [
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
<regexp> drop
] unit-test
[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
[ t ] [ "abc" <reversed> "abc" <rregexp> matches? ] unit-test
[ t ] [ "abc" <reversed> "a[bB][cC]" <rregexp> matches? ] unit-test
[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" <rregexp> matches? ] unit-test
! Bug in parsing word
! [ t ] [ "a" R' a' matches? ] unit-test
! ((A)(B(C)))
! 1. ((A)(B(C)))
! 2. (A)
! 3. (B(C))
! 4. (C)

View File

@ -1,59 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators kernel math math.ranges
sequences regexp2.backend regexp2.utils memoize sets
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
regexp2.transition-tables ;
IN: regexp2
: default-regexp ( string -- regexp )
regexp new
swap >>raw
<transition-table> >>nfa-table
<transition-table> >>dfa-table
<transition-table> >>minimized-table
reset-regexp ;
: construct-regexp ( regexp -- regexp' )
{
[ parse-regexp ]
[ construct-nfa ]
[ construct-dfa ]
[ ]
} cleave ;
: match ( string regexp -- pair )
<dfa-traverser> do-match return-match ;
: matches? ( string regexp -- ? )
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
: match-head ( string regexp -- end ) match length>> 1- ;
: initial-option ( regexp option -- regexp' )
over options>> conjoin ;
: <regexp> ( string -- regexp )
default-regexp construct-regexp ;
: <iregexp> ( string -- regexp )
default-regexp
case-insensitive initial-option
construct-regexp ;
: <rregexp> ( string -- regexp )
default-regexp
reversed-regexp initial-option
construct-regexp ;
: R! CHAR: ! <regexp> ; parsing
: R" CHAR: " <regexp> ; parsing
: R# CHAR: # <regexp> ; parsing
: R' CHAR: ' <regexp> ; parsing
: R( CHAR: ) <regexp> ; parsing
: R/ CHAR: / <regexp> ; parsing
: R@ CHAR: @ <regexp> ; parsing
: R[ CHAR: ] <regexp> ; parsing
: R` CHAR: ` <regexp> ; parsing
: R{ CHAR: } <regexp> ; parsing
: R| CHAR: | <regexp> ; parsing

View File

@ -1 +0,0 @@
Regular expressions

View File

@ -1,2 +0,0 @@
parsing
text

View File

@ -1,44 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs fry hashtables kernel sequences
vectors ;
IN: regexp2.transition-tables
: insert-at ( value key hash -- )
2dup at* [
2nip push
] [
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
] if ;
: ?insert-at ( value key hash/f -- hash )
[ H{ } clone ] unless* [ insert-at ] keep ;
TUPLE: transition from to obj ;
TUPLE: literal-transition < transition ;
TUPLE: class-transition < transition ;
TUPLE: default-transition < transition ;
TUPLE: literal obj ;
TUPLE: class obj ;
TUPLE: default ;
: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
: <class-transition> ( from to obj -- transition ) class-transition boa ;
: <default-transition> ( from to -- transition ) t default-transition boa ;
TUPLE: transition-table transitions
literals classes defaults
start-state final-states ;
: <transition-table> ( -- transition-table )
transition-table new
H{ } clone >>transitions
H{ } clone >>final-states ;
: set-transition ( transition hash -- )
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
2dup at* [ 2nip insert-at ]
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
: add-transition ( transition transition-table -- )
transitions>> set-transition ;

View File

@ -1,80 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs combinators combinators.lib kernel
math math.ranges quotations sequences regexp2.parser
regexp2.classes combinators.short-circuit assocs.lib
sequences.lib ;
IN: regexp2.traversal
TUPLE: dfa-traverser
dfa-table
last-state current-state
text
start-index current-index
matches ;
: <dfa-traverser> ( text regexp -- match )
dfa-table>>
dfa-traverser new
swap [ start-state>> >>current-state ] keep
>>dfa-table
swap >>text
0 >>start-index
0 >>current-index
V{ } clone >>matches ;
: final-state? ( dfa-traverser -- ? )
[ current-state>> ] [ dfa-table>> final-states>> ] bi
key? ;
: text-finished? ( dfa-traverser -- ? )
[ current-index>> ] [ text>> length ] bi >= ;
: save-final-state ( dfa-straverser -- )
[ current-index>> ] [ matches>> ] bi push ;
: match-done? ( dfa-traverser -- ? )
dup final-state? [
dup save-final-state
] when text-finished? ;
: increment-state ( dfa-traverser state -- dfa-traverser )
>r [ 1+ ] change-current-index
dup current-state>> >>last-state r>
first >>current-state ;
: match-failed ( dfa-traverser -- dfa-traverser )
V{ } clone >>matches ;
: match-literal ( transition from-state table -- to-state/f )
transitions>> [ at ] [ 2drop f ] if-at ;
: assoc-with ( param assoc quot -- assoc curry )
swapd [ [ -rot ] dip call ] 2curry ; inline
: match-class ( transition from-state table -- to-state/f )
transitions>> at* [
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
] [ drop ] if ;
: match-default ( transition from-state table -- to-state/f )
[ nip ] dip transitions>>
[ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
: match-transition ( obj from-state dfa -- to-state/f )
{ [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
: setup-match ( match -- obj state dfa-table )
{ current-index>> text>> current-state>> dfa-table>> } get-slots
[ nth ] 2dip ;
: do-match ( dfa-traverser -- dfa-traverser )
dup match-done? [
dup setup-match match-transition
[ increment-state do-match ] when*
] unless ;
: return-match ( dfa-traverser -- interval/f )
dup matches>>
[ drop f ]
[ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;

View File

@ -1,69 +0,0 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators.lib io kernel
math math.order namespaces regexp2.backend sequences
sequences.lib unicode.categories math.ranges fry
combinators.short-circuit ;
IN: regexp2.utils
: (while-changes) ( obj quot pred pred-ret -- obj )
! quot: ( obj -- obj' )
! pred: ( obj -- <=> )
>r >r dup slip r> pick over call r> dupd =
[ 3drop ] [ (while-changes) ] if ; inline
: while-changes ( obj quot pred -- obj' )
pick over call (while-changes) ; inline
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
: push1 ( obj -- ) input-stream get stream>> push ;
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
: stack ( -- obj ) current-regexp get stack>> ;
: change-whole-stack ( quot -- )
current-regexp get
[ stack>> swap call ] keep (>>stack) ; inline
: push-stack ( obj -- ) stack push ;
: pop-stack ( -- obj ) stack pop ;
: cut-out ( vector n -- vector' vector ) cut rest ;
ERROR: cut-stack-error ;
: cut-stack ( obj vector -- vector' vector )
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
ERROR: bad-octal number ;
ERROR: bad-hex number ;
: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
: ascii? ( n -- ? ) 0 HEX: 7f between? ;
: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
: hex-digit? ( n -- ? )
[
[ decimal-digit? ]
[ CHAR: a CHAR: f between? ]
[ CHAR: A CHAR: F between? ]
] 1|| ;
: control-char? ( n -- ? )
[
[ 0 HEX: 1f between? ]
[ HEX: 7f = ]
] 1|| ;
: punct? ( n -- ? )
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
: c-identifier-char? ( ch -- ? )
[ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
: java-blank? ( n -- ? )
{
CHAR: \s CHAR: \t CHAR: \n
HEX: b HEX: 7 CHAR: \r
} member? ;
: java-printable? ( n -- ? )
[ [ alpha? ] [ punct? ] ] 1|| ;