Merge branch 'master' into new_ui
commit
acd49fa557
|
@ -57,8 +57,10 @@ HELP: >upper
|
||||||
{ $values { "str" "a string" } { "upper" "a string" } }
|
{ $values { "str" "a string" } { "upper" "a string" } }
|
||||||
{ $description "Converts an ASCII string to upper case." } ;
|
{ $description "Converts an ASCII string to upper case." } ;
|
||||||
|
|
||||||
ARTICLE: "ascii" "ASCII character classes"
|
ARTICLE: "ascii" "ASCII"
|
||||||
"The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:"
|
"The " { $vocab-link "ascii" } " vocabulary implements support for the legacy ASCII character set. Most applications should use " { $link "unicode" } " instead."
|
||||||
|
$nl
|
||||||
|
"ASCII character classes:"
|
||||||
{ $subsection blank? }
|
{ $subsection blank? }
|
||||||
{ $subsection letter? }
|
{ $subsection letter? }
|
||||||
{ $subsection LETTER? }
|
{ $subsection LETTER? }
|
||||||
|
@ -67,11 +69,10 @@ ARTICLE: "ascii" "ASCII character classes"
|
||||||
{ $subsection control? }
|
{ $subsection control? }
|
||||||
{ $subsection quotable? }
|
{ $subsection quotable? }
|
||||||
{ $subsection ascii? }
|
{ $subsection ascii? }
|
||||||
"ASCII case conversion is also implemented:"
|
"ASCII case conversion:"
|
||||||
{ $subsection ch>lower }
|
{ $subsection ch>lower }
|
||||||
{ $subsection ch>upper }
|
{ $subsection ch>upper }
|
||||||
{ $subsection >lower }
|
{ $subsection >lower }
|
||||||
{ $subsection >upper }
|
{ $subsection >upper } ;
|
||||||
"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ;
|
|
||||||
|
|
||||||
ABOUT: "ascii"
|
ABOUT: "ascii"
|
||||||
|
|
|
@ -1,41 +1,23 @@
|
||||||
! Copyright (C) 2005, 2008 Slava Pestov.
|
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel math math.order sequences
|
USING: kernel math math.order sequences strings
|
||||||
combinators.short-circuit ;
|
combinators.short-circuit hints ;
|
||||||
IN: ascii
|
IN: ascii
|
||||||
|
|
||||||
: ascii? ( ch -- ? ) 0 127 between? ; inline
|
: ascii? ( ch -- ? ) 0 127 between? ; inline
|
||||||
|
|
||||||
: blank? ( ch -- ? ) " \t\n\r" member? ; inline
|
: blank? ( ch -- ? ) " \t\n\r" member? ; inline
|
||||||
|
|
||||||
: letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline
|
: letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline
|
||||||
|
|
||||||
: LETTER? ( ch -- ? ) CHAR: A CHAR: Z between? ; inline
|
: LETTER? ( ch -- ? ) CHAR: A CHAR: Z between? ; inline
|
||||||
|
|
||||||
: digit? ( ch -- ? ) CHAR: 0 CHAR: 9 between? ; inline
|
: digit? ( ch -- ? ) CHAR: 0 CHAR: 9 between? ; inline
|
||||||
|
|
||||||
: printable? ( ch -- ? ) CHAR: \s CHAR: ~ between? ; inline
|
: printable? ( ch -- ? ) CHAR: \s CHAR: ~ between? ; inline
|
||||||
|
: control? ( ch -- ? ) "\0\e\r\n\t\u000008\u00007f" member? ; inline
|
||||||
|
: quotable? ( ch -- ? ) { [ printable? ] [ "\"\\" member? not ] } 1&& ; inline
|
||||||
|
: Letter? ( ch -- ? ) { [ letter? ] [ LETTER? ] } 1|| ; inline
|
||||||
|
: alpha? ( ch -- ? ) { [ Letter? ] [ digit? ] } 1|| ; inline
|
||||||
|
: ch>lower ( ch -- lower ) dup LETTER? [ HEX: 20 + ] when ; inline
|
||||||
|
: >lower ( str -- lower ) [ ch>lower ] map ;
|
||||||
|
: ch>upper ( ch -- upper ) dup letter? [ HEX: 20 - ] when ; inline
|
||||||
|
: >upper ( str -- upper ) [ ch>upper ] map ;
|
||||||
|
|
||||||
: control? ( ch -- ? )
|
HINTS: >lower string ;
|
||||||
"\0\e\r\n\t\u000008\u00007f" member? ; inline
|
HINTS: >upper string ;
|
||||||
|
|
||||||
: quotable? ( ch -- ? )
|
|
||||||
dup printable? [ "\"\\" member? not ] [ drop f ] if ; inline
|
|
||||||
|
|
||||||
: Letter? ( ch -- ? )
|
|
||||||
[ [ letter? ] [ LETTER? ] ] 1|| ;
|
|
||||||
|
|
||||||
: alpha? ( ch -- ? )
|
|
||||||
[ [ Letter? ] [ digit? ] ] 1|| ;
|
|
||||||
|
|
||||||
: ch>lower ( ch -- lower )
|
|
||||||
dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ;
|
|
||||||
|
|
||||||
: >lower ( str -- lower )
|
|
||||||
[ ch>lower ] map ;
|
|
||||||
|
|
||||||
: ch>upper ( ch -- upper )
|
|
||||||
dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ;
|
|
||||||
|
|
||||||
: >upper ( str -- upper )
|
|
||||||
[ ch>upper ] map ;
|
|
|
@ -1,4 +1,4 @@
|
||||||
! Copyright (C) 2004, 2008 Slava Pestov.
|
! Copyright (C) 2004, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: alien arrays byte-arrays generic assocs hashtables assocs
|
USING: alien arrays byte-arrays generic assocs hashtables assocs
|
||||||
hashtables.private io io.binary io.files io.encodings.binary
|
hashtables.private io io.binary io.files io.encodings.binary
|
||||||
|
@ -8,9 +8,9 @@ vectors words quotations assocs system layouts splitting
|
||||||
grouping growable classes classes.builtin classes.tuple
|
grouping growable classes classes.builtin classes.tuple
|
||||||
classes.tuple.private words.private vocabs
|
classes.tuple.private words.private vocabs
|
||||||
vocabs.loader source-files definitions debugger
|
vocabs.loader source-files definitions debugger
|
||||||
quotations.private sequences.private combinators
|
quotations.private sequences.private combinators combinators.smart
|
||||||
math.order math.private accessors
|
math.order math.private accessors
|
||||||
slots.private compiler.units ;
|
slots.private compiler.units fry ;
|
||||||
IN: bootstrap.image
|
IN: bootstrap.image
|
||||||
|
|
||||||
: arch ( os cpu -- arch )
|
: arch ( os cpu -- arch )
|
||||||
|
@ -73,7 +73,7 @@ SYMBOL: objects
|
||||||
: put-object ( n obj -- ) (objects) set-at ;
|
: put-object ( n obj -- ) (objects) set-at ;
|
||||||
|
|
||||||
: cache-object ( obj quot -- value )
|
: cache-object ( obj quot -- value )
|
||||||
[ (objects) ] dip [ obj>> ] prepose cache ; inline
|
[ (objects) ] dip '[ obj>> @ ] cache ; inline
|
||||||
|
|
||||||
! Constants
|
! Constants
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ SYMBOL: objects
|
||||||
SYMBOL: sub-primitives
|
SYMBOL: sub-primitives
|
||||||
|
|
||||||
: make-jit ( quot rc rt offset -- quad )
|
: make-jit ( quot rc rt offset -- quad )
|
||||||
{ [ { } make ] [ ] [ ] [ ] } spread 4array ; inline
|
[ [ { } make ] 3dip ] output>array ; inline
|
||||||
|
|
||||||
: jit-define ( quot rc rt offset name -- )
|
: jit-define ( quot rc rt offset name -- )
|
||||||
[ make-jit ] dip set ; inline
|
[ make-jit ] dip set ; inline
|
||||||
|
@ -524,11 +524,9 @@ M: quotation '
|
||||||
! Image output
|
! Image output
|
||||||
|
|
||||||
: (write-image) ( image -- )
|
: (write-image) ( image -- )
|
||||||
bootstrap-cell big-endian get [
|
bootstrap-cell big-endian get
|
||||||
[ >be write ] curry each
|
[ '[ _ >be write ] each ]
|
||||||
] [
|
[ '[ _ >le write ] each ] if ;
|
||||||
[ >le write ] curry each
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: write-image ( image -- )
|
: write-image ( image -- )
|
||||||
"Writing image to " write
|
"Writing image to " write
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
USE: unicode
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008 Slava Pestov.
|
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel sequences math ;
|
USING: kernel sequences math fry ;
|
||||||
IN: deques
|
IN: deques
|
||||||
|
|
||||||
GENERIC: push-front* ( obj deque -- node )
|
GENERIC: push-front* ( obj deque -- node )
|
||||||
|
@ -34,7 +34,8 @@ GENERIC: deque-empty? ( deque -- ? )
|
||||||
[ peek-back ] [ pop-back* ] bi ;
|
[ peek-back ] [ pop-back* ] bi ;
|
||||||
|
|
||||||
: slurp-deque ( deque quot -- )
|
: slurp-deque ( deque quot -- )
|
||||||
[ drop [ deque-empty? not ] curry ]
|
[ drop '[ _ deque-empty? not ] ]
|
||||||
[ [ pop-back ] prepose curry ] 2bi [ ] while ; inline
|
[ '[ _ pop-back @ ] ]
|
||||||
|
2bi [ ] while ; inline
|
||||||
|
|
||||||
MIXIN: deque
|
MIXIN: deque
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
! Copyright (C) 2007, 2008 Mackenzie Straight, Doug Coleman,
|
! Copyright (C) 2007, 2009 Mackenzie Straight, Doug Coleman,
|
||||||
! Slava Pestov.
|
! Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: combinators kernel math sequences accessors deques
|
USING: combinators kernel math sequences accessors deques
|
||||||
search-deques summary hashtables ;
|
search-deques summary hashtables fry ;
|
||||||
IN: dlists
|
IN: dlists
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -64,7 +64,7 @@ M: dlist-node node-value obj>> ;
|
||||||
[ front>> ] dip (dlist-find-node) ; inline
|
[ front>> ] dip (dlist-find-node) ; inline
|
||||||
|
|
||||||
: dlist-each-node ( dlist quot -- )
|
: dlist-each-node ( dlist quot -- )
|
||||||
[ f ] compose dlist-find-node 2drop ; inline
|
'[ @ f ] dlist-find-node 2drop ; inline
|
||||||
|
|
||||||
: unlink-node ( dlist-node -- )
|
: unlink-node ( dlist-node -- )
|
||||||
dup prev>> over next>> set-prev-when
|
dup prev>> over next>> set-prev-when
|
||||||
|
@ -115,8 +115,7 @@ M: dlist pop-back* ( dlist -- )
|
||||||
normalize-front ;
|
normalize-front ;
|
||||||
|
|
||||||
: dlist-find ( dlist quot -- obj/f ? )
|
: dlist-find ( dlist quot -- obj/f ? )
|
||||||
[ obj>> ] prepose
|
'[ obj>> @ ] dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
|
||||||
dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
|
|
||||||
|
|
||||||
: dlist-contains? ( dlist quot -- ? )
|
: dlist-contains? ( dlist quot -- ? )
|
||||||
dlist-find nip ; inline
|
dlist-find nip ; inline
|
||||||
|
@ -143,7 +142,7 @@ M: dlist delete-node ( dlist-node dlist -- )
|
||||||
] if ; inline
|
] if ; inline
|
||||||
|
|
||||||
: delete-node-if ( dlist quot -- obj/f )
|
: delete-node-if ( dlist quot -- obj/f )
|
||||||
[ obj>> ] prepose delete-node-if* drop ; inline
|
'[ obj>> @ ] delete-node-if* drop ; inline
|
||||||
|
|
||||||
M: dlist clear-deque ( dlist -- )
|
M: dlist clear-deque ( dlist -- )
|
||||||
f >>front
|
f >>front
|
||||||
|
@ -151,7 +150,7 @@ M: dlist clear-deque ( dlist -- )
|
||||||
drop ;
|
drop ;
|
||||||
|
|
||||||
: dlist-each ( dlist quot -- )
|
: dlist-each ( dlist quot -- )
|
||||||
[ obj>> ] prepose dlist-each-node ; inline
|
'[ obj>> @ ] dlist-each-node ; inline
|
||||||
|
|
||||||
: dlist>seq ( dlist -- seq )
|
: dlist>seq ( dlist -- seq )
|
||||||
[ ] accumulator [ dlist-each ] dip ;
|
[ ] accumulator [ dlist-each ] dip ;
|
||||||
|
@ -159,8 +158,6 @@ M: dlist clear-deque ( dlist -- )
|
||||||
: 1dlist ( obj -- dlist ) <dlist> [ push-front ] keep ;
|
: 1dlist ( obj -- dlist ) <dlist> [ push-front ] keep ;
|
||||||
|
|
||||||
M: dlist clone
|
M: dlist clone
|
||||||
<dlist> [
|
<dlist> [ '[ _ push-back ] dlist-each ] keep ;
|
||||||
[ push-back ] curry dlist-each
|
|
||||||
] keep ;
|
|
||||||
|
|
||||||
INSTANCE: dlist deque
|
INSTANCE: dlist deque
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2005, 2009 Slava Pestov.
|
! Copyright (C) 2005, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel math math.order strings arrays vectors sequences
|
USING: kernel math math.order strings arrays vectors sequences
|
||||||
sequences.private accessors ;
|
sequences.private accessors fry ;
|
||||||
IN: grouping
|
IN: grouping
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -94,7 +94,7 @@ INSTANCE: sliced-clumps slice-chunking
|
||||||
[ first2-unsafe ] dip call
|
[ first2-unsafe ] dip call
|
||||||
] [
|
] [
|
||||||
[ 2 <sliced-clumps> ] dip
|
[ 2 <sliced-clumps> ] dip
|
||||||
[ first2-unsafe ] prepose all?
|
'[ first2-unsafe @ ] all?
|
||||||
] if
|
] if
|
||||||
] if ; inline
|
] if ; inline
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
! Copyright (C) 2008 Slava Pestov.
|
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io kernel macros make multiline namespaces parser
|
USING: io kernel macros make multiline namespaces parser
|
||||||
present sequences strings splitting fry accessors ;
|
present sequences strings splitting fry accessors ;
|
||||||
IN: interpolate
|
IN: interpolate
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
TUPLE: interpolate-var name ;
|
TUPLE: interpolate-var name ;
|
||||||
|
|
||||||
: (parse-interpolate) ( string -- )
|
: (parse-interpolate) ( string -- )
|
||||||
|
@ -20,21 +22,22 @@ TUPLE: interpolate-var name ;
|
||||||
: parse-interpolate ( string -- seq )
|
: parse-interpolate ( string -- seq )
|
||||||
[ (parse-interpolate) ] { } make ;
|
[ (parse-interpolate) ] { } make ;
|
||||||
|
|
||||||
MACRO: interpolate ( string -- )
|
: (interpolate) ( string quot -- quot' )
|
||||||
parse-interpolate [
|
[ parse-interpolate ] dip '[
|
||||||
dup interpolate-var?
|
dup interpolate-var?
|
||||||
[ name>> '[ _ get present write ] ]
|
[ name>> @ '[ _ @ present write ] ]
|
||||||
[ '[ _ write ] ]
|
[ '[ _ write ] ]
|
||||||
if
|
if
|
||||||
] map [ ] join ;
|
] map [ ] join ; inline
|
||||||
|
|
||||||
: interpolate-locals ( string -- quot )
|
: interpolate-locals ( string -- quot )
|
||||||
parse-interpolate [
|
[ search [ ] ] (interpolate) ;
|
||||||
dup interpolate-var?
|
|
||||||
[ name>> search '[ _ present write ] ]
|
|
||||||
[ '[ _ write ] ]
|
|
||||||
if
|
|
||||||
] map [ ] join ;
|
|
||||||
|
|
||||||
: I[ "]I" parse-multiline-string
|
PRIVATE>
|
||||||
interpolate-locals parsed \ call parsed ; parsing
|
|
||||||
|
MACRO: interpolate ( string -- )
|
||||||
|
[ [ get ] ] (interpolate) ;
|
||||||
|
|
||||||
|
: I[
|
||||||
|
"]I" parse-multiline-string
|
||||||
|
interpolate-locals over push-all ; parsing
|
||||||
|
|
|
@ -491,3 +491,7 @@ M:: integer lambda-method-forget-test ( a -- b ) ;
|
||||||
[ 10 ] [
|
[ 10 ] [
|
||||||
[| | 0 '[ [let | A [ 10 ] | A _ + ] ] call ] call
|
[| | 0 '[ [let | A [ 10 ] | A _ + ] ] call ] call
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
! Discovered by littledan
|
||||||
|
[ "bar" ] [ [let | a [ [let | foo [ "bar" ] | foo ] ] | a ] ] unit-test
|
||||||
|
[ 10 ] [ [let | a [ 10 ] | [let | b [ a ] | b ] ] ] unit-test
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2007, 2008 Slava Pestov, Eduardo Cavazos.
|
! Copyright (C) 2007, 2009 Slava Pestov, Eduardo Cavazos.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays combinators effects.parser
|
USING: accessors arrays combinators effects.parser
|
||||||
generic.parser kernel lexer locals.errors
|
generic.parser kernel lexer locals.errors fry
|
||||||
locals.rewrite.closures locals.types make namespaces parser
|
locals.rewrite.closures locals.types make namespaces parser
|
||||||
quotations sequences splitting words vocabs.parser ;
|
quotations sequences splitting words vocabs.parser ;
|
||||||
IN: locals.parser
|
IN: locals.parser
|
||||||
|
@ -56,19 +56,21 @@ SYMBOL: in-lambda?
|
||||||
(parse-bindings)
|
(parse-bindings)
|
||||||
] [ 2drop ] if ;
|
] [ 2drop ] if ;
|
||||||
|
|
||||||
|
: with-bindings ( quot -- words assoc )
|
||||||
|
'[
|
||||||
|
in-lambda? on
|
||||||
|
_ H{ } make-assoc
|
||||||
|
] { } make swap ; inline
|
||||||
|
|
||||||
: parse-bindings ( end -- bindings vars )
|
: parse-bindings ( end -- bindings vars )
|
||||||
[
|
[ (parse-bindings) ] with-bindings ;
|
||||||
[ (parse-bindings) ] H{ } make-assoc
|
|
||||||
] { } make swap ;
|
|
||||||
|
|
||||||
: parse-bindings* ( end -- words assoc )
|
: parse-bindings* ( end -- words assoc )
|
||||||
[
|
|
||||||
[
|
[
|
||||||
namespace push-locals
|
namespace push-locals
|
||||||
(parse-bindings)
|
(parse-bindings)
|
||||||
namespace pop-locals
|
namespace pop-locals
|
||||||
] { } make-assoc
|
] with-bindings ;
|
||||||
] { } make swap ;
|
|
||||||
|
|
||||||
: (parse-wbindings) ( end -- )
|
: (parse-wbindings) ( end -- )
|
||||||
dup parse-binding dup [
|
dup parse-binding dup [
|
||||||
|
@ -77,9 +79,7 @@ SYMBOL: in-lambda?
|
||||||
] [ 2drop ] if ;
|
] [ 2drop ] if ;
|
||||||
|
|
||||||
: parse-wbindings ( end -- bindings vars )
|
: parse-wbindings ( end -- bindings vars )
|
||||||
[
|
[ (parse-wbindings) ] with-bindings ;
|
||||||
[ (parse-wbindings) ] H{ } make-assoc
|
|
||||||
] { } make swap ;
|
|
||||||
|
|
||||||
: parse-locals ( -- vars assoc )
|
: parse-locals ( -- vars assoc )
|
||||||
"(" expect ")" parse-effect
|
"(" expect ")" parse-effect
|
||||||
|
@ -88,8 +88,8 @@ SYMBOL: in-lambda?
|
||||||
|
|
||||||
: parse-locals-definition ( word -- word quot )
|
: parse-locals-definition ( word -- word quot )
|
||||||
parse-locals \ ; (parse-lambda) <lambda>
|
parse-locals \ ; (parse-lambda) <lambda>
|
||||||
2dup "lambda" set-word-prop
|
[ "lambda" set-word-prop ]
|
||||||
rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ;
|
[ rewrite-closures dup length 1 = [ first ] [ bad-lambda-rewrite ] if ] 2bi ;
|
||||||
|
|
||||||
: (::) ( -- word def ) CREATE-WORD parse-locals-definition ;
|
: (::) ( -- word def ) CREATE-WORD parse-locals-definition ;
|
||||||
|
|
||||||
|
|
|
@ -287,9 +287,13 @@ IN: regexp-tests
|
||||||
[ { "1" "2" "3" "4" } ]
|
[ { "1" "2" "3" "4" } ]
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
[ { "1" "2" "3" "4" } ]
|
[ { "1" "2" "3" "4" "" } ]
|
||||||
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
|
[ { "" } ] [ "" R/ =/ re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
|
[ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
[ { "ABC" "DEF" "GHI" } ]
|
[ { "ABC" "DEF" "GHI" } ]
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
|
||||||
|
|
||||||
|
@ -299,16 +303,16 @@ IN: regexp-tests
|
||||||
[ 0 ]
|
[ 0 ]
|
||||||
[ "123" R/ [A-Z]+/ count-matches ] unit-test
|
[ "123" R/ [A-Z]+/ count-matches ] unit-test
|
||||||
|
|
||||||
[ "1.2.3.4" ]
|
[ "1.2.3.4." ]
|
||||||
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
|
[ "1ABC2DEF3GHI4JK" R/ [A-Z]+/ "." re-replace ] unit-test
|
||||||
|
|
||||||
[ "-- title --" ] [ "== title ==" "=" <regexp> "-" re-replace ] unit-test
|
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||||
|
|
||||||
/*
|
/*
|
||||||
! FIXME
|
! FIXME
|
||||||
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
|
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
|
||||||
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
! [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
|
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
|
||||||
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
|
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
|
||||||
|
|
|
@ -61,8 +61,11 @@ IN: regexp
|
||||||
dupd first-match
|
dupd first-match
|
||||||
[ split1-slice swap ] [ "" like f swap ] if* ;
|
[ split1-slice swap ] [ "" like f swap ] if* ;
|
||||||
|
|
||||||
|
: (re-split) ( string regexp -- )
|
||||||
|
over [ [ re-cut , ] keep (re-split) ] [ 2drop ] if ;
|
||||||
|
|
||||||
: re-split ( string regexp -- seq )
|
: re-split ( string regexp -- seq )
|
||||||
[ dup length 0 > ] swap '[ _ re-cut ] [ ] produce nip ;
|
[ (re-split) ] { } make ;
|
||||||
|
|
||||||
: re-replace ( string regexp replacement -- result )
|
: re-replace ( string regexp replacement -- result )
|
||||||
[ re-split ] dip join ;
|
[ re-split ] dip join ;
|
||||||
|
|
|
@ -1,49 +1,59 @@
|
||||||
! Copyright (C) 2009 Your name.
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: help.markup help.syntax kernel ;
|
USING: help.markup help.syntax kernel ;
|
||||||
IN: unicode.categories
|
IN: unicode.categories
|
||||||
|
|
||||||
HELP: LETTER
|
HELP: LETTER
|
||||||
{ $class-description "The class of upper cased letters" } ;
|
{ $class-description "The class of upper cased letters." } ;
|
||||||
|
|
||||||
HELP: Letter
|
HELP: Letter
|
||||||
{ $class-description "The class of letters" } ;
|
{ $class-description "The class of letters." } ;
|
||||||
|
|
||||||
HELP: alpha
|
HELP: alpha
|
||||||
{ $class-description "The class of code points which are alphanumeric" } ;
|
{ $class-description "The class of alphanumeric characters." } ;
|
||||||
|
|
||||||
HELP: blank
|
HELP: blank
|
||||||
{ $class-description "The class of code points which are whitespace" } ;
|
{ $class-description "The class of whitespace characters." } ;
|
||||||
|
|
||||||
HELP: character
|
HELP: character
|
||||||
{ $class-description "The class of numbers which are pre-defined Unicode code points" } ;
|
{ $class-description "The class of pre-defined Unicode code points." } ;
|
||||||
|
|
||||||
HELP: control
|
HELP: control
|
||||||
{ $class-description "The class of control characters" } ;
|
{ $class-description "The class of control characters." } ;
|
||||||
|
|
||||||
HELP: digit
|
HELP: digit
|
||||||
{ $class-description "The class of code coints which are digits" } ;
|
{ $class-description "The class of digits." } ;
|
||||||
|
|
||||||
HELP: letter
|
HELP: letter
|
||||||
{ $class-description "The class of code points which are lower-cased letters" } ;
|
{ $class-description "The class of lower-cased letters." } ;
|
||||||
|
|
||||||
HELP: printable
|
HELP: printable
|
||||||
{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters" } ;
|
{ $class-description "The class of characters which are printable, as opposed to being control or formatting characters." } ;
|
||||||
|
|
||||||
HELP: uncased
|
HELP: uncased
|
||||||
{ $class-description "The class of letters which don't have a case" } ;
|
{ $class-description "The class of letters which don't have a case." } ;
|
||||||
|
|
||||||
ARTICLE: "unicode.categories" "Character classes"
|
ARTICLE: "unicode.categories" "Character classes"
|
||||||
{ $vocab-link "unicode.categories" } " is a vocabulary which provides predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Below are links to classes of characters, but note that each of these also has a predicate defined, which is usually more useful."
|
"The " { $vocab-link "unicode.categories" } " vocabulary implements predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ascii" } " equivalents in most cases. Each character class has an associated predicate word."
|
||||||
{ $subsection blank }
|
{ $subsection blank }
|
||||||
|
{ $subsection blank? }
|
||||||
{ $subsection letter }
|
{ $subsection letter }
|
||||||
|
{ $subsection letter? }
|
||||||
{ $subsection LETTER }
|
{ $subsection LETTER }
|
||||||
|
{ $subsection LETTER? }
|
||||||
{ $subsection Letter }
|
{ $subsection Letter }
|
||||||
|
{ $subsection Letter? }
|
||||||
{ $subsection digit }
|
{ $subsection digit }
|
||||||
|
{ $subsection digit? }
|
||||||
{ $subsection printable }
|
{ $subsection printable }
|
||||||
|
{ $subsection printable? }
|
||||||
{ $subsection alpha }
|
{ $subsection alpha }
|
||||||
|
{ $subsection alpha? }
|
||||||
{ $subsection control }
|
{ $subsection control }
|
||||||
|
{ $subsection control? }
|
||||||
{ $subsection uncased }
|
{ $subsection uncased }
|
||||||
{ $subsection character } ;
|
{ $subsection uncased? }
|
||||||
|
{ $subsection character }
|
||||||
|
{ $subsection character? } ;
|
||||||
|
|
||||||
ABOUT: "unicode.categories"
|
ABOUT: "unicode.categories"
|
||||||
|
|
|
@ -4,7 +4,13 @@ IN: unicode.normalize
|
||||||
ABOUT: "unicode.normalize"
|
ABOUT: "unicode.normalize"
|
||||||
|
|
||||||
ARTICLE: "unicode.normalize" "Unicode normalization"
|
ARTICLE: "unicode.normalize" "Unicode normalization"
|
||||||
"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings. In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: \"e\\u000301\" (the e character, followed by the combining acute accent character) and \"\\u0000e9\" (a single character, e with an acute accent). There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care. Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
|
"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings."
|
||||||
|
$nl
|
||||||
|
"In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: " { $snippet "\"e\\u000301\"" } " (the e character, followed by the combining acute accent character) and " { $snippet "\"\\u0000e9\"" } " (a single character, e with an acute accent)."
|
||||||
|
$nl
|
||||||
|
"There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care."
|
||||||
|
$nl
|
||||||
|
"Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
|
||||||
{ $subsection nfc }
|
{ $subsection nfc }
|
||||||
{ $subsection nfd }
|
{ $subsection nfd }
|
||||||
{ $subsection nfkc }
|
{ $subsection nfkc }
|
||||||
|
@ -12,16 +18,16 @@ ARTICLE: "unicode.normalize" "Unicode normalization"
|
||||||
|
|
||||||
HELP: nfc
|
HELP: nfc
|
||||||
{ $values { "string" string } { "nfc" "a string in NFC" } }
|
{ $values { "string" string } { "nfc" "a string in NFC" } }
|
||||||
{ $description "Converts a string to Normalization Form C" } ;
|
{ $description "Converts a string to Normalization Form C." } ;
|
||||||
|
|
||||||
HELP: nfd
|
HELP: nfd
|
||||||
{ $values { "string" string } { "nfd" "a string in NFD" } }
|
{ $values { "string" string } { "nfd" "a string in NFD" } }
|
||||||
{ $description "Converts a string to Normalization Form D" } ;
|
{ $description "Converts a string to Normalization Form D." } ;
|
||||||
|
|
||||||
HELP: nfkc
|
HELP: nfkc
|
||||||
{ $values { "string" string } { "nfkc" "a string in NFKC" } }
|
{ $values { "string" string } { "nfkc" "a string in NFKC" } }
|
||||||
{ $description "Converts a string to Normalization Form KC" } ;
|
{ $description "Converts a string to Normalization Form KC." } ;
|
||||||
|
|
||||||
HELP: nfkd
|
HELP: nfkd
|
||||||
{ $values { "string" string } { "nfkd" "a string in NFKD" } }
|
{ $values { "string" string } { "nfkd" "a string in NFKD" } }
|
||||||
{ $description "Converts a string to Normalization Form KD" } ;
|
{ $description "Converts a string to Normalization Form KD." } ;
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
USING: help.markup help.syntax ;
|
USING: help.markup help.syntax strings ;
|
||||||
IN: unicode
|
IN: unicode
|
||||||
|
|
||||||
ARTICLE: "unicode" "Unicode"
|
ARTICLE: "unicode" "Unicode"
|
||||||
"Unicode is a set of characters, or " { $emphasis "code points" } " covering what's used in most world writing systems. Any Factor string can hold any of these code points transparently; a factor string is a sequence of Unicode code points. Unicode is accompanied by several standard algorithms for common operations like encoding in files, capitalizing a string, finding the boundaries between words, etc. When a programmer is faced with a string manipulation problem, where the string represents human language, a Unicode algorithm is often much better than the naive one. This is not in terms of efficiency, but rather internationalization. Even English text that remains in ASCII is better served by the Unicode collation algorithm than a naive algorithm. The Unicode algorithms implemented here are:"
|
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
|
||||||
|
$nl
|
||||||
|
"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
|
||||||
|
$nl
|
||||||
|
"The Unicode character set is accompanied by several standard algorithms for common operations like encoding text in files, capitalizing a string, finding the boundaries between words, and so on."
|
||||||
|
$nl
|
||||||
|
"The Unicode algorithms implemented by the " { $vocab-link "unicode" } " vocabulary are:"
|
||||||
{ $vocab-subsection "Case mapping" "unicode.case" }
|
{ $vocab-subsection "Case mapping" "unicode.case" }
|
||||||
{ $vocab-subsection "Collation and weak comparison" "unicode.collation" }
|
{ $vocab-subsection "Collation and weak comparison" "unicode.collation" }
|
||||||
{ $vocab-subsection "Character classes" "unicode.categories" }
|
{ $vocab-subsection "Character classes" "unicode.categories" }
|
||||||
|
@ -11,6 +17,6 @@ ARTICLE: "unicode" "Unicode"
|
||||||
"The following are mostly for internal use:"
|
"The following are mostly for internal use:"
|
||||||
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
|
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
|
||||||
{ $vocab-subsection "Unicode data tables" "unicode.data" }
|
{ $vocab-subsection "Unicode data tables" "unicode.data" }
|
||||||
{ $see-also "io.encodings" } ;
|
{ $see-also "ascii" "io.encodings" } ;
|
||||||
|
|
||||||
ABOUT: "unicode"
|
ABOUT: "unicode"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
USING: kernel alien.c-types alien.strings sequences math alien.syntax unix
|
USING: kernel alien.c-types alien.strings sequences math alien.syntax unix
|
||||||
vectors kernel namespaces continuations threads assocs vectors
|
vectors kernel namespaces continuations threads assocs vectors
|
||||||
io.backend.unix io.encodings.utf8 unix.utilities ;
|
io.backend.unix io.encodings.utf8 unix.utilities fry ;
|
||||||
IN: unix.process
|
IN: unix.process
|
||||||
|
|
||||||
! Low-level Unix process launching utilities. These are used
|
! Low-level Unix process launching utilities. These are used
|
||||||
|
@ -36,7 +36,7 @@ FUNCTION: int execve ( char* path, char** argv, char** envp ) ;
|
||||||
[ [ first ] [ ] bi ] dip exec-with-env ;
|
[ [ first ] [ ] bi ] dip exec-with-env ;
|
||||||
|
|
||||||
: with-fork ( child parent -- )
|
: with-fork ( child parent -- )
|
||||||
[ [ fork-process dup zero? ] dip [ drop ] prepose ] dip
|
[ [ fork-process dup zero? ] dip '[ drop @ ] ] dip
|
||||||
if ; inline
|
if ; inline
|
||||||
|
|
||||||
CONSTANT: SIGKILL 9
|
CONSTANT: SIGKILL 9
|
||||||
|
|
|
@ -22,9 +22,8 @@ $nl
|
||||||
{ $subsection 1string }
|
{ $subsection 1string }
|
||||||
"Since strings are sequences, basic string manipulation can be performed using sequence operations (" { $link "sequences" } "). More advanced functionality can be found in other vocabularies, including but not limited to:"
|
"Since strings are sequences, basic string manipulation can be performed using sequence operations (" { $link "sequences" } "). More advanced functionality can be found in other vocabularies, including but not limited to:"
|
||||||
{ $list
|
{ $list
|
||||||
{ { $vocab-link "ascii" } " - traditional ASCII character classes" }
|
{ { $link "ascii" } " - ASCII algorithms for interoperability with legacy applications" }
|
||||||
{ { $vocab-link "unicode.categories" } " - Unicode character classes" }
|
{ { $link "unicode" } " - Unicode algorithms for modern multilingual applications" }
|
||||||
{ { $vocab-link "unicode.case" } " - Unicode case conversion" }
|
|
||||||
{ { $vocab-link "regexp" } " - regular expressions" }
|
{ { $vocab-link "regexp" } " - regular expressions" }
|
||||||
{ { $vocab-link "peg" } " - parser expression grammars" }
|
{ { $vocab-link "peg" } " - parser expression grammars" }
|
||||||
} ;
|
} ;
|
||||||
|
|
Loading…
Reference in New Issue