From cf160c60ea38aaac4cf005e77eb2118a2345f664 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 17:48:17 -0600 Subject: [PATCH 01/11] use input> "Minneapolis" = ] unit-test diff --git a/extra/usa-cities/usa-cities.factor b/extra/usa-cities/usa-cities.factor index deb3e15845..25ec30ac78 100644 --- a/extra/usa-cities/usa-cities.factor +++ b/extra/usa-cities/usa-cities.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: io.files io.encodings.ascii sequences generalizations math.parser combinators kernel memoize csv summary -words accessors math.order binary-search ; +words accessors math.order binary-search combinators.smart ; IN: usa-cities SINGLETONS: AK AL AR AS AZ CA CO CT DC DE FL GA HI IA ID IL IN @@ -30,15 +30,17 @@ first-zip name state latitude longitude gmt-offset dst-offset ; MEMO: cities ( -- seq ) "resource:extra/usa-cities/zipcode.csv" ascii csv rest-slice [ - 7 firstn { - [ string>number ] - [ ] - [ string>state ] - [ string>number ] - [ string>number ] - [ string>number ] - [ string>number ] - } spread city boa + [ + { + [ string>number ] + [ ] + [ string>state ] + [ string>number ] + [ string>number ] + [ string>number ] + [ string>number ] + } spread + ] input Date: Thu, 8 Jan 2009 17:53:48 -0600 Subject: [PATCH 02/11] use smart combinator in geo-ip --- extra/geo-ip/geo-ip.factor | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/extra/geo-ip/geo-ip.factor b/extra/geo-ip/geo-ip.factor index c878306d7d..ad6302ca55 100644 --- a/extra/geo-ip/geo-ip.factor +++ b/extra/geo-ip/geo-ip.factor @@ -4,7 +4,7 @@ USING: kernel sequences io.files io.files.temp io.launcher io.pathnames io.encodings.ascii io.streams.string http.client generalizations combinators math.parser math.vectors math.intervals interval-maps memoize csv accessors assocs -strings math splitting grouping arrays ; +strings math splitting grouping arrays combinators.smart ; IN: geo-ip : db-path ( -- path ) "IpToCountry.csv" temp-file ; @@ -20,15 +20,17 @@ IN: geo-ip TUPLE: ip-entry from to registry assigned city cntry country ; : parse-ip-entry ( row -- ip-entry ) - 7 firstn { - [ string>number ] - [ string>number ] - [ ] - [ ] - [ ] - [ ] - [ ] - } spread ip-entry boa ; + [ + { + [ string>number ] + [ string>number ] + [ ] + [ ] + [ ] + [ ] + [ ] + } spread + ] input Date: Thu, 8 Jan 2009 17:55:04 -0600 Subject: [PATCH 03/11] use smart combinators in tools.files.unix --- basis/tools/files/unix/unix.factor | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/basis/tools/files/unix/unix.factor b/basis/tools/files/unix/unix.factor index 507c689a55..9757db171a 100755 --- a/basis/tools/files/unix/unix.factor +++ b/basis/tools/files/unix/unix.factor @@ -3,12 +3,9 @@ USING: accessors combinators kernel system unicode.case io.files io.files.info io.files.info.unix tools.files generalizations strings arrays sequences math.parser unix.groups unix.users -tools.files.private unix.stat math fry macros ; +tools.files.private unix.stat math fry macros combinators.smart ; IN: tools.files.unix -MACRO: cleave>array ( array -- quot ) - dup length '[ _ cleave _ narray ] ; - string ( str bools -- str' ) @@ -20,18 +17,20 @@ MACRO: cleave>array ( array -- quot ) } case ; : permissions-string ( permissions -- str ) - { - [ type>> file-type>ch 1string ] - [ user-read? read>string ] - [ user-write? write>string ] - [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] - [ group-read? read>string ] - [ group-write? write>string ] - [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] - [ other-read? read>string ] - [ other-write? write>string ] - [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] - } cleave>array concat ; + [ + { + [ type>> file-type>ch 1string ] + [ user-read? read>string ] + [ user-write? write>string ] + [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] + [ group-read? read>string ] + [ group-write? write>string ] + [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] + [ other-read? read>string ] + [ other-write? write>string ] + [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] + } cleave + ] output>array concat ; : mode>symbol ( mode -- ch ) S_IFMT bitand @@ -48,15 +47,16 @@ MACRO: cleave>array ( array -- quot ) M: unix (directory.) ( path -- lines ) [ [ [ - dup file-info - { - [ permissions-string ] - [ nlink>> number>string 3 CHAR: \s pad-left ] - [ uid>> user-name ] - [ gid>> group-name ] - [ size>> number>string 15 CHAR: \s pad-left ] - [ modified>> ls-timestamp ] - } cleave>array swap suffix " " join + dup file-info [ + { + [ permissions-string ] + [ nlink>> number>string 3 CHAR: \s pad-left ] + [ uid>> user-name ] + [ gid>> group-name ] + [ size>> number>string 15 CHAR: \s pad-left ] + [ modified>> ls-timestamp ] + } cleave + ] output>array swap suffix " " join ] map ] with-group-cache ] with-user-cache ; From a773e592164c4197aef519ab5424e6bc58bc466c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 17:56:03 -0600 Subject: [PATCH 04/11] username -> user-name --- basis/io/files/info/unix/unix-docs.factor | 8 ++++---- basis/io/files/info/unix/unix.factor | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/basis/io/files/info/unix/unix-docs.factor b/basis/io/files/info/unix/unix-docs.factor index 0dff2e4419..a6ee2b9597 100644 --- a/basis/io/files/info/unix/unix-docs.factor +++ b/basis/io/files/info/unix/unix-docs.factor @@ -22,11 +22,11 @@ HELP: file-permissions { "n" integer } } { $description "Returns the Unix file permissions for a given file." } ; -HELP: file-username +HELP: file-user-name { $values { "path" "a pathname string" } { "string" string } } -{ $description "Returns the username for a given file." } ; +{ $description "Returns the user-name for a given file." } ; HELP: file-user-id { $values @@ -110,7 +110,7 @@ HELP: set-file-times HELP: set-file-user { $values { "path" "a pathname string" } { "string/id" "a string or a user id" } } -{ $description "Sets a file's user id from the given user id or username." } ; +{ $description "Sets a file's user id from the given user id or user-name." } ; HELP: set-file-modified-time { $values @@ -258,7 +258,7 @@ ARTICLE: "unix-file-timestamps" "Unix file timestamps" ARTICLE: "unix-file-ids" "Unix file user and group ids" "Reading file user data:" { $subsection file-user-id } -{ $subsection file-username } +{ $subsection file-user-name } "Setting file user data:" { $subsection set-file-user } "Reading file group data:" diff --git a/basis/io/files/info/unix/unix.factor b/basis/io/files/info/unix/unix.factor index 66b95db144..9287e7f4ad 100644 --- a/basis/io/files/info/unix/unix.factor +++ b/basis/io/files/info/unix/unix.factor @@ -243,8 +243,8 @@ M: string set-file-group ( path string -- ) : file-user-id ( path -- uid ) normalize-path file-info uid>> ; -: file-username ( path -- string ) - file-user-id username ; +: file-user-name ( path -- string ) + file-user-id user-name ; : file-group-id ( path -- gid ) normalize-path file-info gid>> ; From cf3473cc911c5f0b404675217d2196e1a080f611 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 17:56:52 -0600 Subject: [PATCH 05/11] Making normalization and case conversion faster --- basis/unicode/case/case.factor | 2 +- basis/unicode/data/data.factor | 9 +++- basis/unicode/normalize/normalize.factor | 56 ++++++++++++++---------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index b0472cd9cb..99278cd72e 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -18,7 +18,7 @@ SYMBOL: locale ! Just casing locale, or overall? char ( name -- char ) name-map at ; : char>name ( char -- name ) name-map value-at ; : property? ( char property -- ? ) properties at interval-key? ; @@ -183,6 +183,13 @@ load-data { [ process-category to: category-map ] } cleave +: postprocess-class ( -- ) + combine-map [ [ second ] map ] map concat + [ combining-class not ] filter + [ 0 swap class-map set-at ] each ; + +postprocess-class + load-special-casing to: special-casing load-properties to: properties diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 58ce412a2e..c8d0eb3f7d 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays -locals sorting.insertion accessors assocs math.order ; +locals sorting.insertion accessors assocs math.order combinators ; IN: unicode.normalize jamo % ] - [ dup quot call [ % ] [ , ] ?if ] if - ] each - ] "" make - dup reorder - ] if ; inline + [ + string [ + dup hangul? [ hangul>jamo % ] + [ dup quot call [ % ] [ , ] ?if ] if + ] each + ] "" make + dup reorder ; + +: with-string ( str quot -- str ) + over aux>> [ call ] [ drop ] if ; inline + +: (nfd) ( string -- nfd ) + [ canonical-entry ] decompose ; + +: (nfkd) ( string -- nfkd ) + [ compatibility-entry ] decompose ; PRIVATE> : nfd ( string -- nfd ) - [ canonical-entry ] decompose ; + [ (nfd) ] with-string ; : nfkd ( string -- nfkd ) - [ compatibility-entry ] decompose ; + [ (nfkd) ] with-string ; : string-append ( s1 s2 -- string ) [ append ] keep @@ -138,20 +142,26 @@ DEFER: compose-iter : compose-iter ( last-class -- ) current [ - dup combining-class - [ try-compose to compose-iter ] - [ swap [ drop ] [ try-noncombining ] if ] if* + dup combining-class { + { f [ 2drop ] } + { 0 [ swap [ drop ] [ try-noncombining ] if ] } + [ try-compose to compose-iter ] + } case ] [ drop ] if* ; : ?new-after ( -- ) after [ dup empty? [ drop SBUF" " clone ] unless ] change ; +: compose-combining ( ch -- ) + char set to ?new-after + f compose-iter + char get , after get % ; + : (compose) ( -- ) current [ dup jamo? [ drop compose-jamo ] [ - char set to ?new-after - f compose-iter - char get , after get % + 1 get-str combining-class + [ compose-combining ] [ , to ] if ] if (compose) ] when* ; @@ -166,7 +176,7 @@ DEFER: compose-iter PRIVATE> : nfc ( string -- nfc ) - nfd combine ; + [ (nfd) combine ] with-string ; : nfkc ( string -- nfkc ) - nfkd combine ; + [ (nfkd) combine ] with-string ; From fe92608a1f753c83f53b9a91f7ba6df05f1ea919 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:56:49 -0600 Subject: [PATCH 06/11] Add combinators.smart --- basis/combinators/smart/authors.txt | 1 + basis/combinators/smart/smart-docs.factor | 91 ++++++++++++++++++++++ basis/combinators/smart/smart-tests.factor | 21 +++++ basis/combinators/smart/smart.factor | 22 ++++++ 4 files changed, 135 insertions(+) create mode 100644 basis/combinators/smart/authors.txt create mode 100644 basis/combinators/smart/smart-docs.factor create mode 100644 basis/combinators/smart/smart-tests.factor create mode 100644 basis/combinators/smart/smart.factor diff --git a/basis/combinators/smart/authors.txt b/basis/combinators/smart/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/basis/combinators/smart/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/basis/combinators/smart/smart-docs.factor b/basis/combinators/smart/smart-docs.factor new file mode 100644 index 0000000000..69ec3e7013 --- /dev/null +++ b/basis/combinators/smart/smart-docs.factor @@ -0,0 +1,91 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax kernel quotations math sequences +multiline ; +IN: combinators.smart + +HELP: inputarray +{ $values + { "quot" quotation } + { "newquot" quotation } +} +{ $description "Infers the number or outputs from the quotation and constructs an array from those outputs." } +{ $examples + { $example + <" USING: combinators combinators.smart math prettyprint ; +9 [ + { [ 1- ] [ 1+ ] [ sq ] } cleave +] output>array ."> + "{ 8 10 81 }" + } +} ; + +HELP: output>sequence +{ $values + { "quot" quotation } { "exemplar" "an exemplar" } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from the quotation and constructs a new sequence from those objects of the same type as the exemplar." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "4 [ [ 1 + ] [ 2 + ] [ 3 + ] tri ] V{ } output>sequence ." + "V{ 5 6 7 }" + } +} ; + +HELP: reduce-output +{ $values + { "quot" quotation } { "operation" quotation } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and reduces them using " { $snippet "operation" } ". The identity for the " { $link reduce } " operation is the first output." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "3 [ [ 4 * ] [ 4 / ] [ 4 - ] tri ] [ * ] reduce-output ." + "-9" + } +} ; + +HELP: sum-outputs +{ $values + { "quot" quotation } + { "n" integer } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and returns their sum." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "10 [ [ 1- ] [ 1+ ] bi ] sum-outputs ." + "20" + } +} ; + +ARTICLE: "combinators.smart" "Smart combinators" +"The " { $vocab-link "combinators.smart" } " vocabulary implements " { $emphasis "smart combinators" } ". A smart combinator is one whose behavior depends on the static stack effect of an input quotation." $nl +"Smart inputs from a sequence:" +{ $subsection inputsequence } +{ $subsection output>array } +"Reducing the output of a quotation:" +{ $subsection reduce-output } +"Summing the output of a quotation:" +{ $subsection sum-outputs } ; + +ABOUT: "combinators.smart" diff --git a/basis/combinators/smart/smart-tests.factor b/basis/combinators/smart/smart-tests.factor new file mode 100644 index 0000000000..4be445e465 --- /dev/null +++ b/basis/combinators/smart/smart-tests.factor @@ -0,0 +1,21 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test combinators.smart math kernel ; +IN: combinators.smart.tests + +: test-bi ( -- 9 11 ) + 10 [ 1- ] [ 1+ ] bi ; + +[ [ test-bi ] output>array ] must-infer +[ { 9 11 } ] [ [ test-bi ] output>array ] unit-test + +[ { 9 11 } [ + ] inputsequence ( quot exemplar -- newquot ) + [ dup infer out>> ] dip + '[ @ _ _ nsequence ] ; + +: output>array ( quot -- newquot ) + { } output>sequence ; inline + +MACRO: input> ] keep + '[ _ firstn @ ] ; + +MACRO: reduce-output ( quot operation -- newquot ) + [ dup infer out>> 1 [-] ] dip n*quot compose ; + +: sum-outputs ( quot -- n ) + [ + ] reduce-output ; inline From 932631c901f167c6dd2ea55fbdc4307355fdca12 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:07 -0600 Subject: [PATCH 07/11] use combinators.smart for bit-count --- basis/math/bitwise/bitwise-tests.factor | 4 ++++ basis/math/bitwise/bitwise.factor | 16 +++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index 979c62dcfb..40eb20642c 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -32,3 +32,7 @@ IN: math.bitwise.tests [ 8 ] [ 0 3 toggle-bit ] unit-test [ 0 ] [ 8 3 toggle-bit ] unit-test + +[ 4 ] [ BIN: 1010101 bit-count ] unit-test +[ 0 ] [ BIN: 0 bit-count ] unit-test +[ 1 ] [ BIN: 1 bit-count ] unit-test diff --git a/basis/math/bitwise/bitwise.factor b/basis/math/bitwise/bitwise.factor index 2c03164ae7..e60815bf60 100755 --- a/basis/math/bitwise/bitwise.factor +++ b/basis/math/bitwise/bitwise.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: arrays kernel math math.functions sequences sequences.private words namespaces macros hints -combinators fry io.binary ; +combinators fry io.binary combinators.smart ; IN: math.bitwise ! utilities @@ -76,12 +76,14 @@ DEFER: byte-bit-count GENERIC: (bit-count) ( x -- n ) M: fixnum (bit-count) - { - [ byte-bit-count ] - [ -8 shift byte-bit-count ] - [ -16 shift byte-bit-count ] - [ -24 shift byte-bit-count ] - } cleave + + + ; + [ + { + [ byte-bit-count ] + [ -8 shift byte-bit-count ] + [ -16 shift byte-bit-count ] + [ -24 shift byte-bit-count ] + } cleave + ] sum-outputs ; M: bignum (bit-count) dup 0 = [ drop 0 ] [ From e020df3d008a878d5205ab1ecc9763d193d492b5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:23 -0600 Subject: [PATCH 08/11] use combinators.smart --- basis/tools/cocoa/cocoa.factor | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/basis/tools/cocoa/cocoa.factor b/basis/tools/cocoa/cocoa.factor index a8cdf6f41c..9dd1895a68 100644 --- a/basis/tools/cocoa/cocoa.factor +++ b/basis/tools/cocoa/cocoa.factor @@ -1,16 +1,18 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays cocoa.messages cocoa.runtime combinators -prettyprint ; +prettyprint combinators.smart ; IN: tools.cocoa : method. ( method -- ) - { - [ method_getName sel_getName ] - [ method-return-type ] - [ method-arg-types ] - [ method_getImplementation ] - } cleave 4array . ; + [ + { + [ method_getName sel_getName ] + [ method-return-type ] + [ method-arg-types ] + [ method_getImplementation ] + } cleave + ] output>array . ; : methods. ( class -- ) [ method. ] each-method-in-class ; From 8cb0be6a0af905c6ed103ac9731070aa1cd8e04b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:33 -0600 Subject: [PATCH 09/11] use combinators.smart --- basis/ui/gadgets/buttons/buttons.factor | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/basis/ui/gadgets/buttons/buttons.factor b/basis/ui/gadgets/buttons/buttons.factor index 75469671ef..dabc12d3ae 100644 --- a/basis/ui/gadgets/buttons/buttons.factor +++ b/basis/ui/gadgets/buttons/buttons.factor @@ -6,7 +6,7 @@ classes.tuple opengl opengl.gl math.vectors ui.commands ui.gadgets ui.gadgets.borders ui.gadgets.labels ui.gadgets.theme ui.gadgets.tracks ui.gadgets.packs ui.gadgets.worlds ui.gestures ui.render math.geometry.rect locals alien.c-types -specialized-arrays.float fry ; +specialized-arrays.float fry combinators.smart ; IN: ui.gadgets.buttons TUPLE: button < border pressed? selected? quot ; @@ -111,12 +111,14 @@ TUPLE: checkmark-paint < caching-pen color last-vertices ; array ; : checkmark-vertices ( dim -- vertices ) checkmark-points concat >float-array ; From ad53cb8635ba90bdf50419846e995875d3e20955 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 19:07:46 -0600 Subject: [PATCH 10/11] Privatizing unicode.case:ch>{lower,upper,title} --- basis/ascii/ascii-docs.factor | 28 ++++++++++++++++++- basis/ascii/ascii-tests.factor | 5 ++++ basis/ascii/ascii.factor | 14 ++++++++++ basis/regexp/nfa/nfa.factor | 7 ++++- basis/regexp/parser/parser.factor | 10 +++---- basis/soundex/soundex.factor | 2 +- basis/tr/tr-tests.factor | 2 +- basis/tr/tr.factor | 4 +-- basis/unicode/case/case-docs.factor | 19 ------------- basis/unicode/case/case.factor | 2 +- basis/unicode/data/data.factor | 5 +--- basis/unicode/normalize/normalize.factor | 3 +- basis/xmode/marker/marker.factor | 2 +- .../reverse-complement.factor | 2 +- extra/parser-combinators/regexp/regexp.factor | 2 +- 15 files changed, 67 insertions(+), 40 deletions(-) diff --git a/basis/ascii/ascii-docs.factor b/basis/ascii/ascii-docs.factor index 6af697cf89..4c783e609c 100644 --- a/basis/ascii/ascii-docs.factor +++ b/basis/ascii/ascii-docs.factor @@ -37,6 +37,26 @@ HELP: quotable? { $values { "ch" "a character" } { "?" "a boolean" } } { $description "Tests for characters which may appear in a Factor string literal without escaping." } ; +HELP: ascii? +{ $values { "ch" "a character" } { "?" "a boolean" } } +{ $description "Tests for whether a number is an ASCII character." } ; + +HELP: ch>lower +{ $values { "ch" "a character" } { "lower" "a character" } } +{ $description "Converts an ASCII character to lower case." } ; + +HELP: ch>upper +{ $values { "ch" "a character" } { "upper" "a character" } } +{ $description "Converts an ASCII character to upper case." } ; + +HELP: >lower +{ $values { "str" "a string" } { "lower" "a string" } } +{ $description "Converts an ASCII string to lower case." } ; + +HELP: >upper +{ $values { "str" "a string" } { "upper" "a string" } } +{ $description "Converts an ASCII string to upper case." } ; + ARTICLE: "ascii" "ASCII character classes" "The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:" { $subsection blank? } @@ -46,6 +66,12 @@ ARTICLE: "ascii" "ASCII character classes" { $subsection printable? } { $subsection control? } { $subsection quotable? } -"Modern applications should use Unicode 5.0 instead (" { $vocab-link "unicode.categories" } ")." ; +{ $subsection ascii? } +"ASCII case conversion is also implemented:" +{ $subsection ch>lower } +{ $subsection ch>upper } +{ $subsection >lower } +{ $subsection >upper } +"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ; ABOUT: "ascii" diff --git a/basis/ascii/ascii-tests.factor b/basis/ascii/ascii-tests.factor index 7dacce734b..6f39b32a01 100644 --- a/basis/ascii/ascii-tests.factor +++ b/basis/ascii/ascii-tests.factor @@ -12,3 +12,8 @@ IN: ascii.tests 0 "There are Four Upper Case characters" [ LETTER? [ 1+ ] when ] each ] unit-test + +[ t f ] [ CHAR: \s ascii? 400 ascii? ] unit-test + +[ "HELLO HOW ARE YOU?" ] [ "hellO hOw arE YOU?" >upper ] unit-test +[ "i'm good thx bai" ] [ "I'm Good THX bai" >lower ] unit-test diff --git a/basis/ascii/ascii.factor b/basis/ascii/ascii.factor index c009c66cde..a64a7b8eb5 100644 --- a/basis/ascii/ascii.factor +++ b/basis/ascii/ascii.factor @@ -4,6 +4,8 @@ USING: kernel math math.order sequences combinators.short-circuit ; IN: ascii +: ascii? ( ch -- ? ) 0 127 between? ; inline + : blank? ( ch -- ? ) " \t\n\r" member? ; inline : letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline @@ -25,3 +27,15 @@ IN: ascii : alpha? ( ch -- ? ) [ [ Letter? ] [ digit? ] ] 1|| ; + +: ch>lower ( ch -- lower ) + dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ; + +: >lower ( str -- lower ) + [ ch>lower ] map ; + +: ch>upper ( ch -- upper ) + dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ; + +: >upper ( str -- upper ) + [ ch>upper ] map ; diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 7620652948..dd116f3d7a 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -3,7 +3,10 @@ USING: accessors arrays assocs grouping kernel regexp.backend locals math namespaces regexp.parser sequences fry quotations math.order math.ranges vectors unicode.categories regexp.utils -regexp.transition-tables words sets regexp.classes unicode.case ; +regexp.transition-tables words sets regexp.classes unicode.case.private ; +! This uses unicode.case.private for ch>upper and ch>lower +! but case-insensitive matching should be done by case-folding everything +! before processing starts IN: regexp.nfa SYMBOL: negation-mode @@ -160,6 +163,8 @@ M: LETTER-class nfa-node ( node -- ) M: character-class-range nfa-node ( node -- ) case-insensitive option? [ + ! This should be implemented for Unicode by case-folding + ! the input and all strings in the regexp. dup [ from>> ] [ to>> ] bi 2dup [ Letter? ] bi@ and [ rot drop diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 25509ec798..2f397538a0 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -3,8 +3,8 @@ USING: accessors arrays assocs combinators io io.streams.string kernel math math.parser namespaces sets quotations sequences splitting vectors math.order -unicode.categories strings regexp.backend regexp.utils -unicode.case words locals regexp.classes ; +strings regexp.backend regexp.utils +unicode.case unicode.categories words locals regexp.classes ; IN: regexp.parser FROM: math.ranges => [a,b] ; @@ -261,7 +261,7 @@ ERROR: bad-escaped-literals seq ; parse-til-E drop1 [ epsilon ] [ - [ quot call ] V{ } map-as + quot call [ ] V{ } map-as first|concatenation ] if-empty ; inline @@ -269,10 +269,10 @@ ERROR: bad-escaped-literals seq ; [ ] (parse-escaped-literals) ; : lower-case-literals ( -- obj ) - [ ch>lower ] (parse-escaped-literals) ; + [ >lower ] (parse-escaped-literals) ; : upper-case-literals ( -- obj ) - [ ch>upper ] (parse-escaped-literals) ; + [ >upper ] (parse-escaped-literals) ; : parse-escaped ( -- obj ) read1 diff --git a/basis/soundex/soundex.factor b/basis/soundex/soundex.factor index 416ec4a6bc..164f634185 100644 --- a/basis/soundex/soundex.factor +++ b/basis/soundex/soundex.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences grouping assocs kernel ascii unicode.case tr ; +USING: sequences grouping assocs kernel ascii ascii tr ; IN: soundex TR: soundex-tr diff --git a/basis/tr/tr-tests.factor b/basis/tr/tr-tests.factor index c168f5384d..3434c28216 100644 --- a/basis/tr/tr-tests.factor +++ b/basis/tr/tr-tests.factor @@ -1,5 +1,5 @@ IN: tr.tests -USING: tr tools.test unicode.case ; +USING: tr tools.test ascii ; TR: tr-test ch>upper "ABC" "XYZ" ; diff --git a/basis/tr/tr.factor b/basis/tr/tr.factor index 66d8df7d44..ce535f335a 100644 --- a/basis/tr/tr.factor +++ b/basis/tr/tr.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: byte-arrays strings sequences sequences.private +USING: byte-arrays strings sequences sequences.private ascii fry kernel words parser lexer assocs math math.order summary ; IN: tr @@ -11,8 +11,6 @@ M: bad-tr summary lower } { $subsection >title } { $subsection >case-fold } -"There are analogous routines which operate on individual code points, but these should " { $emphasis "not be used" } " in general as they have slightly different behavior. In some cases, for example, they do not perform the case operation, as a single code point must expand to more than one." -{ $subsection ch>upper } -{ $subsection ch>lower } -{ $subsection ch>title } "To test if a string is in a given case:" { $subsection upper? } { $subsection lower? } @@ -53,18 +49,3 @@ HELP: title? HELP: case-fold? { $values { "string" string } { "?" "a boolean" } } { $description "Tests if a string is in case-folded form." } ; - -HELP: ch>lower -{ $values { "ch" "a code point" } { "lower" "a code point" } } -{ $description "Converts a code point to lower case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">lower" } " is not the same as " { $code "[ ch>lower ] map" } "." } ; - -HELP: ch>upper -{ $values { "ch" "a code point" } { "upper" "a code point" } } -{ $description "Converts a code point to upper case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">upper" } " is not the same as " { $code "[ ch>upper ] map" } "." } ; - -HELP: ch>title -{ $values { "ch" "a code point" } { "title" "a code point" } } -{ $description "Converts a code point to title case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">title" } " is not the same as " { $code "[ ch>title ] map" } "." } ; diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index 99278cd72e..c800205704 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -7,11 +7,11 @@ IN: unicode.case : ch>lower ( ch -- lower ) simple-lower at-default ; : ch>upper ( ch -- upper ) simple-upper at-default ; : ch>title ( ch -- title ) simple-title at-default ; +PRIVATE> SYMBOL: locale ! Just casing locale, or overall? diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 61a93d9375..6cf913bffa 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -128,12 +128,9 @@ VALUE: properties cat categories index char table ?set-nth ] assoc-each table fill-ranges ] ; -: ascii-lower ( string -- lower ) - [ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ; - : process-names ( data -- names-hash ) 1 swap (process-data) [ - ascii-lower { { CHAR: \s CHAR: - } } substitute swap + >lower { { CHAR: \s CHAR: - } } substitute swap ] H{ } assoc-map-as ; : multihex ( hexstring -- string ) diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index c8d0eb3f7d..2fbe2e1843 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays -locals sorting.insertion accessors assocs math.order combinators ; +locals sorting.insertion accessors assocs math.order combinators +unicode.syntax ; IN: unicode.normalize upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ; diff --git a/extra/parser-combinators/regexp/regexp.factor b/extra/parser-combinators/regexp/regexp.factor index 2becd937f2..1c94308e93 100755 --- a/extra/parser-combinators/regexp/regexp.factor +++ b/extra/parser-combinators/regexp/regexp.factor @@ -2,7 +2,7 @@ USING: arrays combinators kernel lists math math.parser namespaces parser lexer parser-combinators parser-combinators.simple promises quotations sequences strings math.order assocs prettyprint.backend prettyprint.custom memoize -unicode.case unicode.categories combinators.short-circuit +ascii unicode.categories combinators.short-circuit accessors make io ; IN: parser-combinators.regexp From 58df6dad6f44ce45d5182a594d91e60f475ca42e Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 22:23:39 -0600 Subject: [PATCH 11/11] Unicode cleanup and optimization --- basis/unicode/breaks/breaks.factor | 22 ++++++++++++---------- basis/unicode/normalize/normalize.factor | 13 ++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 1d2f821750..df3b2f03e8 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -192,22 +192,22 @@ to: word-table : word-table-nth ( class1 class2 -- ? ) word-table nth nth ; -: property-not= ( i str property -- ? ) - pick [ - [ ?nth ] dip swap - [ word-break-prop = not ] [ drop f ] if* - ] [ 3drop t ] if ; +:: property-not= ( i str property -- ? ) + i [ + i str ?nth [ word-break-prop property = not ] + [ f ] if* + ] [ t ] if ; : format/extended? ( ch -- ? ) word-break-prop { 4 5 } member? ; :: walk-up ( str i -- j ) i 1 + str [ format/extended? not ] find-from drop - 1+ str [ format/extended? not ] find-from drop ; ! possible bounds error? + [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ; :: walk-down ( str i -- j ) i str [ format/extended? not ] find-last-from drop - 1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error? + [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ; :: word-break? ( table-entry i str -- ? ) table-entry { @@ -224,9 +224,11 @@ to: word-table } case ; :: word-break-next ( old-class new-char i str -- next-class ? ) - new-char word-break-prop dup { 4 5 } member? - [ drop old-class dup { 1 2 3 } member? ] - [ old-class over word-table-nth i str word-break? ] if ; + new-char dup format/extended? + [ drop old-class dup { 1 2 3 } member? ] [ + word-break-prop old-class over word-table-nth + i str word-break? + ] if ; PRIVATE> diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 2fbe2e1843..7a41a768cd 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays locals sorting.insertion accessors assocs math.order combinators -unicode.syntax ; +unicode.syntax strings sbufs ; IN: unicode.normalize ] | string [ - dup hangul? [ hangul>jamo % ] - [ dup quot call [ % ] [ , ] ?if ] if - ] each - ] "" make - dup reorder ; + dup hangul? [ hangul>jamo out push-all ] + [ dup quot call [ out push-all ] [ out push ] ?if ] if + ] each out >string + ] dup reorder ; : with-string ( str quot -- str ) over aux>> [ call ] [ drop ] if ; inline