diff --git a/basis/ascii/ascii-docs.factor b/basis/ascii/ascii-docs.factor index 6af697cf89..4c783e609c 100644 --- a/basis/ascii/ascii-docs.factor +++ b/basis/ascii/ascii-docs.factor @@ -37,6 +37,26 @@ HELP: quotable? { $values { "ch" "a character" } { "?" "a boolean" } } { $description "Tests for characters which may appear in a Factor string literal without escaping." } ; +HELP: ascii? +{ $values { "ch" "a character" } { "?" "a boolean" } } +{ $description "Tests for whether a number is an ASCII character." } ; + +HELP: ch>lower +{ $values { "ch" "a character" } { "lower" "a character" } } +{ $description "Converts an ASCII character to lower case." } ; + +HELP: ch>upper +{ $values { "ch" "a character" } { "upper" "a character" } } +{ $description "Converts an ASCII character to upper case." } ; + +HELP: >lower +{ $values { "str" "a string" } { "lower" "a string" } } +{ $description "Converts an ASCII string to lower case." } ; + +HELP: >upper +{ $values { "str" "a string" } { "upper" "a string" } } +{ $description "Converts an ASCII string to upper case." } ; + ARTICLE: "ascii" "ASCII character classes" "The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:" { $subsection blank? } @@ -46,6 +66,12 @@ ARTICLE: "ascii" "ASCII character classes" { $subsection printable? } { $subsection control? } { $subsection quotable? } -"Modern applications should use Unicode 5.0 instead (" { $vocab-link "unicode.categories" } ")." ; +{ $subsection ascii? } +"ASCII case conversion is also implemented:" +{ $subsection ch>lower } +{ $subsection ch>upper } +{ $subsection >lower } +{ $subsection >upper } +"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ; ABOUT: "ascii" diff --git a/basis/ascii/ascii-tests.factor b/basis/ascii/ascii-tests.factor index 7dacce734b..6f39b32a01 100644 --- a/basis/ascii/ascii-tests.factor +++ b/basis/ascii/ascii-tests.factor @@ -12,3 +12,8 @@ IN: ascii.tests 0 "There are Four Upper Case characters" [ LETTER? [ 1+ ] when ] each ] unit-test + +[ t f ] [ CHAR: \s ascii? 400 ascii? ] unit-test + +[ "HELLO HOW ARE YOU?" ] [ "hellO hOw arE YOU?" >upper ] unit-test +[ "i'm good thx bai" ] [ "I'm Good THX bai" >lower ] unit-test diff --git a/basis/ascii/ascii.factor b/basis/ascii/ascii.factor index c009c66cde..a64a7b8eb5 100644 --- a/basis/ascii/ascii.factor +++ b/basis/ascii/ascii.factor @@ -4,6 +4,8 @@ USING: kernel math math.order sequences combinators.short-circuit ; IN: ascii +: ascii? ( ch -- ? ) 0 127 between? ; inline + : blank? ( ch -- ? ) " \t\n\r" member? ; inline : letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline @@ -25,3 +27,15 @@ IN: ascii : alpha? ( ch -- ? ) [ [ Letter? ] [ digit? ] ] 1|| ; + +: ch>lower ( ch -- lower ) + dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ; + +: >lower ( str -- lower ) + [ ch>lower ] map ; + +: ch>upper ( ch -- upper ) + dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ; + +: >upper ( str -- upper ) + [ ch>upper ] map ; diff --git a/basis/combinators/smart/authors.txt b/basis/combinators/smart/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/basis/combinators/smart/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/basis/combinators/smart/smart-docs.factor b/basis/combinators/smart/smart-docs.factor new file mode 100644 index 0000000000..69ec3e7013 --- /dev/null +++ b/basis/combinators/smart/smart-docs.factor @@ -0,0 +1,91 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax kernel quotations math sequences +multiline ; +IN: combinators.smart + +HELP: input<sequence +{ $values + { "quot" quotation } + { "newquot" quotation } +} +{ $description "Infers the number of inputs, " { $snippet "n" } ", to " { $snippet "quot" } " and calls the " { $snippet "quot" } " with the first " { $snippet "n" } " values from a sequence." } +{ $examples + { $example + "USING: combinators.smart math prettyprint ;" + "{ 1 2 3 } [ + + ] input<sequence ." + "6" + } +} ; + +HELP: output>array +{ $values + { "quot" quotation } + { "newquot" quotation } +} +{ $description "Infers the number or outputs from the quotation and constructs an array from those outputs." } +{ $examples + { $example + <" USING: combinators combinators.smart math prettyprint ; +9 [ + { [ 1- ] [ 1+ ] [ sq ] } cleave +] output>array ."> + "{ 8 10 81 }" + } +} ; + +HELP: output>sequence +{ $values + { "quot" quotation } { "exemplar" "an exemplar" } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from the quotation and constructs a new sequence from those objects of the same type as the exemplar." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "4 [ [ 1 + ] [ 2 + ] [ 3 + ] tri ] V{ } output>sequence ." + "V{ 5 6 7 }" + } +} ; + +HELP: reduce-output +{ $values + { "quot" quotation } { "operation" quotation } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and reduces them using " { $snippet "operation" } ". The identity for the " { $link reduce } " operation is the first output." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "3 [ [ 4 * ] [ 4 / ] [ 4 - ] tri ] [ * ] reduce-output ." + "-9" + } +} ; + +HELP: sum-outputs +{ $values + { "quot" quotation } + { "n" integer } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and returns their sum." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "10 [ [ 1- ] [ 1+ ] bi ] sum-outputs ." + "20" + } +} ; + +ARTICLE: "combinators.smart" "Smart combinators" +"The " { $vocab-link "combinators.smart" } " vocabulary implements " { $emphasis "smart combinators" } ". A smart combinator is one whose behavior depends on the static stack effect of an input quotation." $nl +"Smart inputs from a sequence:" +{ $subsection input<sequence } +"Smart outputs to a sequence:" +{ $subsection output>sequence } +{ $subsection output>array } +"Reducing the output of a quotation:" +{ $subsection reduce-output } +"Summing the output of a quotation:" +{ $subsection sum-outputs } ; + +ABOUT: "combinators.smart" diff --git a/basis/combinators/smart/smart-tests.factor b/basis/combinators/smart/smart-tests.factor new file mode 100644 index 0000000000..4be445e465 --- /dev/null +++ b/basis/combinators/smart/smart-tests.factor @@ -0,0 +1,21 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test combinators.smart math kernel ; +IN: combinators.smart.tests + +: test-bi ( -- 9 11 ) + 10 [ 1- ] [ 1+ ] bi ; + +[ [ test-bi ] output>array ] must-infer +[ { 9 11 } ] [ [ test-bi ] output>array ] unit-test + +[ { 9 11 } [ + ] input<sequence ] must-infer +[ 20 ] [ { 9 11 } [ + ] input<sequence ] unit-test + + + +[ 6 ] [ [ 1 2 3 ] [ + ] reduce-output ] unit-test + +[ [ 1 2 3 ] [ + ] reduce-output ] must-infer + +[ 6 ] [ [ 1 2 3 ] sum-outputs ] unit-test diff --git a/basis/combinators/smart/smart.factor b/basis/combinators/smart/smart.factor new file mode 100644 index 0000000000..fcd28aac74 --- /dev/null +++ b/basis/combinators/smart/smart.factor @@ -0,0 +1,22 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors fry generalizations kernel macros math.order +stack-checker math ; +IN: combinators.smart + +MACRO: output>sequence ( quot exemplar -- newquot ) + [ dup infer out>> ] dip + '[ @ _ _ nsequence ] ; + +: output>array ( quot -- newquot ) + { } output>sequence ; inline + +MACRO: input<sequence ( quot -- newquot ) + [ infer in>> ] keep + '[ _ firstn @ ] ; + +MACRO: reduce-output ( quot operation -- newquot ) + [ dup infer out>> 1 [-] ] dip n*quot compose ; + +: sum-outputs ( quot -- n ) + [ + ] reduce-output ; inline diff --git a/basis/io/files/info/unix/unix-docs.factor b/basis/io/files/info/unix/unix-docs.factor index 0dff2e4419..a6ee2b9597 100644 --- a/basis/io/files/info/unix/unix-docs.factor +++ b/basis/io/files/info/unix/unix-docs.factor @@ -22,11 +22,11 @@ HELP: file-permissions { "n" integer } } { $description "Returns the Unix file permissions for a given file." } ; -HELP: file-username +HELP: file-user-name { $values { "path" "a pathname string" } { "string" string } } -{ $description "Returns the username for a given file." } ; +{ $description "Returns the user-name for a given file." } ; HELP: file-user-id { $values @@ -110,7 +110,7 @@ HELP: set-file-times HELP: set-file-user { $values { "path" "a pathname string" } { "string/id" "a string or a user id" } } -{ $description "Sets a file's user id from the given user id or username." } ; +{ $description "Sets a file's user id from the given user id or user-name." } ; HELP: set-file-modified-time { $values @@ -258,7 +258,7 @@ ARTICLE: "unix-file-timestamps" "Unix file timestamps" ARTICLE: "unix-file-ids" "Unix file user and group ids" "Reading file user data:" { $subsection file-user-id } -{ $subsection file-username } +{ $subsection file-user-name } "Setting file user data:" { $subsection set-file-user } "Reading file group data:" diff --git a/basis/io/files/info/unix/unix.factor b/basis/io/files/info/unix/unix.factor index 66b95db144..9287e7f4ad 100644 --- a/basis/io/files/info/unix/unix.factor +++ b/basis/io/files/info/unix/unix.factor @@ -243,8 +243,8 @@ M: string set-file-group ( path string -- ) : file-user-id ( path -- uid ) normalize-path file-info uid>> ; -: file-username ( path -- string ) - file-user-id username ; +: file-user-name ( path -- string ) + file-user-id user-name ; : file-group-id ( path -- gid ) normalize-path file-info gid>> ; diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index 979c62dcfb..40eb20642c 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -32,3 +32,7 @@ IN: math.bitwise.tests [ 8 ] [ 0 3 toggle-bit ] unit-test [ 0 ] [ 8 3 toggle-bit ] unit-test + +[ 4 ] [ BIN: 1010101 bit-count ] unit-test +[ 0 ] [ BIN: 0 bit-count ] unit-test +[ 1 ] [ BIN: 1 bit-count ] unit-test diff --git a/basis/math/bitwise/bitwise.factor b/basis/math/bitwise/bitwise.factor index 2c03164ae7..e60815bf60 100755 --- a/basis/math/bitwise/bitwise.factor +++ b/basis/math/bitwise/bitwise.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: arrays kernel math math.functions sequences sequences.private words namespaces macros hints -combinators fry io.binary ; +combinators fry io.binary combinators.smart ; IN: math.bitwise ! utilities @@ -76,12 +76,14 @@ DEFER: byte-bit-count GENERIC: (bit-count) ( x -- n ) M: fixnum (bit-count) - { - [ byte-bit-count ] - [ -8 shift byte-bit-count ] - [ -16 shift byte-bit-count ] - [ -24 shift byte-bit-count ] - } cleave + + + ; + [ + { + [ byte-bit-count ] + [ -8 shift byte-bit-count ] + [ -16 shift byte-bit-count ] + [ -24 shift byte-bit-count ] + } cleave + ] sum-outputs ; M: bignum (bit-count) dup 0 = [ drop 0 ] [ diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 7620652948..dd116f3d7a 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -3,7 +3,10 @@ USING: accessors arrays assocs grouping kernel regexp.backend locals math namespaces regexp.parser sequences fry quotations math.order math.ranges vectors unicode.categories regexp.utils -regexp.transition-tables words sets regexp.classes unicode.case ; +regexp.transition-tables words sets regexp.classes unicode.case.private ; +! This uses unicode.case.private for ch>upper and ch>lower +! but case-insensitive matching should be done by case-folding everything +! before processing starts IN: regexp.nfa SYMBOL: negation-mode @@ -160,6 +163,8 @@ M: LETTER-class nfa-node ( node -- ) M: character-class-range nfa-node ( node -- ) case-insensitive option? [ + ! This should be implemented for Unicode by case-folding + ! the input and all strings in the regexp. dup [ from>> ] [ to>> ] bi 2dup [ Letter? ] bi@ and [ rot drop diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 25509ec798..2f397538a0 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -3,8 +3,8 @@ USING: accessors arrays assocs combinators io io.streams.string kernel math math.parser namespaces sets quotations sequences splitting vectors math.order -unicode.categories strings regexp.backend regexp.utils -unicode.case words locals regexp.classes ; +strings regexp.backend regexp.utils +unicode.case unicode.categories words locals regexp.classes ; IN: regexp.parser FROM: math.ranges => [a,b] ; @@ -261,7 +261,7 @@ ERROR: bad-escaped-literals seq ; parse-til-E drop1 [ epsilon ] [ - [ quot call <constant> ] V{ } map-as + quot call [ <constant> ] V{ } map-as first|concatenation ] if-empty ; inline @@ -269,10 +269,10 @@ ERROR: bad-escaped-literals seq ; [ ] (parse-escaped-literals) ; : lower-case-literals ( -- obj ) - [ ch>lower ] (parse-escaped-literals) ; + [ >lower ] (parse-escaped-literals) ; : upper-case-literals ( -- obj ) - [ ch>upper ] (parse-escaped-literals) ; + [ >upper ] (parse-escaped-literals) ; : parse-escaped ( -- obj ) read1 diff --git a/basis/soundex/soundex.factor b/basis/soundex/soundex.factor index 416ec4a6bc..164f634185 100644 --- a/basis/soundex/soundex.factor +++ b/basis/soundex/soundex.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences grouping assocs kernel ascii unicode.case tr ; +USING: sequences grouping assocs kernel ascii ascii tr ; IN: soundex TR: soundex-tr diff --git a/basis/tools/cocoa/cocoa.factor b/basis/tools/cocoa/cocoa.factor index a8cdf6f41c..9dd1895a68 100644 --- a/basis/tools/cocoa/cocoa.factor +++ b/basis/tools/cocoa/cocoa.factor @@ -1,16 +1,18 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays cocoa.messages cocoa.runtime combinators -prettyprint ; +prettyprint combinators.smart ; IN: tools.cocoa : method. ( method -- ) - { - [ method_getName sel_getName ] - [ method-return-type ] - [ method-arg-types ] - [ method_getImplementation ] - } cleave 4array . ; + [ + { + [ method_getName sel_getName ] + [ method-return-type ] + [ method-arg-types ] + [ method_getImplementation ] + } cleave + ] output>array . ; : methods. ( class -- ) [ method. ] each-method-in-class ; diff --git a/basis/tools/files/unix/unix.factor b/basis/tools/files/unix/unix.factor index 507c689a55..9757db171a 100755 --- a/basis/tools/files/unix/unix.factor +++ b/basis/tools/files/unix/unix.factor @@ -3,12 +3,9 @@ USING: accessors combinators kernel system unicode.case io.files io.files.info io.files.info.unix tools.files generalizations strings arrays sequences math.parser unix.groups unix.users -tools.files.private unix.stat math fry macros ; +tools.files.private unix.stat math fry macros combinators.smart ; IN: tools.files.unix -MACRO: cleave>array ( array -- quot ) - dup length '[ _ cleave _ narray ] ; - <PRIVATE : unix-execute>string ( str bools -- str' ) @@ -20,18 +17,20 @@ MACRO: cleave>array ( array -- quot ) } case ; : permissions-string ( permissions -- str ) - { - [ type>> file-type>ch 1string ] - [ user-read? read>string ] - [ user-write? write>string ] - [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] - [ group-read? read>string ] - [ group-write? write>string ] - [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] - [ other-read? read>string ] - [ other-write? write>string ] - [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] - } cleave>array concat ; + [ + { + [ type>> file-type>ch 1string ] + [ user-read? read>string ] + [ user-write? write>string ] + [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] + [ group-read? read>string ] + [ group-write? write>string ] + [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] + [ other-read? read>string ] + [ other-write? write>string ] + [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] + } cleave + ] output>array concat ; : mode>symbol ( mode -- ch ) S_IFMT bitand @@ -48,15 +47,16 @@ MACRO: cleave>array ( array -- quot ) M: unix (directory.) ( path -- lines ) [ [ [ - dup file-info - { - [ permissions-string ] - [ nlink>> number>string 3 CHAR: \s pad-left ] - [ uid>> user-name ] - [ gid>> group-name ] - [ size>> number>string 15 CHAR: \s pad-left ] - [ modified>> ls-timestamp ] - } cleave>array swap suffix " " join + dup file-info [ + { + [ permissions-string ] + [ nlink>> number>string 3 CHAR: \s pad-left ] + [ uid>> user-name ] + [ gid>> group-name ] + [ size>> number>string 15 CHAR: \s pad-left ] + [ modified>> ls-timestamp ] + } cleave + ] output>array swap suffix " " join ] map ] with-group-cache ] with-user-cache ; diff --git a/basis/tr/tr-tests.factor b/basis/tr/tr-tests.factor index c168f5384d..3434c28216 100644 --- a/basis/tr/tr-tests.factor +++ b/basis/tr/tr-tests.factor @@ -1,5 +1,5 @@ IN: tr.tests -USING: tr tools.test unicode.case ; +USING: tr tools.test ascii ; TR: tr-test ch>upper "ABC" "XYZ" ; diff --git a/basis/tr/tr.factor b/basis/tr/tr.factor index 66d8df7d44..ce535f335a 100644 --- a/basis/tr/tr.factor +++ b/basis/tr/tr.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: byte-arrays strings sequences sequences.private +USING: byte-arrays strings sequences sequences.private ascii fry kernel words parser lexer assocs math math.order summary ; IN: tr @@ -11,8 +11,6 @@ M: bad-tr summary <PRIVATE -: ascii? ( ch -- ? ) 0 127 between? ; inline - : tr-nth ( n mapping -- ch ) nth-unsafe 127 bitand ; inline : check-tr ( from to -- ) diff --git a/basis/ui/gadgets/buttons/buttons.factor b/basis/ui/gadgets/buttons/buttons.factor index f237a427a2..e264dd9aa8 100644 --- a/basis/ui/gadgets/buttons/buttons.factor +++ b/basis/ui/gadgets/buttons/buttons.factor @@ -6,7 +6,7 @@ classes.tuple opengl opengl.gl math.vectors ui.commands ui.gadgets ui.gadgets.borders ui.gadgets.labels ui.gadgets.theme ui.gadgets.tracks ui.gadgets.packs ui.gadgets.worlds ui.gestures ui.render math.geometry.rect locals alien.c-types -specialized-arrays.float fry ; +specialized-arrays.float fry combinators.smart ; IN: ui.gadgets.buttons TUPLE: button < border pressed? selected? quot ; @@ -111,12 +111,14 @@ TUPLE: checkmark-paint < caching-pen color last-vertices ; <PRIVATE : checkmark-points ( dim -- points ) - { - [ { 0 0 } v* { 0.5 0.5 } v+ ] - [ { 1 1 } v* { 0.5 0.5 } v+ ] - [ { 1 0 } v* { -0.3 0.5 } v+ ] - [ { 0 1 } v* { -0.3 0.5 } v+ ] - } cleave 4array ; + [ + { + [ { 0 0 } v* { 0.5 0.5 } v+ ] + [ { 1 1 } v* { 0.5 0.5 } v+ ] + [ { 1 0 } v* { -0.3 0.5 } v+ ] + [ { 0 1 } v* { -0.3 0.5 } v+ ] + } cleave + ] output>array ; : checkmark-vertices ( dim -- vertices ) checkmark-points concat >float-array ; diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 1d2f821750..df3b2f03e8 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -192,22 +192,22 @@ to: word-table : word-table-nth ( class1 class2 -- ? ) word-table nth nth ; -: property-not= ( i str property -- ? ) - pick [ - [ ?nth ] dip swap - [ word-break-prop = not ] [ drop f ] if* - ] [ 3drop t ] if ; +:: property-not= ( i str property -- ? ) + i [ + i str ?nth [ word-break-prop property = not ] + [ f ] if* + ] [ t ] if ; : format/extended? ( ch -- ? ) word-break-prop { 4 5 } member? ; :: walk-up ( str i -- j ) i 1 + str [ format/extended? not ] find-from drop - 1+ str [ format/extended? not ] find-from drop ; ! possible bounds error? + [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ; :: walk-down ( str i -- j ) i str [ format/extended? not ] find-last-from drop - 1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error? + [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ; :: word-break? ( table-entry i str -- ? ) table-entry { @@ -224,9 +224,11 @@ to: word-table } case ; :: word-break-next ( old-class new-char i str -- next-class ? ) - new-char word-break-prop dup { 4 5 } member? - [ drop old-class dup { 1 2 3 } member? ] - [ old-class over word-table-nth i str word-break? ] if ; + new-char dup format/extended? + [ drop old-class dup { 1 2 3 } member? ] [ + word-break-prop old-class over word-table-nth + i str word-break? + ] if ; PRIVATE> diff --git a/basis/unicode/case/case-docs.factor b/basis/unicode/case/case-docs.factor index da582c659a..02da8e7635 100644 --- a/basis/unicode/case/case-docs.factor +++ b/basis/unicode/case/case-docs.factor @@ -9,10 +9,6 @@ ARTICLE: "unicode.case" "Case mapping" { $subsection >lower } { $subsection >title } { $subsection >case-fold } -"There are analogous routines which operate on individual code points, but these should " { $emphasis "not be used" } " in general as they have slightly different behavior. In some cases, for example, they do not perform the case operation, as a single code point must expand to more than one." -{ $subsection ch>upper } -{ $subsection ch>lower } -{ $subsection ch>title } "To test if a string is in a given case:" { $subsection upper? } { $subsection lower? } @@ -53,18 +49,3 @@ HELP: title? HELP: case-fold? { $values { "string" string } { "?" "a boolean" } } { $description "Tests if a string is in case-folded form." } ; - -HELP: ch>lower -{ $values { "ch" "a code point" } { "lower" "a code point" } } -{ $description "Converts a code point to lower case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">lower" } " is not the same as " { $code "[ ch>lower ] map" } "." } ; - -HELP: ch>upper -{ $values { "ch" "a code point" } { "upper" "a code point" } } -{ $description "Converts a code point to upper case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">upper" } " is not the same as " { $code "[ ch>upper ] map" } "." } ; - -HELP: ch>title -{ $values { "ch" "a code point" } { "title" "a code point" } } -{ $description "Converts a code point to title case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">title" } " is not the same as " { $code "[ ch>title ] map" } "." } ; diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index b0472cd9cb..c800205704 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -7,18 +7,18 @@ IN: unicode.case <PRIVATE : at-default ( key assoc -- value/key ) [ at ] [ drop ] 2bi or ; -PRIVATE> : ch>lower ( ch -- lower ) simple-lower at-default ; : ch>upper ( ch -- upper ) simple-upper at-default ; : ch>title ( ch -- title ) simple-title at-default ; +PRIVATE> SYMBOL: locale ! Just casing locale, or overall? <PRIVATE : split-subseq ( string sep -- strings ) - [ dup ] swap '[ _ split1 swap ] [ ] produce nip ; + [ dup ] swap '[ _ split1-slice swap ] [ ] produce nip ; : replace ( old new str -- newstr ) [ split-subseq ] dip join ; diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index cf4130ca4d..6cf913bffa 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -23,7 +23,7 @@ VALUE: properties : combine-chars ( a b -- char/f ) combine-map hash2 ; : compatibility-entry ( char -- seq ) compatibility-map at ; : combining-class ( char -- n ) class-map at ; -: non-starter? ( char -- ? ) class-map key? ; +: non-starter? ( char -- ? ) combining-class { 0 f } member? not ; : name>char ( name -- char ) name-map at ; : char>name ( char -- name ) name-map value-at ; : property? ( char property -- ? ) properties at interval-key? ; @@ -128,12 +128,9 @@ VALUE: properties cat categories index char table ?set-nth ] assoc-each table fill-ranges ] ; -: ascii-lower ( string -- lower ) - [ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ; - : process-names ( data -- names-hash ) 1 swap (process-data) [ - ascii-lower { { CHAR: \s CHAR: - } } substitute swap + >lower { { CHAR: \s CHAR: - } } substitute swap ] H{ } assoc-map-as ; : multihex ( hexstring -- string ) @@ -183,6 +180,13 @@ load-data { [ process-category to: category-map ] } cleave +: postprocess-class ( -- ) + combine-map [ [ second ] map ] map concat + [ combining-class not ] filter + [ 0 swap class-map set-at ] each ; + +postprocess-class + load-special-casing to: special-casing load-properties to: properties diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 58ce412a2e..7a41a768cd 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays -locals sorting.insertion accessors assocs math.order ; +locals sorting.insertion accessors assocs math.order combinators +unicode.syntax strings sbufs ; IN: unicode.normalize <PRIVATE @@ -65,26 +66,29 @@ CONSTANT: final-count 28 over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ; :: decompose ( string quot -- decomposed ) - ! When there are 8 and 32-bit strings, this'll be - ! equivalent to clone on 8 and the contents of the last - ! main quotation on 32. - string [ 127 < ] all? [ string ] [ - [ - string [ - dup hangul? [ hangul>jamo % ] - [ dup quot call [ % ] [ , ] ?if ] if - ] each - ] "" make - dup reorder - ] if ; inline + [let | out [ string length <sbuf> ] | + string [ + dup hangul? [ hangul>jamo out push-all ] + [ dup quot call [ out push-all ] [ out push ] ?if ] if + ] each out >string + ] dup reorder ; + +: with-string ( str quot -- str ) + over aux>> [ call ] [ drop ] if ; inline + +: (nfd) ( string -- nfd ) + [ canonical-entry ] decompose ; + +: (nfkd) ( string -- nfkd ) + [ compatibility-entry ] decompose ; PRIVATE> : nfd ( string -- nfd ) - [ canonical-entry ] decompose ; + [ (nfd) ] with-string ; : nfkd ( string -- nfkd ) - [ compatibility-entry ] decompose ; + [ (nfkd) ] with-string ; : string-append ( s1 s2 -- string ) [ append ] keep @@ -138,20 +142,26 @@ DEFER: compose-iter : compose-iter ( last-class -- ) current [ - dup combining-class - [ try-compose to compose-iter ] - [ swap [ drop ] [ try-noncombining ] if ] if* + dup combining-class { + { f [ 2drop ] } + { 0 [ swap [ drop ] [ try-noncombining ] if ] } + [ try-compose to compose-iter ] + } case ] [ drop ] if* ; : ?new-after ( -- ) after [ dup empty? [ drop SBUF" " clone ] unless ] change ; +: compose-combining ( ch -- ) + char set to ?new-after + f compose-iter + char get , after get % ; + : (compose) ( -- ) current [ dup jamo? [ drop compose-jamo ] [ - char set to ?new-after - f compose-iter - char get , after get % + 1 get-str combining-class + [ compose-combining ] [ , to ] if ] if (compose) ] when* ; @@ -166,7 +176,7 @@ DEFER: compose-iter PRIVATE> : nfc ( string -- nfc ) - nfd combine ; + [ (nfd) combine ] with-string ; : nfkc ( string -- nfkc ) - nfkd combine ; + [ (nfkd) combine ] with-string ; diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor index c37d60df14..3e632cc5af 100644 --- a/basis/xmode/marker/marker.factor +++ b/basis/xmode/marker/marker.factor @@ -5,7 +5,7 @@ USING: kernel namespaces make xmode.rules xmode.tokens xmode.marker.state xmode.marker.context xmode.utilities xmode.catalog sequences math assocs combinators strings parser-combinators.regexp splitting parser-combinators ascii -unicode.case combinators.short-circuit accessors ; +ascii combinators.short-circuit accessors ; ! Based on org.gjt.sp.jedit.syntax.TokenMarker diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor index 3298706da3..4147ffabdf 100755 --- a/extra/benchmark/reverse-complement/reverse-complement.factor +++ b/extra/benchmark/reverse-complement/reverse-complement.factor @@ -3,7 +3,7 @@ USING: io io.files io.files.temp io.streams.duplex kernel sequences sequences.private strings vectors words memoize splitting grouping hints tr continuations io.encodings.ascii -unicode.case ; +ascii ; IN: benchmark.reverse-complement TR: trans-map ch>upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ; diff --git a/extra/geo-ip/geo-ip.factor b/extra/geo-ip/geo-ip.factor index c878306d7d..ad6302ca55 100644 --- a/extra/geo-ip/geo-ip.factor +++ b/extra/geo-ip/geo-ip.factor @@ -4,7 +4,7 @@ USING: kernel sequences io.files io.files.temp io.launcher io.pathnames io.encodings.ascii io.streams.string http.client generalizations combinators math.parser math.vectors math.intervals interval-maps memoize csv accessors assocs -strings math splitting grouping arrays ; +strings math splitting grouping arrays combinators.smart ; IN: geo-ip : db-path ( -- path ) "IpToCountry.csv" temp-file ; @@ -20,15 +20,17 @@ IN: geo-ip TUPLE: ip-entry from to registry assigned city cntry country ; : parse-ip-entry ( row -- ip-entry ) - 7 firstn { - [ string>number ] - [ string>number ] - [ ] - [ ] - [ ] - [ ] - [ ] - } spread ip-entry boa ; + [ + { + [ string>number ] + [ string>number ] + [ ] + [ ] + [ ] + [ ] + [ ] + } spread + ] input<sequence ip-entry boa ; MEMO: ip-db ( -- seq ) download-db ascii file-lines diff --git a/extra/parser-combinators/regexp/regexp.factor b/extra/parser-combinators/regexp/regexp.factor index 2becd937f2..1c94308e93 100755 --- a/extra/parser-combinators/regexp/regexp.factor +++ b/extra/parser-combinators/regexp/regexp.factor @@ -2,7 +2,7 @@ USING: arrays combinators kernel lists math math.parser namespaces parser lexer parser-combinators parser-combinators.simple promises quotations sequences strings math.order assocs prettyprint.backend prettyprint.custom memoize -unicode.case unicode.categories combinators.short-circuit +ascii unicode.categories combinators.short-circuit accessors make io ; IN: parser-combinators.regexp diff --git a/extra/usa-cities/usa-cities-tests.factor b/extra/usa-cities/usa-cities-tests.factor new file mode 100644 index 0000000000..2dbeafcfee --- /dev/null +++ b/extra/usa-cities/usa-cities-tests.factor @@ -0,0 +1,6 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors kernel tools.test usa-cities ; +IN: usa-cities.tests + +[ t ] [ 55406 find-zip-code name>> "Minneapolis" = ] unit-test diff --git a/extra/usa-cities/usa-cities.factor b/extra/usa-cities/usa-cities.factor index deb3e15845..25ec30ac78 100644 --- a/extra/usa-cities/usa-cities.factor +++ b/extra/usa-cities/usa-cities.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: io.files io.encodings.ascii sequences generalizations math.parser combinators kernel memoize csv summary -words accessors math.order binary-search ; +words accessors math.order binary-search combinators.smart ; IN: usa-cities SINGLETONS: AK AL AR AS AZ CA CO CT DC DE FL GA HI IA ID IL IN @@ -30,15 +30,17 @@ first-zip name state latitude longitude gmt-offset dst-offset ; MEMO: cities ( -- seq ) "resource:extra/usa-cities/zipcode.csv" ascii <file-reader> csv rest-slice [ - 7 firstn { - [ string>number ] - [ ] - [ string>state ] - [ string>number ] - [ string>number ] - [ string>number ] - [ string>number ] - } spread city boa + [ + { + [ string>number ] + [ ] + [ string>state ] + [ string>number ] + [ string>number ] + [ string>number ] + [ string>number ] + } spread + ] input<sequence city boa ] map ; MEMO: cities-named ( name -- cities )