From cf160c60ea38aaac4cf005e77eb2118a2345f664 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 17:48:17 -0600 Subject: [PATCH 01/29] use input> "Minneapolis" = ] unit-test diff --git a/extra/usa-cities/usa-cities.factor b/extra/usa-cities/usa-cities.factor index deb3e15845..25ec30ac78 100644 --- a/extra/usa-cities/usa-cities.factor +++ b/extra/usa-cities/usa-cities.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: io.files io.encodings.ascii sequences generalizations math.parser combinators kernel memoize csv summary -words accessors math.order binary-search ; +words accessors math.order binary-search combinators.smart ; IN: usa-cities SINGLETONS: AK AL AR AS AZ CA CO CT DC DE FL GA HI IA ID IL IN @@ -30,15 +30,17 @@ first-zip name state latitude longitude gmt-offset dst-offset ; MEMO: cities ( -- seq ) "resource:extra/usa-cities/zipcode.csv" ascii csv rest-slice [ - 7 firstn { - [ string>number ] - [ ] - [ string>state ] - [ string>number ] - [ string>number ] - [ string>number ] - [ string>number ] - } spread city boa + [ + { + [ string>number ] + [ ] + [ string>state ] + [ string>number ] + [ string>number ] + [ string>number ] + [ string>number ] + } spread + ] input Date: Thu, 8 Jan 2009 17:53:48 -0600 Subject: [PATCH 02/29] use smart combinator in geo-ip --- extra/geo-ip/geo-ip.factor | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/extra/geo-ip/geo-ip.factor b/extra/geo-ip/geo-ip.factor index c878306d7d..ad6302ca55 100644 --- a/extra/geo-ip/geo-ip.factor +++ b/extra/geo-ip/geo-ip.factor @@ -4,7 +4,7 @@ USING: kernel sequences io.files io.files.temp io.launcher io.pathnames io.encodings.ascii io.streams.string http.client generalizations combinators math.parser math.vectors math.intervals interval-maps memoize csv accessors assocs -strings math splitting grouping arrays ; +strings math splitting grouping arrays combinators.smart ; IN: geo-ip : db-path ( -- path ) "IpToCountry.csv" temp-file ; @@ -20,15 +20,17 @@ IN: geo-ip TUPLE: ip-entry from to registry assigned city cntry country ; : parse-ip-entry ( row -- ip-entry ) - 7 firstn { - [ string>number ] - [ string>number ] - [ ] - [ ] - [ ] - [ ] - [ ] - } spread ip-entry boa ; + [ + { + [ string>number ] + [ string>number ] + [ ] + [ ] + [ ] + [ ] + [ ] + } spread + ] input Date: Thu, 8 Jan 2009 17:55:04 -0600 Subject: [PATCH 03/29] use smart combinators in tools.files.unix --- basis/tools/files/unix/unix.factor | 50 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/basis/tools/files/unix/unix.factor b/basis/tools/files/unix/unix.factor index 507c689a55..9757db171a 100755 --- a/basis/tools/files/unix/unix.factor +++ b/basis/tools/files/unix/unix.factor @@ -3,12 +3,9 @@ USING: accessors combinators kernel system unicode.case io.files io.files.info io.files.info.unix tools.files generalizations strings arrays sequences math.parser unix.groups unix.users -tools.files.private unix.stat math fry macros ; +tools.files.private unix.stat math fry macros combinators.smart ; IN: tools.files.unix -MACRO: cleave>array ( array -- quot ) - dup length '[ _ cleave _ narray ] ; - string ( str bools -- str' ) @@ -20,18 +17,20 @@ MACRO: cleave>array ( array -- quot ) } case ; : permissions-string ( permissions -- str ) - { - [ type>> file-type>ch 1string ] - [ user-read? read>string ] - [ user-write? write>string ] - [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] - [ group-read? read>string ] - [ group-write? write>string ] - [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] - [ other-read? read>string ] - [ other-write? write>string ] - [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] - } cleave>array concat ; + [ + { + [ type>> file-type>ch 1string ] + [ user-read? read>string ] + [ user-write? write>string ] + [ [ uid? ] [ user-execute? ] bi 2array "s" unix-execute>string ] + [ group-read? read>string ] + [ group-write? write>string ] + [ [ gid? ] [ group-execute? ] bi 2array "s" unix-execute>string ] + [ other-read? read>string ] + [ other-write? write>string ] + [ [ sticky? ] [ other-execute? ] bi 2array "t" unix-execute>string ] + } cleave + ] output>array concat ; : mode>symbol ( mode -- ch ) S_IFMT bitand @@ -48,15 +47,16 @@ MACRO: cleave>array ( array -- quot ) M: unix (directory.) ( path -- lines ) [ [ [ - dup file-info - { - [ permissions-string ] - [ nlink>> number>string 3 CHAR: \s pad-left ] - [ uid>> user-name ] - [ gid>> group-name ] - [ size>> number>string 15 CHAR: \s pad-left ] - [ modified>> ls-timestamp ] - } cleave>array swap suffix " " join + dup file-info [ + { + [ permissions-string ] + [ nlink>> number>string 3 CHAR: \s pad-left ] + [ uid>> user-name ] + [ gid>> group-name ] + [ size>> number>string 15 CHAR: \s pad-left ] + [ modified>> ls-timestamp ] + } cleave + ] output>array swap suffix " " join ] map ] with-group-cache ] with-user-cache ; From a773e592164c4197aef519ab5424e6bc58bc466c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 17:56:03 -0600 Subject: [PATCH 04/29] username -> user-name --- basis/io/files/info/unix/unix-docs.factor | 8 ++++---- basis/io/files/info/unix/unix.factor | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/basis/io/files/info/unix/unix-docs.factor b/basis/io/files/info/unix/unix-docs.factor index 0dff2e4419..a6ee2b9597 100644 --- a/basis/io/files/info/unix/unix-docs.factor +++ b/basis/io/files/info/unix/unix-docs.factor @@ -22,11 +22,11 @@ HELP: file-permissions { "n" integer } } { $description "Returns the Unix file permissions for a given file." } ; -HELP: file-username +HELP: file-user-name { $values { "path" "a pathname string" } { "string" string } } -{ $description "Returns the username for a given file." } ; +{ $description "Returns the user-name for a given file." } ; HELP: file-user-id { $values @@ -110,7 +110,7 @@ HELP: set-file-times HELP: set-file-user { $values { "path" "a pathname string" } { "string/id" "a string or a user id" } } -{ $description "Sets a file's user id from the given user id or username." } ; +{ $description "Sets a file's user id from the given user id or user-name." } ; HELP: set-file-modified-time { $values @@ -258,7 +258,7 @@ ARTICLE: "unix-file-timestamps" "Unix file timestamps" ARTICLE: "unix-file-ids" "Unix file user and group ids" "Reading file user data:" { $subsection file-user-id } -{ $subsection file-username } +{ $subsection file-user-name } "Setting file user data:" { $subsection set-file-user } "Reading file group data:" diff --git a/basis/io/files/info/unix/unix.factor b/basis/io/files/info/unix/unix.factor index 66b95db144..9287e7f4ad 100644 --- a/basis/io/files/info/unix/unix.factor +++ b/basis/io/files/info/unix/unix.factor @@ -243,8 +243,8 @@ M: string set-file-group ( path string -- ) : file-user-id ( path -- uid ) normalize-path file-info uid>> ; -: file-username ( path -- string ) - file-user-id username ; +: file-user-name ( path -- string ) + file-user-id user-name ; : file-group-id ( path -- gid ) normalize-path file-info gid>> ; From cf3473cc911c5f0b404675217d2196e1a080f611 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 17:56:52 -0600 Subject: [PATCH 05/29] Making normalization and case conversion faster --- basis/unicode/case/case.factor | 2 +- basis/unicode/data/data.factor | 9 +++- basis/unicode/normalize/normalize.factor | 56 ++++++++++++++---------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index b0472cd9cb..99278cd72e 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -18,7 +18,7 @@ SYMBOL: locale ! Just casing locale, or overall? char ( name -- char ) name-map at ; : char>name ( char -- name ) name-map value-at ; : property? ( char property -- ? ) properties at interval-key? ; @@ -183,6 +183,13 @@ load-data { [ process-category to: category-map ] } cleave +: postprocess-class ( -- ) + combine-map [ [ second ] map ] map concat + [ combining-class not ] filter + [ 0 swap class-map set-at ] each ; + +postprocess-class + load-special-casing to: special-casing load-properties to: properties diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 58ce412a2e..c8d0eb3f7d 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays -locals sorting.insertion accessors assocs math.order ; +locals sorting.insertion accessors assocs math.order combinators ; IN: unicode.normalize jamo % ] - [ dup quot call [ % ] [ , ] ?if ] if - ] each - ] "" make - dup reorder - ] if ; inline + [ + string [ + dup hangul? [ hangul>jamo % ] + [ dup quot call [ % ] [ , ] ?if ] if + ] each + ] "" make + dup reorder ; + +: with-string ( str quot -- str ) + over aux>> [ call ] [ drop ] if ; inline + +: (nfd) ( string -- nfd ) + [ canonical-entry ] decompose ; + +: (nfkd) ( string -- nfkd ) + [ compatibility-entry ] decompose ; PRIVATE> : nfd ( string -- nfd ) - [ canonical-entry ] decompose ; + [ (nfd) ] with-string ; : nfkd ( string -- nfkd ) - [ compatibility-entry ] decompose ; + [ (nfkd) ] with-string ; : string-append ( s1 s2 -- string ) [ append ] keep @@ -138,20 +142,26 @@ DEFER: compose-iter : compose-iter ( last-class -- ) current [ - dup combining-class - [ try-compose to compose-iter ] - [ swap [ drop ] [ try-noncombining ] if ] if* + dup combining-class { + { f [ 2drop ] } + { 0 [ swap [ drop ] [ try-noncombining ] if ] } + [ try-compose to compose-iter ] + } case ] [ drop ] if* ; : ?new-after ( -- ) after [ dup empty? [ drop SBUF" " clone ] unless ] change ; +: compose-combining ( ch -- ) + char set to ?new-after + f compose-iter + char get , after get % ; + : (compose) ( -- ) current [ dup jamo? [ drop compose-jamo ] [ - char set to ?new-after - f compose-iter - char get , after get % + 1 get-str combining-class + [ compose-combining ] [ , to ] if ] if (compose) ] when* ; @@ -166,7 +176,7 @@ DEFER: compose-iter PRIVATE> : nfc ( string -- nfc ) - nfd combine ; + [ (nfd) combine ] with-string ; : nfkc ( string -- nfkc ) - nfkd combine ; + [ (nfkd) combine ] with-string ; From fe92608a1f753c83f53b9a91f7ba6df05f1ea919 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:56:49 -0600 Subject: [PATCH 06/29] Add combinators.smart --- basis/combinators/smart/authors.txt | 1 + basis/combinators/smart/smart-docs.factor | 91 ++++++++++++++++++++++ basis/combinators/smart/smart-tests.factor | 21 +++++ basis/combinators/smart/smart.factor | 22 ++++++ 4 files changed, 135 insertions(+) create mode 100644 basis/combinators/smart/authors.txt create mode 100644 basis/combinators/smart/smart-docs.factor create mode 100644 basis/combinators/smart/smart-tests.factor create mode 100644 basis/combinators/smart/smart.factor diff --git a/basis/combinators/smart/authors.txt b/basis/combinators/smart/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/basis/combinators/smart/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/basis/combinators/smart/smart-docs.factor b/basis/combinators/smart/smart-docs.factor new file mode 100644 index 0000000000..69ec3e7013 --- /dev/null +++ b/basis/combinators/smart/smart-docs.factor @@ -0,0 +1,91 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: help.markup help.syntax kernel quotations math sequences +multiline ; +IN: combinators.smart + +HELP: inputarray +{ $values + { "quot" quotation } + { "newquot" quotation } +} +{ $description "Infers the number or outputs from the quotation and constructs an array from those outputs." } +{ $examples + { $example + <" USING: combinators combinators.smart math prettyprint ; +9 [ + { [ 1- ] [ 1+ ] [ sq ] } cleave +] output>array ."> + "{ 8 10 81 }" + } +} ; + +HELP: output>sequence +{ $values + { "quot" quotation } { "exemplar" "an exemplar" } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from the quotation and constructs a new sequence from those objects of the same type as the exemplar." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "4 [ [ 1 + ] [ 2 + ] [ 3 + ] tri ] V{ } output>sequence ." + "V{ 5 6 7 }" + } +} ; + +HELP: reduce-output +{ $values + { "quot" quotation } { "operation" quotation } + { "newquot" quotation } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and reduces them using " { $snippet "operation" } ". The identity for the " { $link reduce } " operation is the first output." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "3 [ [ 4 * ] [ 4 / ] [ 4 - ] tri ] [ * ] reduce-output ." + "-9" + } +} ; + +HELP: sum-outputs +{ $values + { "quot" quotation } + { "n" integer } +} +{ $description "Infers the number of outputs from " { $snippet "quot" } " and returns their sum." } +{ $examples + { $example + "USING: combinators.smart kernel math prettyprint ;" + "10 [ [ 1- ] [ 1+ ] bi ] sum-outputs ." + "20" + } +} ; + +ARTICLE: "combinators.smart" "Smart combinators" +"The " { $vocab-link "combinators.smart" } " vocabulary implements " { $emphasis "smart combinators" } ". A smart combinator is one whose behavior depends on the static stack effect of an input quotation." $nl +"Smart inputs from a sequence:" +{ $subsection inputsequence } +{ $subsection output>array } +"Reducing the output of a quotation:" +{ $subsection reduce-output } +"Summing the output of a quotation:" +{ $subsection sum-outputs } ; + +ABOUT: "combinators.smart" diff --git a/basis/combinators/smart/smart-tests.factor b/basis/combinators/smart/smart-tests.factor new file mode 100644 index 0000000000..4be445e465 --- /dev/null +++ b/basis/combinators/smart/smart-tests.factor @@ -0,0 +1,21 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test combinators.smart math kernel ; +IN: combinators.smart.tests + +: test-bi ( -- 9 11 ) + 10 [ 1- ] [ 1+ ] bi ; + +[ [ test-bi ] output>array ] must-infer +[ { 9 11 } ] [ [ test-bi ] output>array ] unit-test + +[ { 9 11 } [ + ] inputsequence ( quot exemplar -- newquot ) + [ dup infer out>> ] dip + '[ @ _ _ nsequence ] ; + +: output>array ( quot -- newquot ) + { } output>sequence ; inline + +MACRO: input> ] keep + '[ _ firstn @ ] ; + +MACRO: reduce-output ( quot operation -- newquot ) + [ dup infer out>> 1 [-] ] dip n*quot compose ; + +: sum-outputs ( quot -- n ) + [ + ] reduce-output ; inline From 932631c901f167c6dd2ea55fbdc4307355fdca12 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:07 -0600 Subject: [PATCH 07/29] use combinators.smart for bit-count --- basis/math/bitwise/bitwise-tests.factor | 4 ++++ basis/math/bitwise/bitwise.factor | 16 +++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index 979c62dcfb..40eb20642c 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -32,3 +32,7 @@ IN: math.bitwise.tests [ 8 ] [ 0 3 toggle-bit ] unit-test [ 0 ] [ 8 3 toggle-bit ] unit-test + +[ 4 ] [ BIN: 1010101 bit-count ] unit-test +[ 0 ] [ BIN: 0 bit-count ] unit-test +[ 1 ] [ BIN: 1 bit-count ] unit-test diff --git a/basis/math/bitwise/bitwise.factor b/basis/math/bitwise/bitwise.factor index 2c03164ae7..e60815bf60 100755 --- a/basis/math/bitwise/bitwise.factor +++ b/basis/math/bitwise/bitwise.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: arrays kernel math math.functions sequences sequences.private words namespaces macros hints -combinators fry io.binary ; +combinators fry io.binary combinators.smart ; IN: math.bitwise ! utilities @@ -76,12 +76,14 @@ DEFER: byte-bit-count GENERIC: (bit-count) ( x -- n ) M: fixnum (bit-count) - { - [ byte-bit-count ] - [ -8 shift byte-bit-count ] - [ -16 shift byte-bit-count ] - [ -24 shift byte-bit-count ] - } cleave + + + ; + [ + { + [ byte-bit-count ] + [ -8 shift byte-bit-count ] + [ -16 shift byte-bit-count ] + [ -24 shift byte-bit-count ] + } cleave + ] sum-outputs ; M: bignum (bit-count) dup 0 = [ drop 0 ] [ From e020df3d008a878d5205ab1ecc9763d193d492b5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:23 -0600 Subject: [PATCH 08/29] use combinators.smart --- basis/tools/cocoa/cocoa.factor | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/basis/tools/cocoa/cocoa.factor b/basis/tools/cocoa/cocoa.factor index a8cdf6f41c..9dd1895a68 100644 --- a/basis/tools/cocoa/cocoa.factor +++ b/basis/tools/cocoa/cocoa.factor @@ -1,16 +1,18 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays cocoa.messages cocoa.runtime combinators -prettyprint ; +prettyprint combinators.smart ; IN: tools.cocoa : method. ( method -- ) - { - [ method_getName sel_getName ] - [ method-return-type ] - [ method-arg-types ] - [ method_getImplementation ] - } cleave 4array . ; + [ + { + [ method_getName sel_getName ] + [ method-return-type ] + [ method-arg-types ] + [ method_getImplementation ] + } cleave + ] output>array . ; : methods. ( class -- ) [ method. ] each-method-in-class ; From 8cb0be6a0af905c6ed103ac9731070aa1cd8e04b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 18:57:33 -0600 Subject: [PATCH 09/29] use combinators.smart --- basis/ui/gadgets/buttons/buttons.factor | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/basis/ui/gadgets/buttons/buttons.factor b/basis/ui/gadgets/buttons/buttons.factor index 75469671ef..dabc12d3ae 100644 --- a/basis/ui/gadgets/buttons/buttons.factor +++ b/basis/ui/gadgets/buttons/buttons.factor @@ -6,7 +6,7 @@ classes.tuple opengl opengl.gl math.vectors ui.commands ui.gadgets ui.gadgets.borders ui.gadgets.labels ui.gadgets.theme ui.gadgets.tracks ui.gadgets.packs ui.gadgets.worlds ui.gestures ui.render math.geometry.rect locals alien.c-types -specialized-arrays.float fry ; +specialized-arrays.float fry combinators.smart ; IN: ui.gadgets.buttons TUPLE: button < border pressed? selected? quot ; @@ -111,12 +111,14 @@ TUPLE: checkmark-paint < caching-pen color last-vertices ; array ; : checkmark-vertices ( dim -- vertices ) checkmark-points concat >float-array ; From ad53cb8635ba90bdf50419846e995875d3e20955 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 19:07:46 -0600 Subject: [PATCH 10/29] Privatizing unicode.case:ch>{lower,upper,title} --- basis/ascii/ascii-docs.factor | 28 ++++++++++++++++++- basis/ascii/ascii-tests.factor | 5 ++++ basis/ascii/ascii.factor | 14 ++++++++++ basis/regexp/nfa/nfa.factor | 7 ++++- basis/regexp/parser/parser.factor | 10 +++---- basis/soundex/soundex.factor | 2 +- basis/tr/tr-tests.factor | 2 +- basis/tr/tr.factor | 4 +-- basis/unicode/case/case-docs.factor | 19 ------------- basis/unicode/case/case.factor | 2 +- basis/unicode/data/data.factor | 5 +--- basis/unicode/normalize/normalize.factor | 3 +- basis/xmode/marker/marker.factor | 2 +- .../reverse-complement.factor | 2 +- extra/parser-combinators/regexp/regexp.factor | 2 +- 15 files changed, 67 insertions(+), 40 deletions(-) diff --git a/basis/ascii/ascii-docs.factor b/basis/ascii/ascii-docs.factor index 6af697cf89..4c783e609c 100644 --- a/basis/ascii/ascii-docs.factor +++ b/basis/ascii/ascii-docs.factor @@ -37,6 +37,26 @@ HELP: quotable? { $values { "ch" "a character" } { "?" "a boolean" } } { $description "Tests for characters which may appear in a Factor string literal without escaping." } ; +HELP: ascii? +{ $values { "ch" "a character" } { "?" "a boolean" } } +{ $description "Tests for whether a number is an ASCII character." } ; + +HELP: ch>lower +{ $values { "ch" "a character" } { "lower" "a character" } } +{ $description "Converts an ASCII character to lower case." } ; + +HELP: ch>upper +{ $values { "ch" "a character" } { "upper" "a character" } } +{ $description "Converts an ASCII character to upper case." } ; + +HELP: >lower +{ $values { "str" "a string" } { "lower" "a string" } } +{ $description "Converts an ASCII string to lower case." } ; + +HELP: >upper +{ $values { "str" "a string" } { "upper" "a string" } } +{ $description "Converts an ASCII string to upper case." } ; + ARTICLE: "ascii" "ASCII character classes" "The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:" { $subsection blank? } @@ -46,6 +66,12 @@ ARTICLE: "ascii" "ASCII character classes" { $subsection printable? } { $subsection control? } { $subsection quotable? } -"Modern applications should use Unicode 5.0 instead (" { $vocab-link "unicode.categories" } ")." ; +{ $subsection ascii? } +"ASCII case conversion is also implemented:" +{ $subsection ch>lower } +{ $subsection ch>upper } +{ $subsection >lower } +{ $subsection >upper } +"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ; ABOUT: "ascii" diff --git a/basis/ascii/ascii-tests.factor b/basis/ascii/ascii-tests.factor index 7dacce734b..6f39b32a01 100644 --- a/basis/ascii/ascii-tests.factor +++ b/basis/ascii/ascii-tests.factor @@ -12,3 +12,8 @@ IN: ascii.tests 0 "There are Four Upper Case characters" [ LETTER? [ 1+ ] when ] each ] unit-test + +[ t f ] [ CHAR: \s ascii? 400 ascii? ] unit-test + +[ "HELLO HOW ARE YOU?" ] [ "hellO hOw arE YOU?" >upper ] unit-test +[ "i'm good thx bai" ] [ "I'm Good THX bai" >lower ] unit-test diff --git a/basis/ascii/ascii.factor b/basis/ascii/ascii.factor index c009c66cde..a64a7b8eb5 100644 --- a/basis/ascii/ascii.factor +++ b/basis/ascii/ascii.factor @@ -4,6 +4,8 @@ USING: kernel math math.order sequences combinators.short-circuit ; IN: ascii +: ascii? ( ch -- ? ) 0 127 between? ; inline + : blank? ( ch -- ? ) " \t\n\r" member? ; inline : letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline @@ -25,3 +27,15 @@ IN: ascii : alpha? ( ch -- ? ) [ [ Letter? ] [ digit? ] ] 1|| ; + +: ch>lower ( ch -- lower ) + dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ; + +: >lower ( str -- lower ) + [ ch>lower ] map ; + +: ch>upper ( ch -- upper ) + dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ; + +: >upper ( str -- upper ) + [ ch>upper ] map ; diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 7620652948..dd116f3d7a 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -3,7 +3,10 @@ USING: accessors arrays assocs grouping kernel regexp.backend locals math namespaces regexp.parser sequences fry quotations math.order math.ranges vectors unicode.categories regexp.utils -regexp.transition-tables words sets regexp.classes unicode.case ; +regexp.transition-tables words sets regexp.classes unicode.case.private ; +! This uses unicode.case.private for ch>upper and ch>lower +! but case-insensitive matching should be done by case-folding everything +! before processing starts IN: regexp.nfa SYMBOL: negation-mode @@ -160,6 +163,8 @@ M: LETTER-class nfa-node ( node -- ) M: character-class-range nfa-node ( node -- ) case-insensitive option? [ + ! This should be implemented for Unicode by case-folding + ! the input and all strings in the regexp. dup [ from>> ] [ to>> ] bi 2dup [ Letter? ] bi@ and [ rot drop diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 25509ec798..2f397538a0 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -3,8 +3,8 @@ USING: accessors arrays assocs combinators io io.streams.string kernel math math.parser namespaces sets quotations sequences splitting vectors math.order -unicode.categories strings regexp.backend regexp.utils -unicode.case words locals regexp.classes ; +strings regexp.backend regexp.utils +unicode.case unicode.categories words locals regexp.classes ; IN: regexp.parser FROM: math.ranges => [a,b] ; @@ -261,7 +261,7 @@ ERROR: bad-escaped-literals seq ; parse-til-E drop1 [ epsilon ] [ - [ quot call ] V{ } map-as + quot call [ ] V{ } map-as first|concatenation ] if-empty ; inline @@ -269,10 +269,10 @@ ERROR: bad-escaped-literals seq ; [ ] (parse-escaped-literals) ; : lower-case-literals ( -- obj ) - [ ch>lower ] (parse-escaped-literals) ; + [ >lower ] (parse-escaped-literals) ; : upper-case-literals ( -- obj ) - [ ch>upper ] (parse-escaped-literals) ; + [ >upper ] (parse-escaped-literals) ; : parse-escaped ( -- obj ) read1 diff --git a/basis/soundex/soundex.factor b/basis/soundex/soundex.factor index 416ec4a6bc..164f634185 100644 --- a/basis/soundex/soundex.factor +++ b/basis/soundex/soundex.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences grouping assocs kernel ascii unicode.case tr ; +USING: sequences grouping assocs kernel ascii ascii tr ; IN: soundex TR: soundex-tr diff --git a/basis/tr/tr-tests.factor b/basis/tr/tr-tests.factor index c168f5384d..3434c28216 100644 --- a/basis/tr/tr-tests.factor +++ b/basis/tr/tr-tests.factor @@ -1,5 +1,5 @@ IN: tr.tests -USING: tr tools.test unicode.case ; +USING: tr tools.test ascii ; TR: tr-test ch>upper "ABC" "XYZ" ; diff --git a/basis/tr/tr.factor b/basis/tr/tr.factor index 66d8df7d44..ce535f335a 100644 --- a/basis/tr/tr.factor +++ b/basis/tr/tr.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: byte-arrays strings sequences sequences.private +USING: byte-arrays strings sequences sequences.private ascii fry kernel words parser lexer assocs math math.order summary ; IN: tr @@ -11,8 +11,6 @@ M: bad-tr summary lower } { $subsection >title } { $subsection >case-fold } -"There are analogous routines which operate on individual code points, but these should " { $emphasis "not be used" } " in general as they have slightly different behavior. In some cases, for example, they do not perform the case operation, as a single code point must expand to more than one." -{ $subsection ch>upper } -{ $subsection ch>lower } -{ $subsection ch>title } "To test if a string is in a given case:" { $subsection upper? } { $subsection lower? } @@ -53,18 +49,3 @@ HELP: title? HELP: case-fold? { $values { "string" string } { "?" "a boolean" } } { $description "Tests if a string is in case-folded form." } ; - -HELP: ch>lower -{ $values { "ch" "a code point" } { "lower" "a code point" } } -{ $description "Converts a code point to lower case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">lower" } " is not the same as " { $code "[ ch>lower ] map" } "." } ; - -HELP: ch>upper -{ $values { "ch" "a code point" } { "upper" "a code point" } } -{ $description "Converts a code point to upper case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">upper" } " is not the same as " { $code "[ ch>upper ] map" } "." } ; - -HELP: ch>title -{ $values { "ch" "a code point" } { "title" "a code point" } } -{ $description "Converts a code point to title case." } -{ $warning "Don't use this unless you know what you're doing! " { $code ">title" } " is not the same as " { $code "[ ch>title ] map" } "." } ; diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index 99278cd72e..c800205704 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -7,11 +7,11 @@ IN: unicode.case : ch>lower ( ch -- lower ) simple-lower at-default ; : ch>upper ( ch -- upper ) simple-upper at-default ; : ch>title ( ch -- title ) simple-title at-default ; +PRIVATE> SYMBOL: locale ! Just casing locale, or overall? diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 61a93d9375..6cf913bffa 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -128,12 +128,9 @@ VALUE: properties cat categories index char table ?set-nth ] assoc-each table fill-ranges ] ; -: ascii-lower ( string -- lower ) - [ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ; - : process-names ( data -- names-hash ) 1 swap (process-data) [ - ascii-lower { { CHAR: \s CHAR: - } } substitute swap + >lower { { CHAR: \s CHAR: - } } substitute swap ] H{ } assoc-map-as ; : multihex ( hexstring -- string ) diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index c8d0eb3f7d..2fbe2e1843 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays -locals sorting.insertion accessors assocs math.order combinators ; +locals sorting.insertion accessors assocs math.order combinators +unicode.syntax ; IN: unicode.normalize upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ; diff --git a/extra/parser-combinators/regexp/regexp.factor b/extra/parser-combinators/regexp/regexp.factor index 2becd937f2..1c94308e93 100755 --- a/extra/parser-combinators/regexp/regexp.factor +++ b/extra/parser-combinators/regexp/regexp.factor @@ -2,7 +2,7 @@ USING: arrays combinators kernel lists math math.parser namespaces parser lexer parser-combinators parser-combinators.simple promises quotations sequences strings math.order assocs prettyprint.backend prettyprint.custom memoize -unicode.case unicode.categories combinators.short-circuit +ascii unicode.categories combinators.short-circuit accessors make io ; IN: parser-combinators.regexp From 58df6dad6f44ce45d5182a594d91e60f475ca42e Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 22:23:39 -0600 Subject: [PATCH 11/29] Unicode cleanup and optimization --- basis/unicode/breaks/breaks.factor | 22 ++++++++++++---------- basis/unicode/normalize/normalize.factor | 13 ++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 1d2f821750..df3b2f03e8 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -192,22 +192,22 @@ to: word-table : word-table-nth ( class1 class2 -- ? ) word-table nth nth ; -: property-not= ( i str property -- ? ) - pick [ - [ ?nth ] dip swap - [ word-break-prop = not ] [ drop f ] if* - ] [ 3drop t ] if ; +:: property-not= ( i str property -- ? ) + i [ + i str ?nth [ word-break-prop property = not ] + [ f ] if* + ] [ t ] if ; : format/extended? ( ch -- ? ) word-break-prop { 4 5 } member? ; :: walk-up ( str i -- j ) i 1 + str [ format/extended? not ] find-from drop - 1+ str [ format/extended? not ] find-from drop ; ! possible bounds error? + [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ; :: walk-down ( str i -- j ) i str [ format/extended? not ] find-last-from drop - 1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error? + [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ; :: word-break? ( table-entry i str -- ? ) table-entry { @@ -224,9 +224,11 @@ to: word-table } case ; :: word-break-next ( old-class new-char i str -- next-class ? ) - new-char word-break-prop dup { 4 5 } member? - [ drop old-class dup { 1 2 3 } member? ] - [ old-class over word-table-nth i str word-break? ] if ; + new-char dup format/extended? + [ drop old-class dup { 1 2 3 } member? ] [ + word-break-prop old-class over word-table-nth + i str word-break? + ] if ; PRIVATE> diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 2fbe2e1843..7a41a768cd 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: sequences namespaces make unicode.data kernel math arrays locals sorting.insertion accessors assocs math.order combinators -unicode.syntax ; +unicode.syntax strings sbufs ; IN: unicode.normalize ] | string [ - dup hangul? [ hangul>jamo % ] - [ dup quot call [ % ] [ , ] ?if ] if - ] each - ] "" make - dup reorder ; + dup hangul? [ hangul>jamo out push-all ] + [ dup quot call [ out push-all ] [ out push ] ?if ] if + ] each out >string + ] dup reorder ; : with-string ( str quot -- str ) over aux>> [ call ] [ drop ] if ; inline From 8d8efb6dcedb4be96b0f67eb2d89743d77a6a549 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 8 Jan 2009 22:49:24 -0600 Subject: [PATCH 12/29] Fixing Unicode bootstrap issue (hopefully) --- basis/bootstrap/unicode/unicode.factor | 5 ----- basis/unicode/case/case.factor | 2 +- basis/unicode/data/data.factor | 6 +++++- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/basis/bootstrap/unicode/unicode.factor b/basis/bootstrap/unicode/unicode.factor index 1e9f8b8642..e69de29bb2 100644 --- a/basis/bootstrap/unicode/unicode.factor +++ b/basis/bootstrap/unicode/unicode.factor @@ -1,5 +0,0 @@ -USING: strings.parser kernel namespaces unicode unicode.data ; -IN: bootstrap.unicode - -[ name>char [ "Invalid character" throw ] unless* ] -name>char-hook set-global diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index c800205704..773bbeed5f 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: unicode.data sequences sequences.next namespaces make +USING: unicode.data sequences sequences.next namespaces make unicode.syntax unicode.normalize math unicode.categories combinators unicode.syntax assocs strings splitting kernel accessors unicode.breaks fry ; IN: unicode.case diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 6cf913bffa..e78b4c104a 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -4,7 +4,8 @@ USING: combinators.short-circuit assocs math kernel sequences io.files hashtables quotations splitting grouping arrays io math.parser hash2 math.order byte-arrays words namespaces words compiler.units parser io.encodings.ascii values interval-maps -ascii sets combinators locals math.ranges sorting make io.encodings.utf8 ; +ascii sets combinators locals math.ranges sorting make +strings.parser io.encodings.utf8 ; IN: unicode.data VALUE: simple-lower @@ -218,3 +219,6 @@ SYMBOL: interned : load-script ( filename -- table ) ascii parse-script process-script ; + +[ name>char [ "Invalid character" throw ] unless* ] +name>char-hook set-global From e661c67189bd27ae64977ef4f54386db9bec3d8d Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 8 Jan 2009 23:09:38 -0600 Subject: [PATCH 13/29] refactor a word to use smart combinators --- .../tree/propagation/inlining/inlining.factor | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/basis/compiler/tree/propagation/inlining/inlining.factor b/basis/compiler/tree/propagation/inlining/inlining.factor index bd6d657442..7b3135e85c 100644 --- a/basis/compiler/tree/propagation/inlining/inlining.factor +++ b/basis/compiler/tree/propagation/inlining/inlining.factor @@ -3,7 +3,7 @@ USING: accessors kernel arrays sequences math math.order math.partial-dispatch generic generic.standard generic.math classes.algebra classes.union sets quotations assocs combinators -words namespaces continuations classes fry +words namespaces continuations classes fry combinators.smart compiler.tree compiler.tree.builder compiler.tree.recursive @@ -134,17 +134,19 @@ DEFER: (flat-length) over 2 <= [ drop ] [ 2/ 1+ * ] if 24 swap [-] 4 /i ; : inlining-rank ( #call word -- n ) - [ classes-known? 2 0 ? ] [ - { - [ body-length-bias ] - [ "default" word-prop -4 0 ? ] - [ "specializer" word-prop 1 0 ? ] - [ method-body? 1 0 ? ] - } cleave - node-count-bias - loop-nesting get 0 or 2 * - ] bi* + + + + + + ; + [ classes-known? 2 0 ? ] + [ + { + [ body-length-bias ] + [ "default" word-prop -4 0 ? ] + [ "specializer" word-prop 1 0 ? ] + [ method-body? 1 0 ? ] + } cleave + node-count-bias + loop-nesting get 0 or 2 * + ] bi* + ] sum-outputs ; : should-inline? ( #call word -- ? ) dup "inline" word-prop [ 2drop t ] [ inlining-rank 5 >= ] if ; From 1ed964e53989341a94875dcf5d547dbba9b158e9 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 9 Jan 2009 14:03:33 -0600 Subject: [PATCH 14/29] Speeding up normalization --- basis/unicode/case/case.factor | 57 ++++--- .../unicode/normalize/normalize-tests.factor | 2 + basis/unicode/normalize/normalize.factor | 146 ++++++++++-------- 3 files changed, 114 insertions(+), 91 deletions(-) diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index 773bbeed5f..555a39ac88 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -1,16 +1,18 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: unicode.data sequences sequences.next namespaces make unicode.syntax -unicode.normalize math unicode.categories combinators unicode.syntax -assocs strings splitting kernel accessors unicode.breaks fry ; +USING: unicode.data sequences sequences.next namespaces +sbufs make unicode.syntax unicode.normalize math hints +unicode.categories combinators unicode.syntax assocs +strings splitting kernel accessors unicode.breaks fry locals ; +QUALIFIED: ascii IN: unicode.case lower ( ch -- lower ) simple-lower at-default ; -: ch>upper ( ch -- upper ) simple-upper at-default ; -: ch>title ( ch -- title ) simple-title at-default ; +: ch>lower ( ch -- lower ) simple-lower at-default ; inline +: ch>upper ( ch -- upper ) simple-upper at-default ; inline +: ch>title ( ch -- title ) simple-title at-default ; inline PRIVATE> SYMBOL: locale ! Just casing locale, or overall? @@ -21,7 +23,7 @@ SYMBOL: locale ! Just casing locale, or overall? [ dup ] swap '[ _ split1-slice swap ] [ ] produce nip ; : replace ( old new str -- newstr ) - [ split-subseq ] dip join ; + [ split-subseq ] dip join ; inline : i-dot? ( -- ? ) locale get { "tr" "az" } member? ; @@ -44,24 +46,24 @@ SYMBOL: locale ! Just casing locale, or overall? [ [ "" ] [ dup first mark-above? [ CHAR: combining-dot-above prefix ] when - ] if-empty ] with-rest ; + ] if-empty ] with-rest ; inline : lithuanian>lower ( string -- lower ) "i" split add-dots "i" join - "j" split add-dots "i" join ; + "j" split add-dots "i" join ; inline : turk>upper ( string -- upper-i ) - "i" "I\u000307" replace ; + "i" "I\u000307" replace ; inline : turk>lower ( string -- lower-i ) "I\u000307" "i" replace - "I" "\u000131" replace ; + "I" "\u000131" replace ; inline : fix-sigma-end ( string -- string ) [ "" ] [ dup peek CHAR: greek-small-letter-sigma = [ 1 head* CHAR: greek-small-letter-final-sigma suffix ] when - ] if-empty ; + ] if-empty ; inline : sigma-map ( string -- string ) { CHAR: greek-capital-letter-sigma } split [ [ @@ -70,19 +72,20 @@ SYMBOL: locale ! Just casing locale, or overall? CHAR: greek-small-letter-final-sigma CHAR: greek-small-letter-sigma ? prefix ] if-empty - ] map ] with-rest concat fix-sigma-end ; + ] map ] with-rest concat fix-sigma-end ; inline : final-sigma ( string -- string ) CHAR: greek-capital-letter-sigma - over member? [ sigma-map ] when ; + over member? [ sigma-map ] when + "" like ; inline -: map-case ( string string-quot char-quot -- case ) - [ - [ - [ dup special-casing at ] 2dip - [ [ % ] compose ] [ [ , ] compose ] bi* ?if - ] 2curry each - ] "" make ; inline +:: map-case ( string string-quot char-quot -- case ) + string length :> out + string [ + dup special-casing at + [ string-quot call out push-all ] + [ char-quot call out push ] ?if + ] each out "" like ; inline PRIVATE> @@ -90,24 +93,30 @@ PRIVATE> i-dot? [ turk>lower ] when final-sigma [ lower>> ] [ ch>lower ] map-case ; +HINTS: >lower string ; + : >upper ( string -- upper ) i-dot? [ turk>upper ] when [ upper>> ] [ ch>upper ] map-case ; +HINTS: >upper string ; + title) ( string -- title ) i-dot? [ turk>upper ] when - [ title>> ] [ ch>title ] map-case ; + [ title>> ] [ ch>title ] map-case ; inline : title-word ( string -- title ) - unclip 1string [ >lower ] [ (>title) ] bi* prepend ; + unclip 1string [ >lower ] [ (>title) ] bi* prepend ; inline PRIVATE> : >title ( string -- title ) final-sigma >words [ title-word ] map concat ; +HINTS: >title string ; + : >case-fold ( string -- fold ) >upper >lower ; diff --git a/basis/unicode/normalize/normalize-tests.factor b/basis/unicode/normalize/normalize-tests.factor index 25d5ce365c..1242e1d358 100644 --- a/basis/unicode/normalize/normalize-tests.factor +++ b/basis/unicode/normalize/normalize-tests.factor @@ -3,6 +3,8 @@ unicode.data io.encodings.utf8 io.files splitting math.parser locals math quotations assocs combinators unicode.normalize.private ; IN: unicode.normalize.tests +{ nfc nfkc nfd nfkd } [ must-infer ] each + [ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test [ "ab\u00064b\u000347\u00034e\u00034d\u000346" ] [ "ab\u000346\u000347\u00064b\u00034e\u00034d" dup reorder ] unit-test diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index 7a41a768cd..f7aa248028 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences namespaces make unicode.data kernel math arrays +USING: ascii sequences namespaces make unicode.data kernel math arrays locals sorting.insertion accessors assocs math.order combinators -unicode.syntax strings sbufs ; +unicode.syntax strings sbufs hints combinators.short-circuit vectors ; IN: unicode.normalize jamo ( hangul -- jamo-string ) hangul-base - final-count /mod final-base + @@ -48,16 +48,16 @@ CONSTANT: final-count 28 : reorder-slice ( string start -- slice done? ) 2dup swap [ non-starter? not ] find-from drop - [ [ over length ] unless* rot ] keep not ; + [ [ over length ] unless* rot ] keep not ; inline : reorder-next ( string i -- new-i done? ) over [ non-starter? ] find-from drop [ reorder-slice [ dup [ combining-class ] insertion-sort to>> ] dip - ] [ length t ] if* ; + ] [ length t ] if* ; inline : reorder-loop ( string start -- ) - dupd reorder-next [ 2drop ] [ reorder-loop ] if ; + dupd reorder-next [ 2drop ] [ reorder-loop ] if ; inline recursive : reorder ( string -- ) 0 reorder-loop ; @@ -66,12 +66,14 @@ CONSTANT: final-count 28 over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ; :: decompose ( string quot -- decomposed ) - [let | out [ string length ] | - string [ + string length :> out + string [ + >fixnum dup ascii? [ out push ] [ dup hangul? [ hangul>jamo out push-all ] [ dup quot call [ out push-all ] [ out push ] ?if ] if - ] each out >string - ] dup reorder ; + ] if + ] each + out "" like dup reorder ; inline : with-string ( str quot -- str ) over aux>> [ call ] [ drop ] if ; inline @@ -79,9 +81,13 @@ CONSTANT: final-count 28 : (nfd) ( string -- nfd ) [ canonical-entry ] decompose ; +HINTS: (nfd) string ; + : (nfkd) ( string -- nfkd ) [ compatibility-entry ] decompose ; +HINTS: (nfkd) string ; + PRIVATE> : nfd ( string -- nfd ) @@ -95,83 +101,89 @@ PRIVATE> 0 over ?nth non-starter? [ length dupd reorder-back ] [ drop ] if ; +HINTS: string-append string string ; + hangul , ] + [ 3 + ] 2bi ; -: imf, ( -- ) - current to current to current jamo>hangul , ; +: im, ( str i -- str i ) + [ tail-slice first2 final-base jamo>hangul , ] + [ 2 + ] 2bi ; -: im, ( -- ) - current to current final-base jamo>hangul , ; +: compose-jamo ( str i -- str i ) + 2dup initial-medial? [ + 2dup --final? [ imf, ] [ im, ] if + ] [ 2dup swap nth , 1+ ] if ; -: compose-jamo ( -- ) - initial-medial? [ - --final? [ imf, ] [ im, ] if - ] [ current , ] if to ; +: pass-combining ( str -- str i ) + dup [ non-starter? not ] find drop + [ dup length ] unless* + 2dup head-slice % ; -: pass-combining ( -- ) - current non-starter? [ current , to pass-combining ] when ; +TUPLE: compose-state i str char after last-class ; -:: try-compose ( last-class new-char current-class -- new-class ) - last-class current-class = [ new-char after get push last-class ] [ - char get new-char combine-chars - [ char set last-class ] - [ new-char after get push current-class ] if* +: get-str ( state i -- ch ) + swap [ i>> + ] [ str>> ] bi ?nth ; +: current ( state -- ch ) 0 get-str ; +: to ( state -- state ) [ 1+ ] change-i ; +: push-after ( ch state -- state ) [ ?push ] change-after ; + +:: try-compose ( state new-char current-class -- state ) + state last-class>> current-class = + [ new-char state push-after ] [ + state char>> new-char combine-chars + [ state swap >>char ] [ + new-char state push-after + current-class >>last-class + ] if* ] if ; DEFER: compose-iter -: try-noncombining ( char -- ) - char get swap combine-chars - [ char set to f compose-iter ] when* ; +: try-noncombining ( char state -- state ) + tuck char>> swap combine-chars + [ >>char to f >>last-class compose-iter ] when* ; -: compose-iter ( last-class -- ) - current [ +: compose-iter ( state -- state ) + dup current [ dup combining-class { - { f [ 2drop ] } - { 0 [ swap [ drop ] [ try-noncombining ] if ] } + { f [ drop ] } + { 0 [ + over last-class>> + [ drop ] [ swap try-noncombining ] if ] } [ try-compose to compose-iter ] } case - ] [ drop ] if* ; + ] when* ; -: ?new-after ( -- ) - after [ dup empty? [ drop SBUF" " clone ] unless ] change ; +: compose-combining ( ch str i -- str i ) + compose-state new + swap >>i + swap >>str + swap >>char + compose-iter + { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ; -: compose-combining ( ch -- ) - char set to ?new-after - f compose-iter - char get , after get % ; - -: (compose) ( -- ) - current [ - dup jamo? [ drop compose-jamo ] [ - 1 get-str combining-class - [ compose-combining ] [ , to ] if +:: (compose) ( str i -- ) + i str ?nth [ + dup jamo? [ drop str i compose-jamo ] [ + i 1+ str ?nth combining-class + [ str i 1+ compose-combining ] [ , str i 1+ ] if ] if (compose) ] when* ; : combine ( str -- comp ) - [ - main-str set - 0 ind set - SBUF" " clone after set - pass-combining (compose) - ] "" make ; + [ pass-combining (compose) ] "" make ; PRIVATE> From 69e4fe1f766ee410567c4b86741f3fe499adf9df Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 14:31:02 -0600 Subject: [PATCH 15/29] fix bootstrap on a couple platforms --- basis/io/files/info/unix/freebsd/freebsd.factor | 2 +- basis/io/files/info/unix/linux/linux.factor | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/io/files/info/unix/freebsd/freebsd.factor b/basis/io/files/info/unix/freebsd/freebsd.factor index 398e4ff968..11025e14e6 100644 --- a/basis/io/files/info/unix/freebsd/freebsd.factor +++ b/basis/io/files/info/unix/freebsd/freebsd.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types alien.syntax combinators io.backend io.files io.files.info io.files.unix kernel math system unix unix.statfs.freebsd unix.statvfs.freebsd unix.getfsstat.freebsd -sequences grouping alien.strings io.encodings.utf8 +sequences grouping alien.strings io.encodings.utf8 unix.types specialized-arrays.direct.uint arrays io.files.info.unix ; IN: io.files.info.unix.freebsd diff --git a/basis/io/files/info/unix/linux/linux.factor b/basis/io/files/info/unix/linux/linux.factor index 60313b3306..b447b6e54f 100644 --- a/basis/io/files/info/unix/linux/linux.factor +++ b/basis/io/files/info/unix/linux/linux.factor @@ -5,7 +5,7 @@ io.backend io.encodings.utf8 io.files io.files.info io.streams.string io.files.unix kernel math.order namespaces sequences sorting system unix unix.statfs.linux unix.statvfs.linux io.files.links specialized-arrays.direct.uint arrays io.files.info.unix assocs -io.pathnames ; +io.pathnames unix.types ; IN: io.files.info.unix.linux TUPLE: linux-file-system-info < unix-file-system-info From fe2a43b481a93170bc38666d0b6cdf89d5862f21 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 14:41:00 -0600 Subject: [PATCH 16/29] fix bootstrap --- basis/unix/statfs/freebsd/freebsd.factor | 2 +- basis/unix/statfs/linux/linux.factor | 2 +- basis/unix/statfs/macosx/macosx.factor | 3 ++- basis/unix/statfs/openbsd/openbsd.factor | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/basis/unix/statfs/freebsd/freebsd.factor b/basis/unix/statfs/freebsd/freebsd.factor index e6a033e09d..efd12b7d6c 100644 --- a/basis/unix/statfs/freebsd/freebsd.factor +++ b/basis/unix/statfs/freebsd/freebsd.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax ; +USING: alien.syntax unix.types ; IN: unix.statfs.freebsd CONSTANT: MFSNAMELEN 16 ! length of type name including null */ diff --git a/basis/unix/statfs/linux/linux.factor b/basis/unix/statfs/linux/linux.factor index 6550ee572e..20688680fb 100644 --- a/basis/unix/statfs/linux/linux.factor +++ b/basis/unix/statfs/linux/linux.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax ; +USING: alien.syntax unix.types ; IN: unix.statfs.linux C-STRUCT: statfs64 diff --git a/basis/unix/statfs/macosx/macosx.factor b/basis/unix/statfs/macosx/macosx.factor index f80eb29ccd..c262949730 100644 --- a/basis/unix/statfs/macosx/macosx.factor +++ b/basis/unix/statfs/macosx/macosx.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien.c-types io.encodings.utf8 io.encodings.string kernel sequences unix.stat accessors unix combinators math -grouping system alien.strings math.bitwise alien.syntax ; +grouping system alien.strings math.bitwise alien.syntax +unix.types ; IN: unix.statfs.macosx CONSTANT: MNT_RDONLY HEX: 00000001 diff --git a/basis/unix/statfs/openbsd/openbsd.factor b/basis/unix/statfs/openbsd/openbsd.factor index f495f2af4e..456883514a 100644 --- a/basis/unix/statfs/openbsd/openbsd.factor +++ b/basis/unix/statfs/openbsd/openbsd.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax ; +USING: alien.syntax unix.types ; IN: unix.statfs.openbsd CONSTANT: MFSNAMELEN 16 From c8fe4b21e7d56088a9f8145078f603a9ebce94f5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 15:39:47 -0600 Subject: [PATCH 17/29] Rename reduce-output -> reduce-outputs --- basis/combinators/smart/smart-docs.factor | 6 +++--- basis/combinators/smart/smart-tests.factor | 4 ++-- basis/combinators/smart/smart.factor | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/basis/combinators/smart/smart-docs.factor b/basis/combinators/smart/smart-docs.factor index 69ec3e7013..3df709c9fa 100644 --- a/basis/combinators/smart/smart-docs.factor +++ b/basis/combinators/smart/smart-docs.factor @@ -48,7 +48,7 @@ HELP: output>sequence } } ; -HELP: reduce-output +HELP: reduce-outputs { $values { "quot" quotation } { "operation" quotation } { "newquot" quotation } @@ -57,7 +57,7 @@ HELP: reduce-output { $examples { $example "USING: combinators.smart kernel math prettyprint ;" - "3 [ [ 4 * ] [ 4 / ] [ 4 - ] tri ] [ * ] reduce-output ." + "3 [ [ 4 * ] [ 4 / ] [ 4 - ] tri ] [ * ] reduce-outputs ." "-9" } } ; @@ -84,7 +84,7 @@ ARTICLE: "combinators.smart" "Smart combinators" { $subsection output>sequence } { $subsection output>array } "Reducing the output of a quotation:" -{ $subsection reduce-output } +{ $subsection reduce-outputs } "Summing the output of a quotation:" { $subsection sum-outputs } ; diff --git a/basis/combinators/smart/smart-tests.factor b/basis/combinators/smart/smart-tests.factor index 4be445e465..54c53477db 100644 --- a/basis/combinators/smart/smart-tests.factor +++ b/basis/combinators/smart/smart-tests.factor @@ -14,8 +14,8 @@ IN: combinators.smart.tests -[ 6 ] [ [ 1 2 3 ] [ + ] reduce-output ] unit-test +[ 6 ] [ [ 1 2 3 ] [ + ] reduce-outputs ] unit-test -[ [ 1 2 3 ] [ + ] reduce-output ] must-infer +[ [ 1 2 3 ] [ + ] reduce-outputs ] must-infer [ 6 ] [ [ 1 2 3 ] sum-outputs ] unit-test diff --git a/basis/combinators/smart/smart.factor b/basis/combinators/smart/smart.factor index fcd28aac74..7a68cb5c1c 100644 --- a/basis/combinators/smart/smart.factor +++ b/basis/combinators/smart/smart.factor @@ -15,8 +15,8 @@ MACRO: input> ] keep '[ _ firstn @ ] ; -MACRO: reduce-output ( quot operation -- newquot ) +MACRO: reduce-outputs ( quot operation -- newquot ) [ dup infer out>> 1 [-] ] dip n*quot compose ; : sum-outputs ( quot -- n ) - [ + ] reduce-output ; inline + [ + ] reduce-outputs ; inline From fb25d04061d595050533b9b98eceda8c7e74dfe1 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 9 Jan 2009 15:53:35 -0600 Subject: [PATCH 18/29] Optimizing and cleaning up unicode.breaks and unicode.normalize --- basis/unicode/breaks/breaks.factor | 127 +++++++++++------------ basis/unicode/normalize/normalize.factor | 20 ++-- 2 files changed, 72 insertions(+), 75 deletions(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index df3b2f03e8..10bc235805 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -4,7 +4,8 @@ USING: combinators.short-circuit unicode.categories kernel math combinators splitting sequences math.parser io.files io assocs arrays namespaces make math.ranges unicode.normalize.private values io.encodings.ascii unicode.syntax unicode.data compiler.units fry -alien.syntax sets accessors interval-maps memoize locals words ; +alien.syntax sets accessors interval-maps memoize locals words +strings hints ; IN: unicode.breaks : first-grapheme ( str -- i ) unclip-slice grapheme-class over - [ grapheme-class tuck grapheme-break? ] find-index + [ grapheme-class tuck grapheme-break? ] find drop nip swap length or 1+ ; : last-grapheme ( str -- i ) unclip-last-slice grapheme-class swap - [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ; + [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ; -:: first-word ( str -- i ) - str unclip-slice word-break-prop over - [ swap str word-break-next ] assoc-find 2drop - nip swap length or 1+ ; +: first-word ( str -- i ) + [ unclip-slice word-break-prop over ] keep + '[ swap _ word-break-next ] assoc-find 2drop + nip swap length or 1+ ; inline + +HINTS: first-word string ; : >words ( str -- words ) [ first-word ] >pieces ; + +HINTS: >words string ; diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor index f7aa248028..892379dc89 100644 --- a/basis/unicode/normalize/normalize.factor +++ b/basis/unicode/normalize/normalize.factor @@ -134,10 +134,10 @@ HINTS: string-append string string ; TUPLE: compose-state i str char after last-class ; : get-str ( state i -- ch ) - swap [ i>> + ] [ str>> ] bi ?nth ; -: current ( state -- ch ) 0 get-str ; -: to ( state -- state ) [ 1+ ] change-i ; -: push-after ( ch state -- state ) [ ?push ] change-after ; + swap [ i>> + ] [ str>> ] bi ?nth ; inline +: current ( state -- ch ) 0 get-str ; inline +: to ( state -- state ) [ 1+ ] change-i ; inline +: push-after ( ch state -- state ) [ ?push ] change-after ; inline :: try-compose ( state new-char current-class -- state ) state last-class>> current-class = @@ -147,13 +147,13 @@ TUPLE: compose-state i str char after last-class ; new-char state push-after current-class >>last-class ] if* - ] if ; + ] if ; inline DEFER: compose-iter : try-noncombining ( char state -- state ) tuck char>> swap combine-chars - [ >>char to f >>last-class compose-iter ] when* ; + [ >>char to f >>last-class compose-iter ] when* ; inline : compose-iter ( state -- state ) dup current [ @@ -164,7 +164,7 @@ DEFER: compose-iter [ drop ] [ swap try-noncombining ] if ] } [ try-compose to compose-iter ] } case - ] when* ; + ] when* ; inline recursive : compose-combining ( ch str i -- str i ) compose-state new @@ -172,7 +172,7 @@ DEFER: compose-iter swap >>str swap >>char compose-iter - { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ; + { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ; inline :: (compose) ( str i -- ) i str ?nth [ @@ -180,11 +180,13 @@ DEFER: compose-iter i 1+ str ?nth combining-class [ str i 1+ compose-combining ] [ , str i 1+ ] if ] if (compose) - ] when* ; + ] when* ; inline recursive : combine ( str -- comp ) [ pass-combining (compose) ] "" make ; +HINTS: combine string ; + PRIVATE> : nfc ( string -- nfc ) From 7b36938e8c67db929b1a34b4ac6d7771fe442b18 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 16:07:16 -0600 Subject: [PATCH 19/29] use unix.stat to fix bootstrap --- basis/unix/statfs/freebsd/freebsd.factor | 2 +- basis/unix/statfs/linux/linux.factor | 2 +- basis/unix/statfs/openbsd/openbsd.factor | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/basis/unix/statfs/freebsd/freebsd.factor b/basis/unix/statfs/freebsd/freebsd.factor index efd12b7d6c..70e2d5e561 100644 --- a/basis/unix/statfs/freebsd/freebsd.factor +++ b/basis/unix/statfs/freebsd/freebsd.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax unix.types ; +USING: alien.syntax unix.types unix.stat ; IN: unix.statfs.freebsd CONSTANT: MFSNAMELEN 16 ! length of type name including null */ diff --git a/basis/unix/statfs/linux/linux.factor b/basis/unix/statfs/linux/linux.factor index 20688680fb..c0db5ced1d 100644 --- a/basis/unix/statfs/linux/linux.factor +++ b/basis/unix/statfs/linux/linux.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax unix.types ; +USING: alien.syntax unix.types unix.stat ; IN: unix.statfs.linux C-STRUCT: statfs64 diff --git a/basis/unix/statfs/openbsd/openbsd.factor b/basis/unix/statfs/openbsd/openbsd.factor index 456883514a..60590be4ea 100644 --- a/basis/unix/statfs/openbsd/openbsd.factor +++ b/basis/unix/statfs/openbsd/openbsd.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.syntax unix.types ; +USING: alien.syntax unix.types unix.stat ; IN: unix.statfs.openbsd CONSTANT: MFSNAMELEN 16 From e61acc5eee7f59d1b58746bc2b2624da773b3df7 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 16:34:31 -0600 Subject: [PATCH 20/29] username -> user-name in a couple of places --- basis/io/files/unix/unix-tests.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/io/files/unix/unix-tests.factor b/basis/io/files/unix/unix-tests.factor index 48a128d862..003cb40621 100644 --- a/basis/io/files/unix/unix-tests.factor +++ b/basis/io/files/unix/unix-tests.factor @@ -117,12 +117,12 @@ prepare-test-file [ ] [ test-file f f 2array set-file-times ] unit-test -[ ] [ test-file real-username set-file-user ] unit-test +[ ] [ test-file real-user-name set-file-user ] unit-test [ ] [ test-file real-user-id set-file-user ] unit-test [ ] [ test-file real-group-name set-file-group ] unit-test [ ] [ test-file real-group-id set-file-group ] unit-test -[ t ] [ test-file file-username real-username = ] unit-test +[ t ] [ test-file file-user-name real-user-name = ] unit-test [ t ] [ test-file file-group-name real-group-name = ] unit-test [ ] From 2714de3b85c570e252226f7344081deb0c76a78c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 16:35:28 -0600 Subject: [PATCH 21/29] fix help-lint for values -- IN: scratchpad in an example --- basis/values/values-docs.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/values/values-docs.factor b/basis/values/values-docs.factor index 59bf77da3a..df38869fbf 100644 --- a/basis/values/values-docs.factor +++ b/basis/values/values-docs.factor @@ -19,6 +19,7 @@ HELP: VALUE: { $examples { $example "USING: values math prettyprint ;" + "IN: scratchpad" "VALUE: x" "2 2 + to: x" "x ." From 56808874f1a32dc20f10b90201386cc44d6bd9b5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 15:28:10 -0600 Subject: [PATCH 22/29] fix group-name on netbsd --- basis/unix/groups/groups-docs.factor | 6 +++--- basis/unix/groups/groups-tests.factor | 2 ++ basis/unix/groups/groups.factor | 20 ++++++++++++-------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/basis/unix/groups/groups-docs.factor b/basis/unix/groups/groups-docs.factor index 18c2e2384a..07911bc96b 100644 --- a/basis/unix/groups/groups-docs.factor +++ b/basis/unix/groups/groups-docs.factor @@ -24,8 +24,8 @@ HELP: group-cache HELP: group-id { $values { "string" string } - { "id" integer } } -{ $description "Returns the group id given a group name." } ; + { "id/f" "an integer or f" } } +{ $description "Returns the group id given a group name. Returns " { $link f } " if the group does not exist." } ; HELP: group-name { $values @@ -36,7 +36,7 @@ HELP: group-name HELP: group-struct { $values { "obj" object } - { "group" "a group struct" } } + { "group/f" "a group struct or f" } } { $description "Returns an alien group struct to be turned into a group tuple by calling subsequent words." } ; HELP: real-group-id diff --git a/basis/unix/groups/groups-tests.factor b/basis/unix/groups/groups-tests.factor index 75f5d64b5f..2e989b32c0 100644 --- a/basis/unix/groups/groups-tests.factor +++ b/basis/unix/groups/groups-tests.factor @@ -27,3 +27,5 @@ IN: unix.groups.tests [ ] [ real-group-id group-name drop ] unit-test [ "888888888888888" ] [ 888888888888888 group-name ] unit-test +[ f ] +[ "please-oh-please-don't-have-a-group-named-this123lalala" group-struct ] unit-test diff --git a/basis/unix/groups/groups.factor b/basis/unix/groups/groups.factor index 164afa46fb..371bec9a70 100644 --- a/basis/unix/groups/groups.factor +++ b/basis/unix/groups/groups.factor @@ -13,7 +13,7 @@ TUPLE: group id name passwd members ; SYMBOL: group-cache -GENERIC: group-struct ( obj -- group ) +GENERIC: group-struct ( obj -- group/f ) tuck 4096 [ ] keep f ; -M: integer group-struct ( id -- group ) - (group-struct) getgrgid_r io-error ; +: check-group-struct ( group-struct ptr -- group-struct/f ) + *void* [ drop f ] unless ; -M: string group-struct ( string -- group ) - (group-struct) getgrnam_r 0 = [ (io-error) ] unless ; +M: integer group-struct ( id -- group/f ) + (group-struct) [ getgrgid_r io-error ] keep check-group-struct ; + +M: string group-struct ( string -- group/f ) + (group-struct) [ getgrnam_r io-error ] keep check-group-struct ; : group-struct>group ( group-struct -- group ) [ \ group new ] dip @@ -43,14 +46,15 @@ PRIVATE> : group-name ( id -- string ) dup group-cache get [ + "yo" print dupd at* [ name>> nip ] [ drop number>string ] if ] [ - group-struct group-gr_name + group-struct [ group-gr_name ] [ f ] if* ] if* [ nip ] [ number>string ] if* ; -: group-id ( string -- id ) - group-struct group-gr_gid ; +: group-id ( string -- id/f ) + group-struct [ group-gr_gid ] [ f ] if* ; Date: Fri, 9 Jan 2009 15:34:46 -0600 Subject: [PATCH 23/29] display available-space for file-systems. --- basis/tools/files/files.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/tools/files/files.factor b/basis/tools/files/files.factor index e6ca02d5f9..9066f3a219 100755 --- a/basis/tools/files/files.factor +++ b/basis/tools/files/files.factor @@ -65,7 +65,7 @@ percent-used percent-free ; [ [ unparse ] map ] bi prefix simple-table. ; : file-systems. ( -- ) - { device-name free-space used-space total-space percent-used mount-point } + { device-name available-space free-space used-space total-space percent-used mount-point } print-file-systems ; { From b08e1a02053aa82562db62b059362a27750c280b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 17:44:21 -0600 Subject: [PATCH 24/29] remove debug line --- basis/unix/groups/groups.factor | 1 - 1 file changed, 1 deletion(-) diff --git a/basis/unix/groups/groups.factor b/basis/unix/groups/groups.factor index 371bec9a70..f4d91df245 100644 --- a/basis/unix/groups/groups.factor +++ b/basis/unix/groups/groups.factor @@ -46,7 +46,6 @@ PRIVATE> : group-name ( id -- string ) dup group-cache get [ - "yo" print dupd at* [ name>> nip ] [ drop number>string ] if ] [ group-struct [ group-gr_name ] [ f ] if* From ea4f8867c762e40680a50dae92f3215658714f9b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 19:04:10 -0600 Subject: [PATCH 25/29] make monotonic-slice compile --- .../splitting/monotonic/monotonic-tests.factor | 2 ++ basis/splitting/monotonic/monotonic.factor | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/basis/splitting/monotonic/monotonic-tests.factor b/basis/splitting/monotonic/monotonic-tests.factor index 7bf9a38e8a..2b44f42394 100644 --- a/basis/splitting/monotonic/monotonic-tests.factor +++ b/basis/splitting/monotonic/monotonic-tests.factor @@ -15,6 +15,8 @@ USING: tools.test math arrays kernel sequences ; [ { { 1 } } ] [ { 1 } [ = ] slice monotonic-slice [ >array ] map ] unit-test +[ { 1 } [ = ] slice monotonic-slice ] must-infer + [ t ] [ { 1 1 1 2 2 3 3 4 } [ = ] slice monotonic-slice [ slice? ] all? ] unit-test diff --git a/basis/splitting/monotonic/monotonic.factor b/basis/splitting/monotonic/monotonic.factor index e39bba25ab..2e2ac74e30 100644 --- a/basis/splitting/monotonic/monotonic.factor +++ b/basis/splitting/monotonic/monotonic.factor @@ -24,13 +24,15 @@ PRIVATE> 1 over change-circular-start ] tri - [ @ not [ , ] [ drop ] if ] 3each - ] { } make - dup empty? [ over length 1- prefix ] when -1 prefix 2 clump - [ first2 [ 1+ ] bi@ rot roll boa ] with with map ; inline + [ + dupd '[ + [ length ] [ ] [ 1 over change-circular-start ] tri + [ @ not [ , ] [ drop ] if ] 3each + ] { } make + dup empty? [ over length 1- prefix ] when -1 prefix 2 clump + swap + ] dip + '[ first2 [ 1+ ] bi@ _ _ boa ] map ; inline PRIVATE> @@ -39,7 +41,7 @@ PRIVATE> { 0 [ 2drop ] } { 1 [ nip [ 0 1 rot ] dip boa 1array ] } [ drop (monotonic-slice) ] - } case ; + } case ; inline TUPLE: downward-slice < slice ; TUPLE: stable-slice < slice ; From f3c7a870650af971c755f53797430559ade4fbaf Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 19:12:49 -0600 Subject: [PATCH 26/29] clarify how sort works --- core/sorting/sorting-docs.factor | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/sorting/sorting-docs.factor b/core/sorting/sorting-docs.factor index 6ea1485425..290ca1470c 100644 --- a/core/sorting/sorting-docs.factor +++ b/core/sorting/sorting-docs.factor @@ -20,7 +20,8 @@ ABOUT: "sequences-sorting" HELP: sort { $values { "seq" "a sequence" } { "quot" { $quotation "( obj1 obj2 -- <=> )" } } { "sortedseq" "a new sorted sequence" } } -{ $description "Sorts the elements into a new array." } ; +{ $description "Sorts the elements into a new array using a stable sort." } +{ $notes "The algorithm used is the merge sort." } ; HELP: sort-keys { $values { "seq" "an alist" } { "sortedseq" "a new sorted sequence" } } From 5ae9c7fe2fbac48b8167db13bb853dc99afb1de0 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 9 Jan 2009 20:35:01 -0600 Subject: [PATCH 27/29] document 3each, 3map, 3map-as --- core/sequences/sequences-docs.factor | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index 9f18fd4e66..eb621b3225 100644 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -338,6 +338,10 @@ HELP: 2each { $values { "seq1" sequence } { "seq2" sequence } { "quot" { $quotation "( elt1 elt2 -- )" } } } { $description "Applies the quotation to pairs of elements from " { $snippet "seq1" } " and " { $snippet "seq2" } "." } ; +HELP: 3each +{ $values { "seq1" sequence } { "seq2" sequence } { "seq3" sequence } { "quot" { $quotation "( elt1 elt2 elt3 -- )" } } } +{ $description "Applies the quotation to triples of elements from " { $snippet "seq1" } ", " { $snippet "seq2" } " and " { $snippet "seq3" } "." } ; + HELP: 2reduce { $values { "seq1" sequence } { "seq2" sequence } @@ -350,10 +354,18 @@ HELP: 2map { $values { "seq1" sequence } { "seq2" sequence } { "quot" { $quotation "( elt1 elt2 -- new )" } } { "newseq" "a new sequence" } } { $description "Applies the quotation to each pair of elements in turn, yielding new elements which are collected into a new sequence having the same class as " { $snippet "seq1" } "." } ; +HELP: 3map +{ $values { "seq1" sequence } { "seq2" sequence } { "seq3" sequence } { "quot" { $quotation "( elt1 elt2 elt3 -- new )" } } { "newseq" "a new sequence" } } +{ $description "Applies the quotation to each triple of elements in turn, yielding new elements which are collected into a new sequence having the same class as " { $snippet "seq1" } "." } ; + HELP: 2map-as { $values { "seq1" sequence } { "seq2" sequence } { "quot" { $quotation "( elt1 elt2 -- new )" } } { "exemplar" sequence } { "newseq" "a new sequence" } } { $description "Applies the quotation to each pair of elements in turn, yielding new elements which are collected into a new sequence having the same class as " { $snippet "exemplar" } "." } ; +HELP: 3map-as +{ $values { "seq1" sequence } { "seq2" sequence } { "seq3" sequence } { "quot" { $quotation "( elt1 elt2 elt3 -- new )" } } { "exemplar" sequence } { "newseq" "a new sequence" } } +{ $description "Applies the quotation to each triple of elements in turn, yielding new elements which are collected into a new sequence having the same class as " { $snippet "exemplar" } "." } ; + HELP: 2all? { $values { "seq1" sequence } { "seq2" sequence } { "quot" { $quotation "( elt1 elt2 -- ? )" } } { "?" "a boolean" } } { $description "Tests the predicate pairwise against elements of " { $snippet "seq1" } " and " { $snippet "seq2" } "." } ; @@ -1422,16 +1434,23 @@ ARTICLE: "sequences-combinators" "Sequence combinators" { $subsection all? } "Testing how elements are related:" { $subsection monotonic? } -{ $subsection "sequence-2combinators" } ; +{ $subsection "sequence-2combinators" } +{ $subsection "sequence-3combinators" } ; ARTICLE: "sequence-2combinators" "Pair-wise sequence combinators" -"There is a set of combinators which traverse two sequences pairwise. If one sequence is shorter than the other, than only the prefix having the length of the minimum of the two is examined." +"There is a set of combinators which traverse two sequences pairwise. If one sequence is shorter than the other, then only the prefix having the length of the minimum of the two is examined." { $subsection 2each } { $subsection 2reduce } { $subsection 2map } { $subsection 2map-as } { $subsection 2all? } ; +ARTICLE: "sequence-3combinators" "Triple-wise sequence combinators" +"There is a set of combinators which traverse three sequences triple-wise. If one sequence is shorter than the others, then only the prefix having the length of the minimum of the three is examined." +{ $subsection 3each } +{ $subsection 3map } +{ $subsection 3map-as } ; + ARTICLE: "sequences-tests" "Testing sequences" "Testing for an empty sequence:" { $subsection empty? } From a977691d82e06033d04d5c7e4afe3f95572dde20 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 9 Jan 2009 21:35:49 -0600 Subject: [PATCH 28/29] Add iota virtual sequence which will eventually replace integers-as-sequences --- core/sequences/sequences-docs.factor | 11 +++++++++++ core/sequences/sequences.factor | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index 9f18fd4e66..f3a12d9209 100644 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -1262,6 +1262,17 @@ HELP: shorten "V{ 1 2 3 }" } } ; +HELP: iota +{ $values { "n" integer } { "iota" iota } } +{ $description "Creates an immutable virtual sequence containing the integers from 0 to " { $snippet "n-1" } "." } +{ $examples + { $example + "USING: math.parser sequences ;" + "3 iota [ sq ] map ." + "{ \"0\" \"1\" \"2\" }" + } +} ; + ARTICLE: "sequences-unsafe" "Unsafe sequence operations" "The " { $link nth-unsafe } " and " { $link set-nth-unsafe } " sequence protocol bypasses bounds checks for increased performance." $nl diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index 91c9d52404..2a499845bd 100644 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -101,6 +101,16 @@ M: integer nth-unsafe drop ; INSTANCE: integer immutable-sequence +! In the future, this will replace integer sequences +TUPLE: iota { n read-only } ; + +: iota ( n -- iota ) \ iota boa ; inline + +M: iota length n>> ; +M: iota nth-unsafe drop ; + +INSTANCE: iota immutable-sequence + : first-unsafe ( seq -- first ) 0 swap nth-unsafe ; inline From f4cffe8a1b2e6a002c5895d498c6183b40437b1d Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 10 Jan 2009 13:05:25 -0600 Subject: [PATCH 29/29] move iota to sequences, fix example, add unit tests, make iota's slot integers only --- core/sequences/sequences-docs.factor | 4 ++-- core/sequences/sequences-tests.factor | 6 +++++- core/sequences/sequences.factor | 6 +++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index 957b33198e..651c8e8a14 100644 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -1279,9 +1279,9 @@ HELP: iota { $description "Creates an immutable virtual sequence containing the integers from 0 to " { $snippet "n-1" } "." } { $examples { $example - "USING: math.parser sequences ;" + "USING: math sequences prettyprint ;" "3 iota [ sq ] map ." - "{ \"0\" \"1\" \"2\" }" + "{ 0 1 4 }" } } ; diff --git a/core/sequences/sequences-tests.factor b/core/sequences/sequences-tests.factor index 80352faf72..9adc6bc602 100644 --- a/core/sequences/sequences-tests.factor +++ b/core/sequences/sequences-tests.factor @@ -276,4 +276,8 @@ M: bogus-hashcode hashcode* 2drop 0 >bignum ; { 3 0 } [ [ 3drop ] 3each ] must-infer-as -[ V{ 0 3 } ] [ "A" { "A" "B" "C" "A" "D" } indices ] unit-test \ No newline at end of file +[ V{ 0 3 } ] [ "A" { "A" "B" "C" "A" "D" } indices ] unit-test + +[ "asdf" iota ] must-fail +[ T{ iota { n 10 } } ] [ 10 iota ] unit-test +[ 0 ] [ 10 iota first ] unit-test diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index 2a499845bd..5a92dcaf2d 100644 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -101,11 +101,15 @@ M: integer nth-unsafe drop ; INSTANCE: integer immutable-sequence +PRIVATE> + ! In the future, this will replace integer sequences -TUPLE: iota { n read-only } ; +TUPLE: iota { n integer read-only } ; : iota ( n -- iota ) \ iota boa ; inline +> ; M: iota nth-unsafe drop ;