From 18a36c334a5c37895b6111dd1a7d023b84d3d6da Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Sun, 18 May 2014 20:47:36 -0700 Subject: [PATCH] unicode: some performance improvements to category checking. --- basis/unicode/categories/categories.factor | 2 +- basis/unicode/categories/syntax/syntax.factor | 4 +- basis/unicode/data/data.factor | 39 ++++++++++--------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/basis/unicode/categories/categories.factor b/basis/unicode/categories/categories.factor index 4ca5c9a90e..1e6a7b3a99 100644 --- a/basis/unicode/categories/categories.factor +++ b/basis/unicode/categories/categories.factor @@ -11,6 +11,6 @@ CATEGORY: digit Nd Nl No ; CATEGORY-NOT: printable Cc Cf Cs Co Cn ; CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ; CATEGORY: control Cc ; -CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; +CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; CATEGORY-NOT: character Cn ; CATEGORY: math Sm | "Other_Math" property? ; diff --git a/basis/unicode/categories/syntax/syntax.factor b/basis/unicode/categories/syntax/syntax.factor index 4c763bd2bc..acb3fbd9bb 100644 --- a/basis/unicode/categories/syntax/syntax.factor +++ b/basis/unicode/categories/syntax/syntax.factor @@ -11,7 +11,7 @@ SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs fixnum-strict dup category# _ member? [ drop t ] _ if ] ; : integer-predicate-class ( word predicate -- ) integer swap define-predicate-class ; @@ -24,7 +24,7 @@ SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs : parse-category ( -- word tokens quot ) scan-new-class \ ; parse-until { | } split1 - [ [ name>> categories-map at ] map ] + [ [ name>> categories-map at ] B{ } map-as ] [ [ [ ] like ] [ [ drop f ] ] if* ] bi* ; PRIVATE> diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 94c4e12075..fe60acc840 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -1,11 +1,10 @@ ! Copyright (C) 2008, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: combinators.short-circuit assocs math kernel sequences -io.files hashtables quotations splitting grouping arrays io -math.parser math.order byte-arrays namespaces math.bitwise -compiler.units parser io.encodings.ascii interval-maps -ascii sets combinators locals math.ranges sorting make -strings.parser io.encodings.utf8 memoize simple-flat-file ; +USING: arrays ascii assocs byte-arrays combinators +combinators.short-circuit grouping hashtables interval-maps +io.encodings.utf8 io.files kernel locals make math math.bitwise +math.order math.parser math.ranges memoize namespaces sequences +sets simple-flat-file sorting splitting strings.parser ; IN: unicode.data char ( name -- char ) name-map at ; inline : char>name ( char -- name ) name-map value-at ; inline -: property? ( char property -- ? ) properties at interval-key? ; inline +: property ( property -- interval-map ) properties at ; foldable +: property? ( char property -- ? ) property interval-key? ; inline : ch>lower ( ch -- lower ) simple-lower ?at drop ; inline : ch>upper ( ch -- upper ) simple-upper ?at drop ; inline : ch>title ( ch -- title ) simple-title ?at drop ; inline : special-case ( ch -- casing-tuple ) special-casing at ; inline ! For non-existent characters, use Cn -CONSTANT: categories - { "Cn" - "Lu" "Ll" "Lt" "Lm" "Lo" - "Mn" "Mc" "Me" - "Nd" "Nl" "No" - "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po" - "Sm" "Sc" "Sk" "So" - "Zs" "Zl" "Zp" - "Cc" "Cf" "Cs" "Co" } +CONSTANT: categories { + "Cn" + "Lu" "Ll" "Lt" "Lm" "Lo" + "Mn" "Mc" "Me" + "Nd" "Nl" "No" + "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po" + "Sm" "Sc" "Sk" "So" + "Zs" "Zl" "Zp" + "Cc" "Cf" "Cs" "Co" +} ! that this gives Cf or Mn ! Cf = 26; Mn = 5; Cn = 29 ! Use a compressed array instead? - dup category-map get-global ?nth [ ] [ + dup category-map ?nth [ ] [ dup 0xE0001 0xE007F between? [ drop 26 ] [ 0xE0100 0xE01EF between? 5 29 ? @@ -203,7 +204,7 @@ load-data { [ process-combining class-map swap assoc-union! drop ] [ process-canonical canonical-map swap assoc-union! drop combine-map swap assoc-union! drop ] [ process-compatibility compatibility-map swap assoc-union! drop ] - [ process-category category-map set-global ] + [ process-category category-map push-all ] } cleave combine-map keys [ 2ch> nip ] map