Unicode cleanup and optimization

2009-01-08 22:23:39 -06:00 · 2009-01-08 22:23:39 -06:00 · 58df6dad6f
parent 43c18cd7fa
commit 58df6dad6f
2 changed files with 18 additions and 17 deletions
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@ -192,22 +192,22 @@ to: word-table
 : word-table-nth ( class1 class2 -- ? )
    word-table nth nth ;
-: property-not= ( i str property -- ? )
+:: property-not= ( i str property -- ? )
-    pick [
+    i [
-        [ ?nth ] dip swap
+        i str ?nth [ word-break-prop property = not ]
-        [ word-break-prop = not ] [ drop f ] if*
+        [ f ] if*
-    ] [ 3drop t ] if ;
+    ] [ t ] if ;
 : format/extended? ( ch -- ? )
    word-break-prop { 4 5 } member? ;
 :: walk-up ( str i -- j )
    i 1 + str [ format/extended? not ] find-from drop
-    1+ str [ format/extended? not ] find-from drop ; ! possible bounds error?
+    [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ;
 :: walk-down ( str i -- j )
    i str [ format/extended? not ] find-last-from drop
-    1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error?
+    [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ;
 :: word-break? ( table-entry i str -- ? )
    table-entry {
@ -224,9 +224,11 @@ to: word-table
    } case ;
 :: word-break-next ( old-class new-char i str -- next-class ? )
-    new-char word-break-prop dup { 4 5 } member?
+    new-char dup format/extended?
-    [ drop old-class dup { 1 2 3 } member? ]
+    [ drop old-class dup { 1 2 3 } member? ] [
-    [ old-class over word-table-nth i str word-break? ] if ;
+        word-break-prop old-class over word-table-nth
        i str word-break?
    ] if ;
 PRIVATE>
--- a/basis/unicode/normalize/normalize.factor
+++ b/basis/unicode/normalize/normalize.factor
@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: sequences namespaces make unicode.data kernel math arrays
 locals sorting.insertion accessors assocs math.order combinators
-unicode.syntax ;
+unicode.syntax strings sbufs ;
 IN: unicode.normalize
 <PRIVATE
@ -66,13 +66,12 @@ CONSTANT: final-count 28
    over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
 :: decompose ( string quot -- decomposed )
-    [
+    [let | out [ string length <sbuf> ] |
        string [
-            dup hangul? [ hangul>jamo % ]
+            dup hangul? [ hangul>jamo out push-all ]
-            [ dup quot call [ % ] [ , ] ?if ] if
+            [ dup quot call [ out push-all ] [ out push ] ?if ] if
-        ] each
+        ] each out >string
-    ] "" make
+    ] dup reorder ;
    dup reorder ;
 : with-string ( str quot -- str )
    over aux>> [ call ] [ drop ] if ; inline