From 58df6dad6f44ce45d5182a594d91e60f475ca42e Mon Sep 17 00:00:00 2001
From: Daniel Ehrenberg <littledan@Macintosh-103.local>
Date: Thu, 8 Jan 2009 22:23:39 -0600
Subject: [PATCH] Unicode cleanup and optimization

---
 basis/unicode/breaks/breaks.factor       | 22 ++++++++++++----------
 basis/unicode/normalize/normalize.factor | 13 ++++++-------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor
index 1d2f821750..df3b2f03e8 100644
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@@ -192,22 +192,22 @@ to: word-table
 : word-table-nth ( class1 class2 -- ? )
     word-table nth nth ;
 
-: property-not= ( i str property -- ? )
-    pick [
-        [ ?nth ] dip swap
-        [ word-break-prop = not ] [ drop f ] if*
-    ] [ 3drop t ] if ;
+:: property-not= ( i str property -- ? ) ! t if i is f; f if ?nth yields f; else whether the char's word-break-prop differs from property
+    i [ ! i may be f (no index); that case answers t below
+        i str ?nth [ word-break-prop property = not ] ! element found: compare its word-break-prop with property
+        [ f ] if* ! ?nth gave f: answer f
+    ] [ t ] if ;
 
 : format/extended? ( ch -- ? )
     word-break-prop { 4 5 } member? ;
 
 :: walk-up ( str i -- j )
     i 1 + str [ format/extended? not ] find-from drop
+    [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ; ! search again one past the first hit; j is f if the first search found nothing
 
 :: walk-down ( str i -- j )
     i str [ format/extended? not ] find-last-from drop
+    [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ; ! search again one before the first hit; j is f if the first search found nothing
 
 :: word-break? ( table-entry i str -- ? )
     table-entry {
@@ -224,9 +224,11 @@ to: word-table
     } case ;
 
 :: word-break-next ( old-class new-char i str -- next-class ? )
-    new-char word-break-prop dup { 4 5 } member?
-    [ drop old-class dup { 1 2 3 } member? ]
-    [ old-class over word-table-nth i str word-break? ] if ;
+    new-char dup format/extended? ! format/extended? tests word-break-prop against { 4 5 }
+    [ drop old-class dup { 1 2 3 } member? ] [ ! such chars keep old-class as next-class; flag is whether old-class is 1, 2 or 3
+        word-break-prop old-class over word-table-nth ! otherwise next-class is new-char's word-break-prop; fetch the table entry for the class pair
+        i str word-break? ! the table entry plus position i in str decide the flag
+    ] if ;
 
 PRIVATE>
 
diff --git a/basis/unicode/normalize/normalize.factor b/basis/unicode/normalize/normalize.factor
index 2fbe2e1843..7a41a768cd 100644
--- a/basis/unicode/normalize/normalize.factor
+++ b/basis/unicode/normalize/normalize.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: sequences namespaces make unicode.data kernel math arrays
 locals sorting.insertion accessors assocs math.order combinators
-unicode.syntax ;
+unicode.syntax strings sbufs ;
 IN: unicode.normalize
 
 <PRIVATE
@@ -66,13 +66,12 @@ CONSTANT: final-count 28
     over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
 
 :: decompose ( string quot -- decomposed )
-    [
+    [let | out [ string length <sbuf> ] | ! out: sbuf accumulator created with string's length as its initial size
         string [
-            dup hangul? [ hangul>jamo % ]
-            [ dup quot call [ % ] [ , ] ?if ] if
-        ] each
-    ] "" make
-    dup reorder ;
+            dup hangul? [ hangul>jamo out push-all ] ! hangul? chars: append the whole hangul>jamo result
+            [ dup quot call [ out push-all ] [ out push ] ?if ] if ! otherwise append quot's result when it is not f, else the char itself
+        ] each out >string ! convert the accumulated sbuf to a string
+    ] dup reorder ; ! dup keeps the string on the stack as the return value across the reorder call
 
 : with-string ( str quot -- str )
     over aux>> [ call ] [ drop ] if ; inline