Unicode cleanup and optimization

db4
Daniel Ehrenberg 2009-01-08 22:23:39 -06:00
parent 43c18cd7fa
commit 58df6dad6f
2 changed files with 18 additions and 17 deletions

View File

@ -192,22 +192,22 @@ to: word-table
: word-table-nth ( class1 class2 -- ? ) : word-table-nth ( class1 class2 -- ? )
word-table nth nth ; word-table nth nth ;
: property-not= ( i str property -- ? ) :: property-not= ( i str property -- ? )
pick [ i [
[ ?nth ] dip swap i str ?nth [ word-break-prop property = not ]
[ word-break-prop = not ] [ drop f ] if* [ f ] if*
] [ 3drop t ] if ; ] [ t ] if ;
: format/extended? ( ch -- ? ) : format/extended? ( ch -- ? )
word-break-prop { 4 5 } member? ; word-break-prop { 4 5 } member? ;
:: walk-up ( str i -- j ) :: walk-up ( str i -- j )
i 1 + str [ format/extended? not ] find-from drop i 1 + str [ format/extended? not ] find-from drop
1+ str [ format/extended? not ] find-from drop ; ! possible bounds error? [ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ;
:: walk-down ( str i -- j ) :: walk-down ( str i -- j )
i str [ format/extended? not ] find-last-from drop i str [ format/extended? not ] find-last-from drop
1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error? [ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ;
:: word-break? ( table-entry i str -- ? ) :: word-break? ( table-entry i str -- ? )
table-entry { table-entry {
@ -224,9 +224,11 @@ to: word-table
} case ; } case ;
:: word-break-next ( old-class new-char i str -- next-class ? ) :: word-break-next ( old-class new-char i str -- next-class ? )
new-char word-break-prop dup { 4 5 } member? new-char dup format/extended?
[ drop old-class dup { 1 2 3 } member? ] [ drop old-class dup { 1 2 3 } member? ] [
[ old-class over word-table-nth i str word-break? ] if ; word-break-prop old-class over word-table-nth
i str word-break?
] if ;
PRIVATE> PRIVATE>

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: sequences namespaces make unicode.data kernel math arrays USING: sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors assocs math.order combinators locals sorting.insertion accessors assocs math.order combinators
unicode.syntax ; unicode.syntax strings sbufs ;
IN: unicode.normalize IN: unicode.normalize
<PRIVATE <PRIVATE
@ -66,13 +66,12 @@ CONSTANT: final-count 28
over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ; over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
:: decompose ( string quot -- decomposed ) :: decompose ( string quot -- decomposed )
[ [let | out [ string length <sbuf> ] |
string [ string [
dup hangul? [ hangul>jamo % ] dup hangul? [ hangul>jamo out push-all ]
[ dup quot call [ % ] [ , ] ?if ] if [ dup quot call [ out push-all ] [ out push ] ?if ] if
] each ] each out >string
] "" make ] dup reorder ;
dup reorder ;
: with-string ( str quot -- str ) : with-string ( str quot -- str )
over aux>> [ call ] [ drop ] if ; inline over aux>> [ call ] [ drop ] if ; inline