Unicode cleanup and optimization
parent
43c18cd7fa
commit
58df6dad6f
|
@ -192,22 +192,22 @@ to: word-table
|
||||||
: word-table-nth ( class1 class2 -- ? )
|
: word-table-nth ( class1 class2 -- ? )
|
||||||
word-table nth nth ;
|
word-table nth nth ;
|
||||||
|
|
||||||
: property-not= ( i str property -- ? )
|
:: property-not= ( i str property -- ? )
|
||||||
pick [
|
i [
|
||||||
[ ?nth ] dip swap
|
i str ?nth [ word-break-prop property = not ]
|
||||||
[ word-break-prop = not ] [ drop f ] if*
|
[ f ] if*
|
||||||
] [ 3drop t ] if ;
|
] [ t ] if ;
|
||||||
|
|
||||||
: format/extended? ( ch -- ? )
|
: format/extended? ( ch -- ? )
|
||||||
word-break-prop { 4 5 } member? ;
|
word-break-prop { 4 5 } member? ;
|
||||||
|
|
||||||
:: walk-up ( str i -- j )
|
:: walk-up ( str i -- j )
|
||||||
i 1 + str [ format/extended? not ] find-from drop
|
i 1 + str [ format/extended? not ] find-from drop
|
||||||
1+ str [ format/extended? not ] find-from drop ; ! possible bounds error?
|
[ 1+ str [ format/extended? not ] find-from drop ] [ f ] if* ;
|
||||||
|
|
||||||
:: walk-down ( str i -- j )
|
:: walk-down ( str i -- j )
|
||||||
i str [ format/extended? not ] find-last-from drop
|
i str [ format/extended? not ] find-last-from drop
|
||||||
1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error?
|
[ 1- str [ format/extended? not ] find-last-from drop ] [ f ] if* ;
|
||||||
|
|
||||||
:: word-break? ( table-entry i str -- ? )
|
:: word-break? ( table-entry i str -- ? )
|
||||||
table-entry {
|
table-entry {
|
||||||
|
@ -224,9 +224,11 @@ to: word-table
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
:: word-break-next ( old-class new-char i str -- next-class ? )
|
:: word-break-next ( old-class new-char i str -- next-class ? )
|
||||||
new-char word-break-prop dup { 4 5 } member?
|
new-char dup format/extended?
|
||||||
[ drop old-class dup { 1 2 3 } member? ]
|
[ drop old-class dup { 1 2 3 } member? ] [
|
||||||
[ old-class over word-table-nth i str word-break? ] if ;
|
word-break-prop old-class over word-table-nth
|
||||||
|
i str word-break?
|
||||||
|
] if ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences namespaces make unicode.data kernel math arrays
|
USING: sequences namespaces make unicode.data kernel math arrays
|
||||||
locals sorting.insertion accessors assocs math.order combinators
|
locals sorting.insertion accessors assocs math.order combinators
|
||||||
unicode.syntax ;
|
unicode.syntax strings sbufs ;
|
||||||
IN: unicode.normalize
|
IN: unicode.normalize
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -66,13 +66,12 @@ CONSTANT: final-count 28
|
||||||
over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
|
over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
|
||||||
|
|
||||||
:: decompose ( string quot -- decomposed )
|
:: decompose ( string quot -- decomposed )
|
||||||
[
|
[let | out [ string length <sbuf> ] |
|
||||||
string [
|
string [
|
||||||
dup hangul? [ hangul>jamo % ]
|
dup hangul? [ hangul>jamo out push-all ]
|
||||||
[ dup quot call [ % ] [ , ] ?if ] if
|
[ dup quot call [ out push-all ] [ out push ] ?if ] if
|
||||||
] each
|
] each out >string
|
||||||
] "" make
|
] dup reorder ;
|
||||||
dup reorder ;
|
|
||||||
|
|
||||||
: with-string ( str quot -- str )
|
: with-string ( str quot -- str )
|
||||||
over aux>> [ call ] [ drop ] if ; inline
|
over aux>> [ call ] [ drop ] if ; inline
|
||||||
|
|
Loading…
Reference in New Issue