From 1536ac8b4976d05efae48ea865a0c433da40a098 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg <littledan@Macintosh-103.local> Date: Sun, 11 Jan 2009 19:41:48 -0600 Subject: [PATCH] Fixing Unicode collation bug --- basis/unicode/breaks/breaks.factor | 13 ++++--------- basis/unicode/collation/collation.factor | 10 +++++----- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 10bc235805..0524825d43 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -4,8 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math combinators splitting sequences math.parser io.files io assocs arrays namespaces make math.ranges unicode.normalize.private values io.encodings.ascii unicode.syntax unicode.data compiler.units fry -alien.syntax sets accessors interval-maps memoize locals words -strings hints ; +alien.syntax sets accessors interval-maps memoize locals words ; IN: unicode.breaks <PRIVATE @@ -212,25 +211,21 @@ to: word-table [ dupd walk-up wNumeric property-not= ] } { check-number-before [ dupd walk-down wNumeric property-not= ] } - } case ; inline + } case ; :: word-break-next ( old-class new-char i str -- next-class ? ) new-char dup format/extended? [ drop old-class dup { 1 2 3 } member? ] [ word-break-prop old-class over word-table-nth i str word-break? - ] if ; inline + ] if ; PRIVATE> : first-word ( str -- i ) [ unclip-slice word-break-prop over <enum> ] keep '[ swap _ word-break-next ] assoc-find 2drop - nip swap length or 1+ ; inline - -HINTS: first-word string ; + nip swap length or 1+ ; : >words ( str -- words ) [ first-word ] >pieces ; - -HINTS: >words string ; diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor index 90b280ee09..5718ae12a7 100644 --- a/basis/unicode/collation/collation.factor +++ b/basis/unicode/collation/collation.factor @@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser ascii io assocs strings math namespaces make sorting combinators math.order arrays unicode.normalize unicode.data locals unicode.syntax macros sequences.deep words unicode.breaks -quotations ; +quotations combinators.short-circuit ; IN: unicode.collation <PRIVATE @@ -71,12 +71,12 @@ ducet insert-helpers building get empty? [ 0 ] [ building get peek peek ] if ; : blocked? ( char -- ? ) - combining-class [ - last combining-class = - ] [ last combining-class ] if* ; + combining-class dup { 0 f } member? + [ drop last non-starter? ] + [ last combining-class = ] if ; : possible-bases ( -- slice-of-building ) - building get dup [ first combining-class not ] find-last + building get dup [ first non-starter? not ] find-last drop [ 0 ] unless* tail-slice ; :: ?combine ( char slice i -- ? )