Fixing Unicode collation bug

db4
Daniel Ehrenberg 2009-01-11 19:41:48 -06:00
parent fa52490fe9
commit 1536ac8b49
2 changed files with 9 additions and 14 deletions

View File

@@ -4,8 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize.private values
io.encodings.ascii unicode.syntax unicode.data compiler.units fry
alien.syntax sets accessors interval-maps memoize locals words
strings hints ;
alien.syntax sets accessors interval-maps memoize locals words ;
IN: unicode.breaks
<PRIVATE
@@ -212,25 +211,21 @@ to: word-table
[ dupd walk-up wNumeric property-not= ] }
{ check-number-before
[ dupd walk-down wNumeric property-not= ] }
} case ; inline
} case ;
:: word-break-next ( old-class new-char i str -- next-class ? )
new-char dup format/extended?
[ drop old-class dup { 1 2 3 } member? ] [
word-break-prop old-class over word-table-nth
i str word-break?
] if ; inline
] if ;
PRIVATE>
: first-word ( str -- i )
[ unclip-slice word-break-prop over <enum> ] keep
'[ swap _ word-break-next ] assoc-find 2drop
nip swap length or 1+ ; inline
HINTS: first-word string ;
nip swap length or 1+ ;
: >words ( str -- words )
[ first-word ] >pieces ;
HINTS: >words string ;

View File

@@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks
quotations ;
quotations combinators.short-circuit ;
IN: unicode.collation
<PRIVATE
@@ -71,12 +71,12 @@ ducet insert-helpers
building get empty? [ 0 ] [ building get peek peek ] if ;
: blocked? ( char -- ? )
combining-class [
last combining-class =
] [ last combining-class ] if* ;
combining-class dup { 0 f } member?
[ drop last non-starter? ]
[ last combining-class = ] if ;
: possible-bases ( -- slice-of-building )
building get dup [ first combining-class not ] find-last
building get dup [ first non-starter? not ] find-last
drop [ 0 ] unless* tail-slice ;
:: ?combine ( char slice i -- ? )