Fixing Unicode collation bug

db4
Daniel Ehrenberg 2009-01-11 19:41:48 -06:00
parent fa52490fe9
commit 1536ac8b49
2 changed files with 9 additions and 14 deletions

View File

@@ -4,8 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize.private values arrays namespaces make math.ranges unicode.normalize.private values
io.encodings.ascii unicode.syntax unicode.data compiler.units fry io.encodings.ascii unicode.syntax unicode.data compiler.units fry
alien.syntax sets accessors interval-maps memoize locals words alien.syntax sets accessors interval-maps memoize locals words ;
strings hints ;
IN: unicode.breaks IN: unicode.breaks
<PRIVATE <PRIVATE
@@ -212,25 +211,21 @@ to: word-table
[ dupd walk-up wNumeric property-not= ] } [ dupd walk-up wNumeric property-not= ] }
{ check-number-before { check-number-before
[ dupd walk-down wNumeric property-not= ] } [ dupd walk-down wNumeric property-not= ] }
} case ; inline } case ;
:: word-break-next ( old-class new-char i str -- next-class ? ) :: word-break-next ( old-class new-char i str -- next-class ? )
new-char dup format/extended? new-char dup format/extended?
[ drop old-class dup { 1 2 3 } member? ] [ [ drop old-class dup { 1 2 3 } member? ] [
word-break-prop old-class over word-table-nth word-break-prop old-class over word-table-nth
i str word-break? i str word-break?
] if ; inline ] if ;
PRIVATE> PRIVATE>
: first-word ( str -- i ) : first-word ( str -- i )
[ unclip-slice word-break-prop over <enum> ] keep [ unclip-slice word-break-prop over <enum> ] keep
'[ swap _ word-break-next ] assoc-find 2drop '[ swap _ word-break-next ] assoc-find 2drop
nip swap length or 1+ ; inline nip swap length or 1+ ;
HINTS: first-word string ;
: >words ( str -- words ) : >words ( str -- words )
[ first-word ] >pieces ; [ first-word ] >pieces ;
HINTS: >words string ;

View File

@@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks unicode.syntax macros sequences.deep words unicode.breaks
quotations ; quotations combinators.short-circuit ;
IN: unicode.collation IN: unicode.collation
<PRIVATE <PRIVATE
@@ -71,12 +71,12 @@ ducet insert-helpers
building get empty? [ 0 ] [ building get peek peek ] if ; building get empty? [ 0 ] [ building get peek peek ] if ;
: blocked? ( char -- ? ) : blocked? ( char -- ? )
combining-class [ combining-class dup { 0 f } member?
last combining-class = [ drop last non-starter? ]
] [ last combining-class ] if* ; [ last combining-class = ] if ;
: possible-bases ( -- slice-of-building ) : possible-bases ( -- slice-of-building )
building get dup [ first combining-class not ] find-last building get dup [ first non-starter? not ] find-last
drop [ 0 ] unless* tail-slice ; drop [ 0 ] unless* tail-slice ;
:: ?combine ( char slice i -- ? ) :: ?combine ( char slice i -- ? )