! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: combinators.short-circuit unicode.categories kernel math combinators splitting sequences math.parser io.files io assocs arrays namespaces make math.ranges unicode.normalize.private values io.encodings.ascii unicode.syntax unicode.data compiler.units alien.syntax sets accessors interval-maps memoize locals words ; IN: unicode.breaks ] curry replicate ; SYMBOL: table : finish-table ( -- table ) table get [ [ 1 = ] map ] map ; : set-table ( class1 class2 val -- ) -rot table get nth [ swap or ] change-nth ; : connect ( class1 class2 -- ) 1 set-table ; : disconnect ( class1 class2 -- ) 0 set-table ; : check-before ( class classes value -- ) [ set-table ] curry with each ; : check-after ( classes class value -- ) [ set-table ] 2curry each ; : connect-before ( class classes -- ) 1 check-before ; : connect-after ( classes class -- ) 1 check-after ; : break-around ( classes1 classes2 -- ) [ [ 2dup disconnect swap disconnect ] with each ] curry each ; : make-grapheme-table ( -- ) CR LF connect Control CR LF 3array graphemes break-around L L V LV LVT 4array connect-before V V T 2array connect-before LV V T 2array connect-before T T connect LVT T connect graphemes Extend connect-after graphemes SpacingMark connect-after Prepend graphemes connect-before ; VALUE: grapheme-table : grapheme-break? ( class1 class2 -- ? ) grapheme-table nth nth not ; : chars ( i str n -- str[i] str[i+n] ) swap [ dupd + ] dip [ ?nth ] curry bi@ ; : find-index ( seq quot -- i ) find drop ; inline : find-last-index ( seq quot -- i ) find-last drop ; inline PRIVATE> : first-grapheme ( str -- i ) unclip-slice grapheme-class over [ grapheme-class tuck grapheme-break? ] find-index nip swap length or 1+ ; pieces) ( str quot -- ) str [ dup quot call cut-slice swap , quot (>pieces) ] unless-empty ; inline recursive : >pieces ( str quot -- graphemes ) [ (>pieces) ] { } make ; inline PRIVATE> : >graphemes ( str -- graphemes ) [ first-grapheme ] >pieces ; : string-reverse ( str -- rts ) >graphemes reverse concat ; : last-grapheme ( str -- i ) unclip-last-slice grapheme-class swap [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ; :: first-word ( str -- i ) str unclip-slice word-break-prop over [ swap str word-break-next ] assoc-find 2drop nip swap length or 1+ ; : >words ( str -- words ) [ first-word ] >pieces ;