unicode.breaks: >words is now 90+% faster.

db4
John Benediktsson 2011-10-12 11:03:39 -07:00
parent 4cbbfe82aa
commit e7489ba16a
1 changed files with 23 additions and 13 deletions

View File

@ -1,12 +1,12 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: combinators.short-circuit unicode.categories kernel math USING: accessors alien.syntax arrays assocs combinators
combinators splitting sequences math.parser io.files io assocs combinators.short-circuit compiler.units fry interval-maps io
arrays namespaces make math.ranges unicode.normalize io.encodings.ascii io.files kernel literals locals make math
unicode.normalize.private values io.encodings.ascii math.parser math.ranges memoize namespaces sequences
unicode.data compiler.units fry unicode.categories.syntax sequences.private sets simple-flat-file splitting
alien.syntax sets accessors interval-maps memoize locals words unicode.categories unicode.categories.syntax unicode.data
simple-flat-file ; unicode.normalize unicode.normalize.private values words ;
FROM: sequences => change-nth ; FROM: sequences => change-nth ;
IN: unicode.breaks IN: unicode.breaks
@ -209,8 +209,11 @@ words init-table table
[ f ] if* [ f ] if*
] [ t ] if ; ] [ t ] if ;
: (format/extended?) ( class -- ? )
${ wExtend wFormat } member? ; inline
: format/extended? ( ch -- ? ) : format/extended? ( ch -- ? )
word-break-prop { 4 5 } member? ; word-break-prop (format/extended?) ;
: (walk-up) ( str i -- j ) : (walk-up) ( str i -- j )
swap [ format/extended? not ] find-from drop ; swap [ format/extended? not ] find-from drop ;
@ -239,17 +242,24 @@ words init-table table
} case ; } case ;
:: word-break-next ( old-class new-char i str -- next-class ? ) :: word-break-next ( old-class new-char i str -- next-class ? )
new-char format/extended? new-char word-break-prop :> new-class
[ old-class dup { 1 2 3 } member? ] [ new-class (format/extended?)
new-char word-break-prop old-class over word-table-nth [ old-class dup ${ wCR wLF wNewline } member? ] [
new-class old-class over word-table-nth
[ str i ] dip word-break? [ str i ] dip word-break?
] if ; ] if ;
: (find-index) ( seq quot quot' -- i elt )
pick [ [ (each-index) ] dip call ] dip finish-find ; inline
: find-index ( ... seq quot: ( ... elt i -- ... ? ) -- ... i elt )
[ find-integer ] (find-index) ; inline
PRIVATE> PRIVATE>
: first-word ( str -- i ) : first-word ( str -- i )
[ unclip-slice word-break-prop over <enum> ] keep [ unclip-slice word-break-prop over ] keep
'[ swap _ word-break-next ] assoc-find 2drop '[ _ word-break-next ] find-index drop
nip swap length or 1 + ; nip swap length or 1 + ;
: >words ( str -- words ) : >words ( str -- words )