unicode.breaks: >words is now 90+% faster.
parent
4cbbfe82aa
commit
e7489ba16a
|
@ -1,12 +1,12 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: combinators.short-circuit unicode.categories kernel math
|
USING: accessors alien.syntax arrays assocs combinators
|
||||||
combinators splitting sequences math.parser io.files io assocs
|
combinators.short-circuit compiler.units fry interval-maps io
|
||||||
arrays namespaces make math.ranges unicode.normalize
|
io.encodings.ascii io.files kernel literals locals make math
|
||||||
unicode.normalize.private values io.encodings.ascii
|
math.parser math.ranges memoize namespaces sequences
|
||||||
unicode.data compiler.units fry unicode.categories.syntax
|
sequences.private sets simple-flat-file splitting
|
||||||
alien.syntax sets accessors interval-maps memoize locals words
|
unicode.categories unicode.categories.syntax unicode.data
|
||||||
simple-flat-file ;
|
unicode.normalize unicode.normalize.private values words ;
|
||||||
FROM: sequences => change-nth ;
|
FROM: sequences => change-nth ;
|
||||||
IN: unicode.breaks
|
IN: unicode.breaks
|
||||||
|
|
||||||
|
@ -209,8 +209,11 @@ words init-table table
|
||||||
[ f ] if*
|
[ f ] if*
|
||||||
] [ t ] if ;
|
] [ t ] if ;
|
||||||
|
|
||||||
|
: (format/extended?) ( class -- ? )
|
||||||
|
${ wExtend wFormat } member? ; inline
|
||||||
|
|
||||||
: format/extended? ( ch -- ? )
|
: format/extended? ( ch -- ? )
|
||||||
word-break-prop { 4 5 } member? ;
|
word-break-prop (format/extended?) ;
|
||||||
|
|
||||||
: (walk-up) ( str i -- j )
|
: (walk-up) ( str i -- j )
|
||||||
swap [ format/extended? not ] find-from drop ;
|
swap [ format/extended? not ] find-from drop ;
|
||||||
|
@ -239,17 +242,24 @@ words init-table table
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
:: word-break-next ( old-class new-char i str -- next-class ? )
|
:: word-break-next ( old-class new-char i str -- next-class ? )
|
||||||
new-char format/extended?
|
new-char word-break-prop :> new-class
|
||||||
[ old-class dup { 1 2 3 } member? ] [
|
new-class (format/extended?)
|
||||||
new-char word-break-prop old-class over word-table-nth
|
[ old-class dup ${ wCR wLF wNewline } member? ] [
|
||||||
|
new-class old-class over word-table-nth
|
||||||
[ str i ] dip word-break?
|
[ str i ] dip word-break?
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
|
: (find-index) ( seq quot quot' -- i elt )
|
||||||
|
pick [ [ (each-index) ] dip call ] dip finish-find ; inline
|
||||||
|
|
||||||
|
: find-index ( ... seq quot: ( ... elt i -- ... ? ) -- ... i elt )
|
||||||
|
[ find-integer ] (find-index) ; inline
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: first-word ( str -- i )
|
: first-word ( str -- i )
|
||||||
[ unclip-slice word-break-prop over <enum> ] keep
|
[ unclip-slice word-break-prop over ] keep
|
||||||
'[ swap _ word-break-next ] assoc-find 2drop
|
'[ _ word-break-next ] find-index drop
|
||||||
nip swap length or 1 + ;
|
nip swap length or 1 + ;
|
||||||
|
|
||||||
: >words ( str -- words )
|
: >words ( str -- words )
|
||||||
|
|
Loading…
Reference in New Issue