Add utf8-index> and >utf8-index words for dealing with broken C APIs
parent
190226d29e
commit
abdf153374
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors growable io continuations
|
USING: math math.order kernel sequences sbufs vectors growable io
|
||||||
namespaces io.encodings combinators strings ;
|
continuations namespaces io.encodings combinators strings
|
||||||
|
binary-search ;
|
||||||
IN: io.encodings.utf8
|
IN: io.encodings.utf8
|
||||||
|
|
||||||
! Decoding UTF-8
|
! Decoding UTF-8
|
||||||
|
@ -30,9 +31,9 @@ SINGLETON: utf8
|
||||||
: begin-utf8 ( stream byte -- stream char )
|
: begin-utf8 ( stream byte -- stream char )
|
||||||
{
|
{
|
||||||
{ [ dup -7 shift zero? ] [ ] }
|
{ [ dup -7 shift zero? ] [ ] }
|
||||||
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
|
{ [ dup -5 shift BIN: 110 = ] [ double ] }
|
||||||
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
|
{ [ dup -4 shift BIN: 1110 = ] [ triple ] }
|
||||||
{ [ dup -3 shift BIN: 11110 number= ] [ quadruple ] }
|
{ [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
|
||||||
[ drop replacement-char ]
|
[ drop replacement-char ]
|
||||||
} cond ; inline
|
} cond ; inline
|
||||||
|
|
||||||
|
@ -71,3 +72,20 @@ M: utf8 encode-char
|
||||||
drop swap char>utf8 ;
|
drop swap char>utf8 ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
: code-point-length ( n -- x )
|
||||||
|
log2 {
|
||||||
|
{ [ dup 0 7 between? ] [ 1 ] }
|
||||||
|
{ [ dup 8 11 between? ] [ 2 ] }
|
||||||
|
{ [ dup 12 16 between? ] [ 3 ] }
|
||||||
|
{ [ dup 17 21 between? ] [ 4 ] }
|
||||||
|
} cond nip ;
|
||||||
|
|
||||||
|
: code-point-offsets ( string -- indices )
|
||||||
|
0 [ code-point-length + ] accumulate swap suffix ;
|
||||||
|
|
||||||
|
: utf8-index> ( n string -- n' )
|
||||||
|
code-point-offsets natural-search drop ;
|
||||||
|
|
||||||
|
: >utf8-index ( n string -- n' )
|
||||||
|
code-point-offsets nth ;
|
|
@ -17,8 +17,8 @@ IN: sequences.tests
|
||||||
|
|
||||||
[ 5040 ] [ [ 1 2 3 4 5 6 7 ] 1 [ * ] reduce ] unit-test
|
[ 5040 ] [ [ 1 2 3 4 5 6 7 ] 1 [ * ] reduce ] unit-test
|
||||||
|
|
||||||
[ 5040 [ 1 1 2 6 24 120 720 ] ]
|
[ 5040 { 1 1 2 6 24 120 720 } ]
|
||||||
[ [ 1 2 3 4 5 6 7 ] 1 [ * ] accumulate ] unit-test
|
[ { 1 2 3 4 5 6 7 } 1 [ * ] accumulate ] unit-test
|
||||||
|
|
||||||
[ f f ] [ [ ] [ ] find ] unit-test
|
[ f f ] [ [ ] [ ] find ] unit-test
|
||||||
[ 0 1 ] [ [ 1 ] [ ] find ] unit-test
|
[ 0 1 ] [ [ 1 ] [ ] find ] unit-test
|
||||||
|
|
|
@ -416,7 +416,7 @@ PRIVATE>
|
||||||
over map-into ; inline
|
over map-into ; inline
|
||||||
|
|
||||||
: accumulate ( seq identity quot -- final newseq )
|
: accumulate ( seq identity quot -- final newseq )
|
||||||
swapd [ [ call ] [ 2drop ] 3bi ] curry map ; inline
|
swapd [ [ call ] [ 2drop ] 3bi ] curry { } map-as ; inline
|
||||||
|
|
||||||
: 2each ( seq1 seq2 quot -- )
|
: 2each ( seq1 seq2 quot -- )
|
||||||
(2each) each-integer ; inline
|
(2each) each-integer ; inline
|
||||||
|
|
Loading…
Reference in New Issue