Add utf8-index> and >utf8-index words for dealing with broken C APIs

db4
Slava Pestov 2009-03-04 20:43:04 -06:00
parent 190226d29e
commit abdf153374
3 changed files with 26 additions and 8 deletions

View File

@ -1,7 +1,8 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors growable io continuations
namespaces io.encodings combinators strings ;
USING: math math.order kernel sequences sbufs vectors growable io
continuations namespaces io.encodings combinators strings
binary-search ;
IN: io.encodings.utf8
! Decoding UTF-8
@ -30,9 +31,9 @@ SINGLETON: utf8
: begin-utf8 ( stream byte -- stream char )
{
{ [ dup -7 shift zero? ] [ ] }
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
{ [ dup -3 shift BIN: 11110 number= ] [ quadruple ] }
{ [ dup -5 shift BIN: 110 = ] [ double ] }
{ [ dup -4 shift BIN: 1110 = ] [ triple ] }
{ [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
[ drop replacement-char ]
} cond ; inline
@ -71,3 +72,20 @@ M: utf8 encode-char
drop swap char>utf8 ;
PRIVATE>
: code-point-length ( n -- x )
log2 {
{ [ dup 0 7 between? ] [ 1 ] }
{ [ dup 8 11 between? ] [ 2 ] }
{ [ dup 12 16 between? ] [ 3 ] }
{ [ dup 17 21 between? ] [ 4 ] }
} cond nip ;
: code-point-offsets ( string -- indices )
0 [ code-point-length + ] accumulate swap suffix ;
: utf8-index> ( n string -- n' )
code-point-offsets natural-search drop ;
: >utf8-index ( n string -- n' )
code-point-offsets nth ;

View File

@ -17,8 +17,8 @@ IN: sequences.tests
[ 5040 ] [ [ 1 2 3 4 5 6 7 ] 1 [ * ] reduce ] unit-test
[ 5040 [ 1 1 2 6 24 120 720 ] ]
[ [ 1 2 3 4 5 6 7 ] 1 [ * ] accumulate ] unit-test
[ 5040 { 1 1 2 6 24 120 720 } ]
[ { 1 2 3 4 5 6 7 } 1 [ * ] accumulate ] unit-test
[ f f ] [ [ ] [ ] find ] unit-test
[ 0 1 ] [ [ 1 ] [ ] find ] unit-test

View File

@ -416,7 +416,7 @@ PRIVATE>
over map-into ; inline
: accumulate ( seq identity quot -- final newseq )
swapd [ [ call ] [ 2drop ] 3bi ] curry map ; inline
swapd [ [ call ] [ 2drop ] 3bi ] curry { } map-as ; inline
: 2each ( seq1 seq2 quot -- )
(2each) each-integer ; inline