From abdf15337432c41deaa86b05a66d0efb254b45af Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 4 Mar 2009 20:43:04 -0600 Subject: [PATCH] Add utf8-index> and >utf8-index words for dealing with broken C APIs --- core/io/encodings/utf8/utf8.factor | 28 ++++++++++++++++++++++----- core/sequences/sequences-tests.factor | 4 ++-- core/sequences/sequences.factor | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor index 8030d6265e..69a6abf2c7 100644 --- a/core/io/encodings/utf8/utf8.factor +++ b/core/io/encodings/utf8/utf8.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2006, 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: math kernel sequences sbufs vectors growable io continuations -namespaces io.encodings combinators strings ; +USING: math math.order kernel sequences sbufs vectors growable io +continuations namespaces io.encodings combinators strings +binary-search ; IN: io.encodings.utf8 ! Decoding UTF-8 @@ -30,9 +31,9 @@ SINGLETON: utf8 : begin-utf8 ( stream byte -- stream char ) { { [ dup -7 shift zero? ] [ ] } - { [ dup -5 shift BIN: 110 number= ] [ double ] } - { [ dup -4 shift BIN: 1110 number= ] [ triple ] } - { [ dup -3 shift BIN: 11110 number= ] [ quadruple ] } + { [ dup -5 shift BIN: 110 = ] [ double ] } + { [ dup -4 shift BIN: 1110 = ] [ triple ] } + { [ dup -3 shift BIN: 11110 = ] [ quadruple ] } [ drop replacement-char ] } cond ; inline @@ -71,3 +72,20 @@ M: utf8 encode-char drop swap char>utf8 ; PRIVATE> + +: code-point-length ( n -- x ) + log2 { + { [ dup 0 7 between? ] [ 1 ] } + { [ dup 8 11 between? ] [ 2 ] } + { [ dup 12 16 between? ] [ 3 ] } + { [ dup 17 21 between? ] [ 4 ] } + } cond nip ; + +: code-point-offsets ( string -- indices ) + 0 [ code-point-length + ] accumulate swap suffix ; + +: utf8-index> ( n string -- n' ) + code-point-offsets natural-search drop ; + +: >utf8-index ( n string -- n' ) + code-point-offsets nth ; \ No newline at end of file diff --git a/core/sequences/sequences-tests.factor b/core/sequences/sequences-tests.factor index 3eb287301c..866eb6aad6 100644 --- a/core/sequences/sequences-tests.factor +++ b/core/sequences/sequences-tests.factor @@ -17,8 +17,8 @@ IN: sequences.tests [ 5040 ] [ [ 1 2 3 4 5 6 7 ] 1 [ * ] reduce ] unit-test -[ 5040 [ 1 1 2 6 24 120 720 ] ] -[ [ 1 2 3 4 5 6 7 ] 1 [ * ] accumulate ] unit-test +[ 5040 { 1 1 2 6 24 120 720 } ] +[ { 1 2 3 4 5 6 7 } 1 [ * ] accumulate ] unit-test [ f f ] [ [ ] [ ] find ] unit-test [ 0 1 ] [ [ 1 ] [ ] find ] unit-test diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index 992f822507..394b2b50d8 100755 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -416,7 +416,7 @@ PRIVATE> over map-into ; inline : accumulate ( seq identity quot -- final newseq ) - swapd [ [ call ] [ 2drop ] 3bi ] curry map ; inline + swapd [ [ call ] [ 2drop ] 3bi ] curry { } map-as ; inline : 2each ( seq1 seq2 quot -- ) (2each) each-integer ; inline