diff --git a/core/io/encodings/utf8/utf8-tests.factor b/core/io/encodings/utf8/utf8-tests.factor
index e30e9be0d0..6cd3ee8033 100755
--- a/core/io/encodings/utf8/utf8-tests.factor
+++ b/core/io/encodings/utf8/utf8-tests.factor
@@ -1,5 +1,5 @@
 USING: io.encodings.utf8 tools.test io.encodings.string strings arrays
-bootstrap.unicode ;
+bootstrap.unicode kernel sequences ;
 IN: io.encodings.utf8.tests
 
 : decode-utf8-w/stream ( array -- newarray )
@@ -25,3 +25,7 @@ IN: io.encodings.utf8.tests
 [ 3 ] [ 1 "日本語" >utf8-index ] unit-test
 
 [ 3 ] [ 9 "日本語" utf8-index> ] unit-test
+
+[ 3 ] [ 2 "lápis" >utf8-index ] unit-test
+
+[ V{ } ] [ 100000 [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test
diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor
index aca36c8551..83ecc33b8e 100755
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@@ -73,7 +73,10 @@ M: utf8 encode-char
 PRIVATE>
 
 : code-point-length ( n -- x )
-    log2 {
+    ! Classify by bit length (log2 + 1) rather than by a rounded-up log2, so
+    ! that exact powers of two (128, 2048, 65536) fall into the correct range.
+    ! Zero has no set bit, so it is mapped to bit length 0 explicitly.
+    dup zero? [ drop 0 ] [ log2 1 + ] if {
         { [ dup 0 7 between? ] [ 1 ] }
         { [ dup 8 11 between? ] [ 2 ] }
         { [ dup 12 16 between? ] [ 3 ] }
@@ -87,4 +90,4 @@ PRIVATE>
     code-point-offsets [ <= ] with find drop ;
 
 : >utf8-index ( n string -- n' )
-    code-point-offsets nth ;
\ No newline at end of file
+    code-point-offsets nth ;