diff --git a/basis/ui/gadgets/editors/editors.factor b/basis/ui/gadgets/editors/editors.factor index bda9938f8a..55622503b6 100755 --- a/basis/ui/gadgets/editors/editors.factor +++ b/basis/ui/gadgets/editors/editors.factor @@ -8,7 +8,8 @@ continuations ui.clipboards ui.commands ui.gadgets ui.gadgets.borders ui.gadgets.buttons ui.gadgets.labels ui.gadgets.scrollers ui.gadgets.menus ui.gadgets.wrappers ui.render ui.pens.solid ui.gadgets.line-support ui.text ui.gestures ui.baseline-alignment -math.rectangles splitting unicode.categories fonts grouping ; +math.rectangles splitting unicode.categories grouping ; +EXCLUDE: fonts => selection ; IN: ui.gadgets.editors TUPLE: editor < line-gadget diff --git a/basis/unicode/script/script-docs.factor b/basis/unicode/script/script-docs.factor index 730c361fb9..2860f83bef 100644 --- a/basis/unicode/script/script-docs.factor +++ b/basis/unicode/script/script-docs.factor @@ -3,8 +3,12 @@ USING: help.syntax help.markup strings ; IN: unicode.script -ABOUT: script-of +ABOUT: "unicode.script" + +ARTICLE: "unicode.script" "Unicode script properties" +"The Unicode standard gives every character a script. Note that this is different from a language, and that it is non-trivial to detect language from a string. To get the script of a character, use" +{ $subsection script-of } ; HELP: script-of -{ $values { "char" "a code point" } { "script" "a symbol" } } -{ $description "Gets a symbol representing the code point of a given character. The word name of the symbol is the same as the one " } ; +{ $values { "char" "a code point" } { "script" string } } +{ $description "Finds the script of the given Unicode code point, represented as a string." 
} ; diff --git a/basis/x11/clipboard/clipboard.factor b/basis/x11/clipboard/clipboard.factor index 8375636a72..87b91624af 100644 --- a/basis/x11/clipboard/clipboard.factor +++ b/basis/x11/clipboard/clipboard.factor @@ -26,7 +26,7 @@ TUPLE: x-clipboard atom contents ; CurrentTime XConvertSelection drop ; : snarf-property ( prop-return -- string ) - dup *void* [ *void* ascii alien>string ] [ drop f ] if ; + dup *void* [ *void* utf8 alien>string ] [ drop f ] if ; : window-property ( win prop delete? -- string ) [ [ dpy get ] 2dip 0 -1 ] dip AnyPropertyType @@ -37,7 +37,7 @@ TUPLE: x-clipboard atom contents ; swap XSelectionEvent-property zero? [ drop f ] [ - selection-property 1 window-property utf8 decode + selection-property 1 window-property ] if ; : own-selection ( prop win -- ) diff --git a/core/io/encodings/utf8/utf8-tests.factor b/core/io/encodings/utf8/utf8-tests.factor index e30e9be0d0..6cd3ee8033 100755 --- a/core/io/encodings/utf8/utf8-tests.factor +++ b/core/io/encodings/utf8/utf8-tests.factor @@ -1,5 +1,5 @@ USING: io.encodings.utf8 tools.test io.encodings.string strings arrays -bootstrap.unicode ; +bootstrap.unicode kernel sequences ; IN: io.encodings.utf8.tests : decode-utf8-w/stream ( array -- newarray ) @@ -25,3 +25,7 @@ IN: io.encodings.utf8.tests [ 3 ] [ 1 "日本語" >utf8-index ] unit-test [ 3 ] [ 9 "日本語" utf8-index> ] unit-test + +[ 3 ] [ 2 "lápis" >utf8-index ] unit-test + +[ V{ } ] [ 100000 [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor index aca36c8551..4846b06f32 100755 --- a/core/io/encodings/utf8/utf8.factor +++ b/core/io/encodings/utf8/utf8.factor @@ -73,12 +73,14 @@ M: utf8 encode-char PRIVATE> : code-point-length ( n -- x ) - log2 { - { [ dup 0 7 between? ] [ 1 ] } - { [ dup 8 11 between? ] [ 2 ] } - { [ dup 12 16 between? ] [ 3 ] } - { [ dup 17 21 between? ] [ 4 ] } - } cond nip ; + dup zero? 
[ drop 1 ] [ + log2 { + { [ dup 0 6 between? ] [ 1 ] } + { [ dup 7 10 between? ] [ 2 ] } + { [ dup 11 15 between? ] [ 3 ] } + { [ dup 16 20 between? ] [ 4 ] } + } cond nip + ] if ; : code-point-offsets ( string -- indices ) 0 [ code-point-length + ] accumulate swap suffix ; @@ -87,4 +89,4 @@ PRIVATE> code-point-offsets [ <= ] with find drop ; : >utf8-index ( n string -- n' ) - code-point-offsets nth ; \ No newline at end of file + code-point-offsets nth ;