Merge branch 'master' of git://factorcode.org/git/factor

2009-03-19 20:02:52 -05:00 · 2009-03-19 20:02:52 -05:00 · 53e519b87b
parent 7f4c967ace db876598ed
commit 53e519b87b
5 changed files with 25 additions and 14 deletions
--- a/basis/ui/gadgets/editors/editors.factor
+++ b/basis/ui/gadgets/editors/editors.factor
@@ -8,7 +8,8 @@ continuations ui.clipboards ui.commands ui.gadgets ui.gadgets.borders
 ui.gadgets.buttons ui.gadgets.labels ui.gadgets.scrollers
 ui.gadgets.menus ui.gadgets.wrappers ui.render ui.pens.solid
 ui.gadgets.line-support ui.text ui.gestures ui.baseline-alignment
-math.rectangles splitting unicode.categories fonts grouping ;
+math.rectangles splitting unicode.categories grouping ;
+EXCLUDE: fonts => selection ;
 IN: ui.gadgets.editors

 TUPLE: editor < line-gadget
--- a/basis/unicode/script/script-docs.factor
+++ b/basis/unicode/script/script-docs.factor
@@ -3,8 +3,12 @@
 USING: help.syntax help.markup strings ;
 IN: unicode.script

-ABOUT: script-of
+ABOUT: "unicode.script"
+
+ARTICLE: "unicode.script" "Unicode script properties"
+"The Unicode standard assigns every character a script. Note that a script is different from a language, and that it is non-trivial to detect the language of a string. To get the script of a character, use"
+{ $subsection script-of } ;

 HELP: script-of
-{ $values { "char" "a code point" } { "script" "a symbol" } }
-{ $description "Gets a symbol representing the code point of a given character. The word name of the symbol is the same as the one " } ;
+{ $values { "char" "a code point" } { "script" string } }
+{ $description "Finds the script of the given Unicode code point, represented as a string." } ;
--- a/basis/x11/clipboard/clipboard.factor
+++ b/basis/x11/clipboard/clipboard.factor
@@ -26,7 +26,7 @@ TUPLE: x-clipboard atom contents ;
    CurrentTime XConvertSelection drop ;

 : snarf-property ( prop-return -- string )
-    dup *void* [ *void* ascii alien>string ] [ drop f ] if ;
+    dup *void* [ *void* utf8 alien>string ] [ drop f ] if ;

 : window-property ( win prop delete? -- string )
    [ [ dpy get ] 2dip 0 -1 ] dip AnyPropertyType
@@ -37,7 +37,7 @@ TUPLE: x-clipboard atom contents ;
    swap XSelectionEvent-property zero? [
        drop f
    ] [
-        selection-property 1 window-property utf8 decode
+        selection-property 1 window-property
    ] if ;

 : own-selection ( prop win -- )
--- a/core/io/encodings/utf8/utf8-tests.factor
+++ b/core/io/encodings/utf8/utf8-tests.factor
@@ -1,5 +1,5 @@
 USING: io.encodings.utf8 tools.test io.encodings.string strings arrays
-bootstrap.unicode ;
+bootstrap.unicode kernel sequences ;
 IN: io.encodings.utf8.tests

 : decode-utf8-w/stream ( array -- newarray )
@@ -25,3 +25,7 @@ IN: io.encodings.utf8.tests

 [ 3 ] [ 1 "日本語" >utf8-index ] unit-test
 [ 3 ] [ 9 "日本語" utf8-index> ] unit-test
+
+[ 3 ] [ 2 "lápis" >utf8-index ] unit-test
+
+[ V{ } ] [ 100000 [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@@ -73,12 +73,14 @@ M: utf8 encode-char
 PRIVATE>

 : code-point-length ( n -- x )
-    log2 {
-        { [ dup 0 7 between? ] [ 1 ] }
-        { [ dup 8 11 between? ] [ 2 ] }
-        { [ dup 12 16 between? ] [ 3 ] }
-        { [ dup 17 21 between? ] [ 4 ] }
-    } cond nip ;
+    dup zero? [ drop 1 ] [
+        log2 {
+            { [ dup 0 6 between? ] [ 1 ] }
+            { [ dup 7 10 between? ] [ 2 ] }
+            { [ dup 11 15 between? ] [ 3 ] }
+            { [ dup 16 20 between? ] [ 4 ] }
+        } cond nip
+    ] if ;

 : code-point-offsets ( string -- indices )
    0 [ code-point-length + ] accumulate swap suffix ;
@@ -87,4 +89,4 @@ PRIVATE>
    code-point-offsets [ <= ] with find drop ;

 : >utf8-index ( n string -- n' )
-    code-point-offsets nth ;
+    code-point-offsets nth ;