Merge branch 'master' into new_ui
commit feb5688296
@@ -139,15 +139,6 @@ HELP: -nrot
}
} ;

HELP: nrev
{ $values { "n" integer } }
{ $description "A generalization of " { $link spin } " that reverses any number of items at the top of the stack."
}
{ $examples
    { $example "USING: prettyprint generalizations ;" "1 2 3 4 4 nrev .s" "4\n3\n2\n1" }
    "The " { $link spin } " word is equivalent to " { $snippet "3 nrev" } "."
} ;

HELP: ndip
{ $values { "quot" quotation } { "n" integer } }
{ $description "A generalization of " { $link dip } " that can work "

@@ -327,7 +318,6 @@ $nl
{ $subsection nnip }
{ $subsection ndrop }
{ $subsection ntuck }
{ $subsection nrev }
{ $subsection mnswap }
"Generalized combinators:"
{ $subsection ndip }
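Reviewer note (not part of the patch): the ndip entry is cut off by the hunk boundary above. As an illustrative sketch, ndip runs a quotation with the top n stack items set aside, so in a listener one would expect:

    USING: prettyprint generalizations math ;
    1 2 3 [ 10 * ] 2 ndip .s    ! prints 10, 2, 3 (bottom to top)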
@@ -1,8 +1,8 @@
! Copyright (C) 2007, 2008 Chris Double, Doug Coleman, Eduardo
! Cavazos, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private math math.ranges
combinators macros quotations fry macros locals ;
USING: kernel sequences sequences.private math combinators
macros quotations fry ;
IN: generalizations

<<

@@ -51,9 +51,6 @@ MACRO: nnip ( n -- )
MACRO: ntuck ( n -- )
    2 + '[ dup _ -nrot ] ;

MACRO: nrev ( n -- )
    1 [a,b] [ ] [ '[ @ _ -nrot ] ] reduce ;

MACRO: ndip ( quot n -- )
    [ '[ _ dip ] ] times ;
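Reviewer note (my own sketch, not part of the patch): the surviving MACRO: ndip simply wraps its quotation in dip n times, so

    [ reverse ] 2 ndip

expands to the equivalent of

    [ [ reverse ] dip ] dip

running the quotation beneath the top two stack items.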
@@ -3,17 +3,11 @@
USING: accessors kernel combinators math namespaces make assocs
sequences splitting sorting sets strings vectors hashtables
quotations arrays byte-arrays math.parser calendar
calendar.format present urls
calendar.format present urls fry
io io.encodings io.encodings.iana io.encodings.binary
io.encodings.8-bit io.crlf
unicode.case unicode.categories
http.parsers ;

EXCLUDE: fry => , ;

IN: http

: (read-header) ( -- alist )

@@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ;
    " " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;

: parse-content-type ( content-type -- type encoding )
    ";" split1 parse-content-type-attributes "charset" swap at
    name>encoding over "text/" head? latin1 binary ? or ;
    ";" split1
    parse-content-type-attributes "charset" swap at
    [ name>encoding ]
    [ dup "text/" head? latin1 binary ? ] if* ;
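A sketch of what the rewritten parse-content-type does (my reading of the hunk, not an addition to it): an explicit charset attribute is resolved through io.encodings.iana, and when none is given the encoding falls back to latin1 for "text/" types and binary for everything else, e.g.

    "text/plain; charset=UTF-8" parse-content-type
    ! expected to leave "text/plain" and utf8 on the stack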
@@ -1,6 +1,21 @@
USING: http http.server math sequences continuations tools.test ;
USING: http http.server math sequences continuations tools.test
io.encodings.utf8 io.encodings.binary accessors ;
IN: http.server.tests

[ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test

\ make-http-error must-infer

[ "text/plain; charset=UTF-8" ] [
    <response>
        "text/plain" >>content-type
        utf8 >>content-charset
    unparse-content-type
] unit-test

[ "text/xml" ] [
    <response>
        "text/xml" >>content-type
        binary >>content-charset
    unparse-content-type
] unit-test

@@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- )
    tri ;

: unparse-content-type ( request -- content-type )
    [ content-type>> "application/octet-stream" or ]
    [ content-charset>> encoding>name ]
    bi
    [ "; charset=" glue ] when* ;
    [ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi
    dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ;

: ensure-domain ( cookie -- cookie )
    [
@@ -3,31 +3,33 @@
USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words
words.symbol io io.files splitting namespaces math
compiler.units accessors ;
compiler.units accessors classes.singleton classes.mixin
io.encodings.iana ;
IN: io.encodings.8-bit

<PRIVATE

: mappings {
    { "latin1" "8859-1" }
    { "latin2" "8859-2" }
    { "latin3" "8859-3" }
    { "latin4" "8859-4" }
    { "latin/cyrillic" "8859-5" }
    { "latin/arabic" "8859-6" }
    { "latin/greek" "8859-7" }
    { "latin/hebrew" "8859-8" }
    { "latin5" "8859-9" }
    { "latin6" "8859-10" }
    { "latin/thai" "8859-11" }
    { "latin7" "8859-13" }
    { "latin8" "8859-14" }
    { "latin9" "8859-15" }
    { "latin10" "8859-16" }
    { "koi8-r" "KOI8-R" }
    { "windows-1252" "CP1252" }
    { "ebcdic" "CP037" }
    { "mac-roman" "ROMAN" }
    ! encoding-name iana-name file-name
    { "latin1" "ISO_8859-1:1987" "8859-1" }
    { "latin2" "ISO_8859-2:1987" "8859-2" }
    { "latin3" "ISO_8859-3:1988" "8859-3" }
    { "latin4" "ISO_8859-4:1988" "8859-4" }
    { "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
    { "latin/arabic" "ISO_8859-6:1987" "8859-6" }
    { "latin/greek" "ISO_8859-7:1987" "8859-7" }
    { "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
    { "latin5" "ISO_8859-9:1989" "8859-9" }
    { "latin6" "ISO-8859-10" "8859-10" }
    { "latin/thai" "TIS-620" "8859-11" }
    { "latin7" "ISO-8859-13" "8859-13" }
    { "latin8" "ISO-8859-14" "8859-14" }
    { "latin9" "ISO-8859-15" "8859-15" }
    { "latin10" "ISO-8859-16" "8859-16" }
    { "koi8-r" "KOI8-R" "KOI8-R" }
    { "windows-1252" "windows-1252" "CP1252" }
    { "ebcdic" "IBM037" "CP037" }
    { "mac-roman" "macintosh" "ROMAN" }
} ;

: encoding-file ( file-name -- stream )

@@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;

M: 8-bit decode-char decode>> decode-8-bit ;

PREDICATE: 8-bit-encoding < word
    8-bit-encodings get-global key? ;
MIXIN: 8-bit-encoding

M: 8-bit-encoding <encoder>
    8-bit-encodings get-global at <encoder> ;

@@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
M: 8-bit-encoding <decoder>
    8-bit-encodings get-global at <decoder> ;

: create-encoding ( name -- word )
    "io.encodings.8-bit" create
    [ define-singleton-class ]
    [ 8-bit-encoding add-mixin-instance ]
    [ ] tri ;

PRIVATE>

[
    mappings [
    [ "io.encodings.8-bit" create ]
    first3
    [ create-encoding ]
    [ dupd register-encoding ]
    [ encoding-file parse-file 8-bit boa ]
    bi*
    ] assoc-map
    [ keys [ define-symbol ] each ]
    [ 8-bit-encodings set-global ]
    bi
    tri*
    ] H{ } map>assoc
    8-bit-encodings set-global
] with-compilation-unit
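With this change each 8-bit encoding becomes a singleton word that is also registered with IANA, with the table supplying only its encode/decode data. A hedged usage sketch (not part of the patch):

    USING: io.encodings.string io.encodings.8-bit ;
    "caf\u0000e9" latin1 encode    ! expected: B{ 99 97 102 HEX: e9 }
    B{ HEX: e9 } latin1 decode     ! expected: "\u0000e9"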
@@ -1,6 +1,6 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ;
USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
IN: io.encodings.ascii

<PRIVATE

@@ -20,3 +20,5 @@ M: ascii encode-char

M: ascii decode-char
    128 decode-if< ;

ascii "ANSI_X3.4-1968" register-encoding
@@ -0,0 +1,14 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: io.encodings.chinese

ARTICLE: "io.encodings.chinese" "Chinese text encodings"
"The " { $vocab-link "io.encodings.chinese" } " vocabulary implements encodings used for Chinese text besides the standard UTF encodings for Unicode strings."
{ $subsection gb18030 } ;

ABOUT: "io.encodings.chinese"

HELP: gb18030
{ $class-description "The encoding descriptor for GB 18030, a Chinese national standard for text encoding. GB 18030 consists of a unique encoding for each Unicode code point, and for this reason has been described as a UTF. It is backwards compatible with the earlier encodings GB 2312 and GBK." }
{ $see-also "encodings-introduction" } ;
@@ -0,0 +1,26 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.chinese io.encodings.string strings tools.test arrays ;
IN: io.encodings.chinese.tests

[ "hello" ] [ "hello" gb18030 encode >string ] unit-test
[ "hello" ] [ "hello" gb18030 decode ] unit-test
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 HEX: 30 } ]
[ B{ HEX: B7 HEX: B8 } gb18030 encode ] unit-test
[ { HEX: B7 HEX: B8 } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 HEX: 30 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 } gb18030 decode >array ] unit-test
[ { HEX: B7 } ]
[ B{ HEX: A1 HEX: A4 } gb18030 decode >array ] unit-test
[ { CHAR: replacement-character } ]
[ B{ HEX: A1 } gb18030 decode >array ] unit-test
[ { HEX: 44D7 HEX: 464B } ]
[ B{ HEX: 82 HEX: 33 HEX: A3 HEX: 39 HEX: 82 HEX: 33 HEX: C9 HEX: 31 }
    gb18030 decode >array ] unit-test
[ { HEX: 82 HEX: 33 HEX: A3 HEX: 39 HEX: 82 HEX: 33 HEX: C9 HEX: 31 } ]
[ { HEX: 44D7 HEX: 464B } gb18030 encode >array ] unit-test
@@ -0,0 +1,135 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml xml.data kernel io io.encodings interval-maps splitting fry
math.parser sequences combinators assocs locals accessors math
arrays values io.encodings.ascii ascii io.files biassocs math.order
combinators.short-circuit io.binary io.encodings.iana ;
IN: io.encodings.chinese

SINGLETON: gb18030

gb18030 "GB18030" register-encoding

<PRIVATE

! GB to mean GB18030 is a terrible abuse of notation

! Resource file from:
! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml

TUPLE: range ufirst ulast bfirst blast ;

: b>byte-array ( string -- byte-array )
    " " split [ hex> ] B{ } map-as ;

: add-range ( contained ranges -- )
    [
        {
            [ "uFirst" attr hex> ]
            [ "uLast" attr hex> ]
            [ "bFirst" attr b>byte-array ]
            [ "bLast" attr b>byte-array ]
        } cleave range boa
    ] dip push ;

: add-mapping ( contained mapping -- )
    [
        [ "b" attr b>byte-array ]
        [ "u" attr hex> ] bi
    ] dip set-at ;

: xml>gb-data ( stream -- mapping ranges )
    [let | mapping [ H{ } clone ] ranges [ V{ } clone ] |
        [
            dup contained? [
                dup name>> main>> {
                    { "range" [ ranges add-range ] }
                    { "a" [ mapping add-mapping ] }
                    [ 2drop ]
                } case
            ] [ drop ] if
        ] each-element mapping ranges
    ] ;

! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html

: linear ( bytes -- num )
    ! This hard-codes bMin and bMax
    reverse first4
    10 * + 126 * + 10 * + ;

: unlinear ( num -- bytes )
    B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear -
    10 /mod swap [ HEX: 30 + ] dip
    126 /mod swap [ HEX: 81 + ] dip
    10 /mod swap [ HEX: 30 + ] dip
    HEX: 81 +
    B{ } 4sequence reverse ;

: >interval-map-by ( start-quot end-quot value-quot seq -- interval-map )
    '[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline

: ranges-u>gb ( ranges -- interval-map )
    [ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ;

: ranges-gb>u ( ranges -- interval-map )
    [ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ;

VALUE: gb>u
VALUE: u>gb
VALUE: mapping

"resource:basis/io/encodings/chinese/gb-18030-2000.xml"
ascii <file-reader> xml>gb-data
[ ranges-u>gb to: u>gb ] [ ranges-gb>u to: gb>u ] bi
>biassoc to: mapping

: lookup-range ( char -- byte-array )
    dup u>gb interval-at [
        [ ufirst>> - ] [ bfirst>> linear ] bi + unlinear
    ] [ encode-error ] if* ;

M: gb18030 encode-char ( char stream encoding -- )
    drop [
        dup mapping at
        [ ] [ lookup-range ] ?if
    ] dip stream-write ;

: second-byte? ( ch -- ? ) ! of a double-byte character
    { [ HEX: 40 HEX: 7E between? ] [ HEX: 80 HEX: fe between? ] } 1|| ;

: quad-1/3? ( ch -- ? ) HEX: 81 HEX: fe between? ;

: quad-2/4? ( ch -- ? ) HEX: 30 HEX: 39 between? ;

: last-bytes? ( byte-array -- ? )
    { [ length 2 = ] [ first quad-1/3? ] [ second quad-2/4? ] } 1&& ;

: decode-quad ( byte-array -- char )
    dup mapping value-at [ ] [
        linear dup gb>u interval-at [
            [ bfirst>> linear - ] [ ufirst>> ] bi +
        ] [ drop replacement-char ] if*
    ] ?if ;

: four-byte ( stream byte1 byte2 -- char )
    rot 2 swap stream-read dup last-bytes?
    [ first2 B{ } 4sequence decode-quad ]
    [ 3drop replacement-char ] if ;

: two-byte ( stream byte -- char )
    over stream-read1 {
        { [ dup not ] [ 3drop replacement-char ] }
        { [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] }
        { [ dup quad-2/4? ] [ four-byte ] }
        [ 3drop replacement-char ]
    } cond ;

M: gb18030 decode-char ( stream encoding -- char )
    drop dup stream-read1 {
        { [ dup not ] [ 2drop f ] }
        { [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] }
        { [ dup quad-1/3? ] [ two-byte ] }
        [ 2drop replacement-char ]
    } cond ;
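Reviewer note on the four-byte arithmetic above (my own worked illustration, not in the patch): linear treats a four-byte sequence b1 b2 b3 b4 as digits in a mixed radix, ((b1 * 10 + b2) * 126 + b3) * 10 + b4, and unlinear subtracts the linear value of B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } before re-extracting the digits, so the smallest legal sequence round-trips to itself (both words sit in the vocabulary's <PRIVATE section):

    B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } dup linear unlinear =    ! expected: t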
File diff suppressed because it is too large
@@ -1,12 +1,35 @@
USING: help.syntax help.markup ;
USING: help.syntax help.markup strings ;
IN: io.encodings.iana

ABOUT: "io.encodings.iana"

ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:"
{ $subsection name>encoding }
{ $subsection encoding>name }
"To let a new encoding be used with the above words, use the following:"
{ $subsection register-encoding }
"Exceptions when encodings or names are not found:"
{ $subsection missing-encoding }
{ $subsection missing-name } ;

HELP: missing-encoding
{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;

HELP: missing-name
{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;

HELP: name>encoding
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;

HELP: encoding>name
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
{ "Given an encoding descriptor, return the preferred IANA name." } ;
{ $description "Given an encoding descriptor, return the preferred IANA name." } ;

{ name>encoding encoding>name } related-words

HELP: register-encoding
{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ;
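A usage sketch mirroring the test changes later in this commit (nothing new added here):

    USING: io.encodings.iana io.encodings.utf8 ;
    "UTF-8" name>encoding    ! => utf8
    utf8 encoding>name       ! => "UTF-8"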
@@ -1,5 +1,28 @@
USING: io.encodings.iana io.encodings.ascii tools.test ;
USING: io.encodings.iana io.encodings.iana.private
io.encodings.utf8 tools.test assocs namespaces ;
IN: io.encodings.iana.tests

[ ascii ] [ "US-ASCII" name>encoding ] unit-test
[ ascii ] [ "ASCII" name>encoding ] unit-test
[ "US-ASCII" ] [ ascii encoding>name ] unit-test
[ utf8 ] [ "UTF-8" name>encoding ] unit-test
[ utf8 ] [ "utf8" name>encoding ] unit-test
[ "UTF-8" ] [ utf8 encoding>name ] unit-test

! We will never implement EBCDIC-FI-SE-A
SINGLETON: ebcdic-fisea
ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test

! Clean up after myself
[ ] [
    "EBCDIC-FI-SE-A" n>e-table get delete-at
    "csEBCDICFISEA" n>e-table get delete-at
    ebcdic-fisea e>n-table get delete-at
] unit-test
[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
[ "csEBCDICFISEA" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail

[ ebcdic-fisea "foobar" register-encoding ] must-fail
[ "foobar" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail
@@ -1,37 +1,24 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings values io.files assocs
splitting sequences io namespaces sets io.encodings.8-bit
io.encodings.ascii io.encodings.utf8 io.encodings.utf16 ;
splitting sequences io namespaces sets io.encodings.utf8 ;
IN: io.encodings.iana

<PRIVATE
VALUE: n>e-table

: e>n-table H{
    { ascii "US-ASCII" }
    { utf8 "UTF-8" }
    { utf16 "UTF-16" }
    { utf16be "UTF-16BE" }
    { utf16le "UTF-16LE" }
    { latin1 "ISO-8859-1" }
    { latin2 "ISO-8859-2" }
    { latin3 "ISO-8859-3" }
    { latin4 "ISO-8859-4" }
    { latin/cyrillic "ISO-8859-5" }
    { latin/arabic "ISO-8859-6" }
    { latin/greek "ISO-8859-7" }
    { latin/hebrew "ISO-8859-8" }
    { latin5 "ISO-8859-9" }
    { latin6 "ISO-8859-10" }
} ;
SYMBOL: n>e-table
SYMBOL: e>n-table
SYMBOL: aliases
PRIVATE>

ERROR: missing-encoding name ;

: name>encoding ( name -- encoding )
    n>e-table at ;
    dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;

ERROR: missing-name encoding ;

: encoding>name ( encoding -- name )
    e>n-table at ;
    dup e>n-table get-global at [ ] [ missing-name ] ?if ;

<PRIVATE
: parse-iana ( stream -- synonym-set )

@@ -39,24 +26,33 @@ PRIVATE>
    [ " " split ] map
    [ first { "Name:" "Alias:" } member? ] filter
    [ second ] map { "None" } diff
    ] map ;
    ] map harvest ;

: more-aliases ( -- assoc )
: make-aliases ( stream -- n>e )
    parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ;

: initial-n>e ( -- assoc )
    H{
        { "UTF8" utf8 }
        { "utf8" utf8 }
        { "utf-8" utf8 }
    } ;
        { "UTF-8" utf8 }
    } clone ;

: initial-e>n ( -- assoc )
    H{ { utf8 "UTF-8" } } clone ;

: make-n>e ( stream -- n>e )
    parse-iana [ [
        dup [
            e>n-table value-at
            [ swap [ set ] with each ]
            [ drop ] if*
        ] with each
    ] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE>

"resource:basis/io/encodings/iana/character-sets"
ascii <file-reader> make-n>e to: n>e-table
utf8 <file-reader> make-aliases aliases set-global

n>e-table global [ initial-n>e or ] change-at
e>n-table global [ initial-e>n or ] change-at

: register-encoding ( descriptor name -- )
    [
        aliases get at [
            [ n>e-table get-global set-at ] with each
        ] [ "Bad encoding registration" throw ] if*
    ] [ swap e>n-table get-global set-at ] 2bi ;
@@ -3,19 +3,33 @@
USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings
math io.encodings.ascii arrays accessors splitting math.parser
biassocs ;
biassocs io.encodings.iana ;
IN: io.encodings.japanese

VALUE: shift-jis
SINGLETON: shift-jis

VALUE: windows-31j
shift-jis "Shift_JIS" register-encoding

SINGLETON: windows-31j

windows-31j "Windows-31J" register-encoding

<PRIVATE

VALUE: shift-jis-table

M: shift-jis <encoder> drop shift-jis-table <encoder> ;
M: shift-jis <decoder> drop shift-jis-table <decoder> ;

VALUE: windows-31j-table

M: windows-31j <encoder> drop windows-31j-table <encoder> ;
M: windows-31j <decoder> drop windows-31j-table <decoder> ;

TUPLE: jis assoc ;

: <jis> ( assoc -- jis )
    [ nip ] assoc-filter H{ } assoc-like
    [ nip ] assoc-filter
    >biassoc jis boa ;

: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;

@@ -31,10 +45,10 @@ TUPLE: jis assoc ;
    ascii file-lines process-jis <jis> ;

"resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j
make-jis to: windows-31j-table

"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
make-jis to: shift-jis
make-jis to: shift-jis-table

: small? ( char -- ? )
    ! ASCII range or single-byte halfwidth katakana
@@ -1,15 +1,21 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! Copyright (C) 2006, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays ;
io.encodings combinators splitting io byte-arrays io.encodings.iana ;
IN: io.encodings.utf16

SINGLETON: utf16be

utf16be "UTF-16BE" register-encoding

SINGLETON: utf16le

utf16le "UTF-16LE" register-encoding

SINGLETON: utf16

utf16 "UTF-16" register-encoding

ERROR: missing-bom ;

<PRIVATE
@@ -1,15 +1,21 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16
sequences io.binary ;
sequences io.binary io.encodings.iana ;
IN: io.encodings.utf32

SINGLETON: utf32be

utf32be "UTF-32BE" register-encoding

SINGLETON: utf32le

utf32le "UTF-32LE" register-encoding

SINGLETON: utf32

utf32 "UTF-32" register-encoding

<PRIVATE

! Decoding
@@ -35,7 +35,7 @@ M: too-many-arguments summary
    drop "There must be no more than 4 input and 4 output arguments" ;

: check-memoized ( word -- )
    dup #in 4 > swap #out 4 > or [ too-many-arguments ] when ;
    [ #in ] [ #out ] bi [ 4 > ] either? [ too-many-arguments ] when ;

: define-memoized ( word quot -- )
    over check-memoized
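The rewritten check relies on kernel's either? combinator, which applies a quotation to each of two values and succeeds if either call does. A tiny sketch (not from the patch):

    USING: kernel math prettyprint ;
    3 5 [ 4 > ] either? .    ! t
    1 2 [ 4 > ] either? .    ! f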
@@ -70,7 +70,7 @@ IN: stack-checker.transforms
    [
        [ no-case ]
    ] [
        dup peek quotation? [
        dup peek callable? [
            dup peek swap but-last
        ] [
            [ no-case ] swap
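Reviewer note: switching the peek test from quotation? to callable? lets the default branch of a case be a curried or composed quotation, not just a literal one. The corner-case unit test added further down in this commit exercises exactly that:

    2 2 [ + ] curry 1array case    ! => 4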
@@ -1 +1,2 @@
Daniel Ehrenberg
Slava Pestov
@@ -0,0 +1,41 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings math kernel ;
IN: wrap

ABOUT: "wrap"

ARTICLE: "wrap" "Word wrapping"
"The " { $vocab-link "wrap" } " vocabulary implements word wrapping. There is support for simple string wrapping, with the following words:"
{ $subsection wrap-lines }
{ $subsection wrap-string }
{ $subsection wrap-indented-string }
"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
{ $subsection wrap }
{ $subsection word }
{ $subsection <word> } ;

HELP: wrap-lines
{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;

HELP: wrap-string
{ $values { "string" string } { "width" integer } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;

HELP: wrap-indented-string
{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;

HELP: wrap
{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;

HELP: word
{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
{ $see-also wrap } ;

HELP: <word>
{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
{ $description "Creates a " { $link word } " object with the given parameters." }
{ $see-also wrap } ;
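A usage sketch for the string words, taken from the wrap test changes later in this commit:

    USING: wrap ;
    "hello how are you today?" 3 wrap-string
    ! => "hello\nhow\nare\nyou\ntoday?"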
@@ -1,5 +1,7 @@
IN: wrap.tests
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test wrap multiline sequences ;
IN: wrap.tests

[
    {

@@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ;
    } 35 wrap [ { } like ] map
] unit-test

[
    {
        {
            T{ word f 1 10 f }
            T{ word f 2 10 f }
            T{ word f 3 9 t }
            T{ word f 3 9 t }
            T{ word f 3 9 t }
        }
        {
            T{ word f 4 10 f }
            T{ word f 5 10 f }
        }
    }
] [
    {
        T{ word f 1 10 f }
        T{ word f 2 10 f }
        T{ word f 3 9 t }
        T{ word f 3 9 t }
        T{ word f 3 9 t }
        T{ word f 4 10 f }
        T{ word f 5 10 f }
    } 35 wrap [ { } like ] map
] unit-test

[
<" This is a
long piece

@@ -45,4 +73,10 @@ word wrap.">
] [
    <" This is a long piece of text that we wish to word wrap."> 12
    " " wrap-indented-string
] unit-test
] unit-test

[ "this text\nhas lots of\nspaces" ]
[ "this text has lots of spaces" 12 wrap-string ] unit-test

[ "hello\nhow\nare\nyou\ntoday?" ]
[ "hello how are you today?" 3 wrap-string ] unit-test
@@ -1,3 +1,5 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel namespaces make splitting
math math.order fry assocs accessors ;
IN: wrap

@@ -15,12 +17,25 @@ SYMBOL: width
: break-here? ( column word -- ? )
    break?>> not [ width get > ] [ drop f ] if ;

: walk ( n words -- n )
    ! If on a break, take the rest of the breaks
    ! If not on a break, go back until you hit a break
    2dup bounds-check? [
        2dup nth break?>>
        [ [ break?>> not ] find-from drop ]
        [ [ break?>> ] find-last-from drop 1+ ] if
    ] [ drop ] if ;

: find-optimal-break ( words -- n )
    [ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ;
    [ 0 ] keep
    [ [ width>> + dup ] keep break-here? ] find drop nip
    [ 1 max swap walk ] [ drop f ] if* ;

: (wrap) ( words -- )
    dup find-optimal-break
    [ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ;
    [
        dup find-optimal-break
        [ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
    ] unless-empty ;

: intersperse ( seq elt -- seq' )
    [ '[ _ , ] [ , ] interleave ] { } make ;

@@ -34,9 +49,7 @@ SYMBOL: width

: join-words ( wrapped-lines -- lines )
    [
        [ break?>> ]
        [ trim-head-slice ]
        [ trim-tail-slice ] bi
        [ break?>> ] trim-slice
        [ key>> ] map concat
    ] map ;
@@ -1,4 +1,5 @@
USING: xml xml.data xml.utilities tools.test accessors kernel ;
USING: xml xml.data xml.utilities tools.test accessors kernel
io.encodings.8-bit ;

[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test

@@ -11,4 +12,4 @@ USING: xml xml.data xml.utilities tools.test accessors kernel ;
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
@@ -1,6 +1,6 @@
USING: alien strings kernel math tools.test io prettyprint
namespaces combinators words classes sequences accessors
math.functions ;
math.functions arrays ;
IN: combinators.tests

! Compiled

@@ -314,3 +314,13 @@ IN: combinators.tests
\ test-case-7 must-infer

[ "plus" ] [ \ + test-case-7 ] unit-test

! Some corner cases (no pun intended)
DEFER: corner-case-1

<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >>

[ t ] [ \ corner-case-1 optimized>> ] unit-test
[ 4 ] [ 2 corner-case-1 ] unit-test

[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test
@@ -59,13 +59,13 @@ ERROR: no-case ;
    ] [
        dup wrapper? [ wrapped>> ] when
    ] if =
    ] [ quotation? ] if
    ] [ callable? ] if
    ] find nip ;

: case ( obj assoc -- )
    case-find {
        { [ dup array? ] [ nip second call ] }
        { [ dup quotation? ] [ call ] }
        { [ dup callable? ] [ call ] }
        { [ dup not ] [ no-case ] }
    } cond ;
@@ -246,8 +246,8 @@ HELP: retry
{ $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
{ $examples
    "Try to get a 0 as a random number:"
    { $unchecked-example "USING: continuations math prettyprint ;"
    "[ 5 random 0 = ] 5 retry t"
    { $unchecked-example "USING: continuations math prettyprint random ;"
    "[ 5 random 0 = ] 5 retry"
    "t"
    }
} ;
@@ -53,7 +53,6 @@ IN: reports.noise
    { nipd 3 }
    { nkeep 5 }
    { npick 6 }
    { nrev 5 }
    { nrot 5 }
    { nslip 5 }
    { ntuck 6 }