Merge branch 'master' into new_ui

db4
Slava Pestov 2009-02-04 04:45:33 -06:00
commit feb5688296
28 changed files with 31390 additions and 127 deletions

View File

@ -139,15 +139,6 @@ HELP: -nrot
} }
} ; } ;
HELP: nrev
{ $values { "n" integer } }
{ $description "A generalization of " { $link spin } " that reverses any number of items at the top of the stack."
}
{ $examples
{ $example "USING: prettyprint generalizations ;" "1 2 3 4 4 nrev .s" "4\n3\n2\n1" }
"The " { $link spin } " word is equivalent to " { $snippet "3 nrev" } "."
} ;
HELP: ndip HELP: ndip
{ $values { "quot" quotation } { "n" integer } } { $values { "quot" quotation } { "n" integer } }
{ $description "A generalization of " { $link dip } " that can work " { $description "A generalization of " { $link dip } " that can work "
@ -327,7 +318,6 @@ $nl
{ $subsection nnip } { $subsection nnip }
{ $subsection ndrop } { $subsection ndrop }
{ $subsection ntuck } { $subsection ntuck }
{ $subsection nrev }
{ $subsection mnswap } { $subsection mnswap }
"Generalized combinators:" "Generalized combinators:"
{ $subsection ndip } { $subsection ndip }

View File

@ -1,8 +1,8 @@
! Copyright (C) 2007, 2008 Chris Double, Doug Coleman, Eduardo ! Copyright (C) 2007, 2008 Chris Double, Doug Coleman, Eduardo
! Cavazos, Slava Pestov. ! Cavazos, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences sequences.private math math.ranges USING: kernel sequences sequences.private math combinators
combinators macros quotations fry macros locals ; macros quotations fry ;
IN: generalizations IN: generalizations
<< <<
@ -51,9 +51,6 @@ MACRO: nnip ( n -- )
MACRO: ntuck ( n -- ) MACRO: ntuck ( n -- )
2 + '[ dup _ -nrot ] ; 2 + '[ dup _ -nrot ] ;
MACRO: nrev ( n -- )
1 [a,b] [ ] [ '[ @ _ -nrot ] ] reduce ;
MACRO: ndip ( quot n -- ) MACRO: ndip ( quot n -- )
[ '[ _ dip ] ] times ; [ '[ _ dip ] ] times ;

View File

@ -3,17 +3,11 @@
USING: accessors kernel combinators math namespaces make assocs USING: accessors kernel combinators math namespaces make assocs
sequences splitting sorting sets strings vectors hashtables sequences splitting sorting sets strings vectors hashtables
quotations arrays byte-arrays math.parser calendar quotations arrays byte-arrays math.parser calendar
calendar.format present urls calendar.format present urls fry
io io.encodings io.encodings.iana io.encodings.binary io io.encodings io.encodings.iana io.encodings.binary
io.encodings.8-bit io.crlf io.encodings.8-bit io.crlf
unicode.case unicode.categories unicode.case unicode.categories
http.parsers ; http.parsers ;
EXCLUDE: fry => , ;
IN: http IN: http
: (read-header) ( -- alist ) : (read-header) ( -- alist )
@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ;
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ; " " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
: parse-content-type ( content-type -- type encoding ) : parse-content-type ( content-type -- type encoding )
";" split1 parse-content-type-attributes "charset" swap at ";" split1
name>encoding over "text/" head? latin1 binary ? or ; parse-content-type-attributes "charset" swap at
[ name>encoding ]
[ dup "text/" head? latin1 binary ? ] if* ;

View File

@ -1,6 +1,21 @@
USING: http http.server math sequences continuations tools.test ; USING: http http.server math sequences continuations tools.test
io.encodings.utf8 io.encodings.binary accessors ;
IN: http.server.tests IN: http.server.tests
[ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test [ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test
\ make-http-error must-infer \ make-http-error must-infer
[ "text/plain; charset=UTF-8" ] [
<response>
"text/plain" >>content-type
utf8 >>content-charset
unparse-content-type
] unit-test
[ "text/xml" ] [
<response>
"text/xml" >>content-type
binary >>content-charset
unparse-content-type
] unit-test

View File

@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- )
tri ; tri ;
: unparse-content-type ( request -- content-type ) : unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ] [ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi
[ content-charset>> encoding>name ] dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ;
bi
[ "; charset=" glue ] when* ;
: ensure-domain ( cookie -- cookie ) : ensure-domain ( cookie -- cookie )
[ [

View File

@ -3,31 +3,33 @@
USING: math.parser arrays io.encodings sequences kernel assocs USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words hashtables io.encodings.ascii generic parser classes.tuple words
words.symbol io io.files splitting namespaces math words.symbol io io.files splitting namespaces math
compiler.units accessors ; compiler.units accessors classes.singleton classes.mixin
io.encodings.iana ;
IN: io.encodings.8-bit IN: io.encodings.8-bit
<PRIVATE <PRIVATE
: mappings { : mappings {
{ "latin1" "8859-1" } ! encoding-name iana-name file-name
{ "latin2" "8859-2" } { "latin1" "ISO_8859-1:1987" "8859-1" }
{ "latin3" "8859-3" } { "latin2" "ISO_8859-2:1987" "8859-2" }
{ "latin4" "8859-4" } { "latin3" "ISO_8859-3:1988" "8859-3" }
{ "latin/cyrillic" "8859-5" } { "latin4" "ISO_8859-4:1988" "8859-4" }
{ "latin/arabic" "8859-6" } { "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
{ "latin/greek" "8859-7" } { "latin/arabic" "ISO_8859-6:1987" "8859-6" }
{ "latin/hebrew" "8859-8" } { "latin/greek" "ISO_8859-7:1987" "8859-7" }
{ "latin5" "8859-9" } { "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
{ "latin6" "8859-10" } { "latin5" "ISO_8859-9:1989" "8859-9" }
{ "latin/thai" "8859-11" } { "latin6" "ISO-8859-10" "8859-10" }
{ "latin7" "8859-13" } { "latin/thai" "TIS-620" "8859-11" }
{ "latin8" "8859-14" } { "latin7" "ISO-8859-13" "8859-13" }
{ "latin9" "8859-15" } { "latin8" "ISO-8859-14" "8859-14" }
{ "latin10" "8859-16" } { "latin9" "ISO-8859-15" "8859-15" }
{ "koi8-r" "KOI8-R" } { "latin10" "ISO-8859-16" "8859-16" }
{ "windows-1252" "CP1252" } { "koi8-r" "KOI8-R" "KOI8-R" }
{ "ebcdic" "CP037" } { "windows-1252" "windows-1252" "CP1252" }
{ "mac-roman" "ROMAN" } { "ebcdic" "IBM037" "CP037" }
{ "mac-roman" "macintosh" "ROMAN" }
} ; } ;
: encoding-file ( file-name -- stream ) : encoding-file ( file-name -- stream )
@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;
M: 8-bit decode-char decode>> decode-8-bit ; M: 8-bit decode-char decode>> decode-8-bit ;
PREDICATE: 8-bit-encoding < word MIXIN: 8-bit-encoding
8-bit-encodings get-global key? ;
M: 8-bit-encoding <encoder> M: 8-bit-encoding <encoder>
8-bit-encodings get-global at <encoder> ; 8-bit-encodings get-global at <encoder> ;
@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
M: 8-bit-encoding <decoder> M: 8-bit-encoding <decoder>
8-bit-encodings get-global at <decoder> ; 8-bit-encodings get-global at <decoder> ;
: create-encoding ( name -- word )
"io.encodings.8-bit" create
[ define-singleton-class ]
[ 8-bit-encoding add-mixin-instance ]
[ ] tri ;
PRIVATE> PRIVATE>
[ [
mappings [ mappings [
[ "io.encodings.8-bit" create ] first3
[ create-encoding ]
[ dupd register-encoding ]
[ encoding-file parse-file 8-bit boa ] [ encoding-file parse-file 8-bit boa ]
bi* tri*
] assoc-map ] H{ } map>assoc
[ keys [ define-symbol ] each ] 8-bit-encodings set-global
[ 8-bit-encodings set-global ]
bi
] with-compilation-unit ] with-compilation-unit

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ; USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
IN: io.encodings.ascii IN: io.encodings.ascii
<PRIVATE <PRIVATE
@ -20,3 +20,5 @@ M: ascii encode-char
M: ascii decode-char M: ascii decode-char
128 decode-if< ; 128 decode-if< ;
ascii "ANSI_X3.4-1968" register-encoding

View File

@ -0,0 +1,14 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: io.encodings.chinese
ARTICLE: "io.encodings.chinese" "Chinese text encodings"
"The " { $vocab-link "io.encodings.chinese" } " vocabulary implements encodings used for Chinese text besides the standard UTF encodings for Unicode strings."
{ $subsection gb18030 } ;
ABOUT: "io.encodings.chinese"
HELP: gb18030
{ $class-description "The encoding descriptor for GB 18030, a Chinese national standard for text encoding. GB 18030 consists of a unique encoding for each Unicode code point, and for this reason has been described as a UTF. It is backwards compatible with the earlier encodings GB 2312 and GBK." }
{ $see-also "encodings-introduction" } ;

View File

@ -0,0 +1,26 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.chinese io.encodings.string strings tools.test arrays ;
IN: io.encodings.chinese.tests
[ "hello" ] [ "hello" gb18030 encode >string ] unit-test
[ "hello" ] [ "hello" gb18030 decode ] unit-test
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 HEX: 30 } ]
[ B{ HEX: B7 HEX: B8 } gb18030 encode ] unit-test
[ { HEX: B7 HEX: B8 } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 HEX: 30 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 HEX: 86 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 HEX: 30 } gb18030 decode >array ] unit-test
[ { HEX: B7 CHAR: replacement-character } ]
[ B{ HEX: A1 HEX: A4 HEX: 81 } gb18030 decode >array ] unit-test
[ { HEX: B7 } ]
[ B{ HEX: A1 HEX: A4 } gb18030 decode >array ] unit-test
[ { CHAR: replacement-character } ]
[ B{ HEX: A1 } gb18030 decode >array ] unit-test
[ { HEX: 44D7 HEX: 464B } ]
[ B{ HEX: 82 HEX: 33 HEX: A3 HEX: 39 HEX: 82 HEX: 33 HEX: C9 HEX: 31 }
gb18030 decode >array ] unit-test
[ { HEX: 82 HEX: 33 HEX: A3 HEX: 39 HEX: 82 HEX: 33 HEX: C9 HEX: 31 } ]
[ { HEX: 44D7 HEX: 464B } gb18030 encode >array ] unit-test

View File

@ -0,0 +1,135 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml xml.data kernel io io.encodings interval-maps splitting fry
math.parser sequences combinators assocs locals accessors math
arrays values io.encodings.ascii ascii io.files biassocs math.order
combinators.short-circuit io.binary io.encodings.iana ;
IN: io.encodings.chinese
SINGLETON: gb18030
gb18030 "GB18030" register-encoding
<PRIVATE
! GB to mean GB18030 is a terrible abuse of notation
! Resource file from:
! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
TUPLE: range ufirst ulast bfirst blast ;
: b>byte-array ( string -- byte-array )
" " split [ hex> ] B{ } map-as ;
: add-range ( contained ranges -- )
[
{
[ "uFirst" attr hex> ]
[ "uLast" attr hex> ]
[ "bFirst" attr b>byte-array ]
[ "bLast" attr b>byte-array ]
} cleave range boa
] dip push ;
: add-mapping ( contained mapping -- )
[
[ "b" attr b>byte-array ]
[ "u" attr hex> ] bi
] dip set-at ;
: xml>gb-data ( stream -- mapping ranges )
[let | mapping [ H{ } clone ] ranges [ V{ } clone ] |
[
dup contained? [
dup name>> main>> {
{ "range" [ ranges add-range ] }
{ "a" [ mapping add-mapping ] }
[ 2drop ]
} case
] [ drop ] if
] each-element mapping ranges
] ;
! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html
: linear ( bytes -- num )
! This hard-codes bMin and bMax
reverse first4
10 * + 126 * + 10 * + ;
: unlinear ( num -- bytes )
B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear -
10 /mod swap [ HEX: 30 + ] dip
126 /mod swap [ HEX: 81 + ] dip
10 /mod swap [ HEX: 30 + ] dip
HEX: 81 +
B{ } 4sequence reverse ;
: >interval-map-by ( start-quot end-quot value-quot seq -- interval-map )
'[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline
: ranges-u>gb ( ranges -- interval-map )
[ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ;
: ranges-gb>u ( ranges -- interval-map )
[ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ;
VALUE: gb>u
VALUE: u>gb
VALUE: mapping
"resource:basis/io/encodings/chinese/gb-18030-2000.xml"
ascii <file-reader> xml>gb-data
[ ranges-u>gb to: u>gb ] [ ranges-gb>u to: gb>u ] bi
>biassoc to: mapping
: lookup-range ( char -- byte-array )
dup u>gb interval-at [
[ ufirst>> - ] [ bfirst>> linear ] bi + unlinear
] [ encode-error ] if* ;
M: gb18030 encode-char ( char stream encoding -- )
drop [
dup mapping at
[ ] [ lookup-range ] ?if
] dip stream-write ;
: second-byte? ( ch -- ? ) ! of a double-byte character
{ [ HEX: 40 HEX: 7E between? ] [ HEX: 80 HEX: fe between? ] } 1|| ;
: quad-1/3? ( ch -- ? ) HEX: 81 HEX: fe between? ;
: quad-2/4? ( ch -- ? ) HEX: 30 HEX: 39 between? ;
: last-bytes? ( byte-array -- ? )
{ [ length 2 = ] [ first quad-1/3? ] [ second quad-2/4? ] } 1&& ;
: decode-quad ( byte-array -- char )
dup mapping value-at [ ] [
linear dup gb>u interval-at [
[ bfirst>> linear - ] [ ufirst>> ] bi +
] [ drop replacement-char ] if*
] ?if ;
: four-byte ( stream byte1 byte2 -- char )
rot 2 swap stream-read dup last-bytes?
[ first2 B{ } 4sequence decode-quad ]
[ 3drop replacement-char ] if ;
: two-byte ( stream byte -- char )
over stream-read1 {
{ [ dup not ] [ 3drop replacement-char ] }
{ [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] }
{ [ dup quad-2/4? ] [ four-byte ] }
[ 3drop replacement-char ]
} cond ;
M: gb18030 decode-char ( stream encoding -- char )
drop dup stream-read1 {
{ [ dup not ] [ 2drop f ] }
{ [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] }
{ [ dup quad-1/3? ] [ two-byte ] }
[ 2drop replacement-char ]
} cond ;

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,35 @@
USING: help.syntax help.markup ; USING: help.syntax help.markup strings ;
IN: io.encodings.iana IN: io.encodings.iana
ABOUT: "io.encodings.iana"
ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:"
{ $subsection name>encoding }
{ $subsection encoding>name }
"To let a new encoding be used with the above words, use the following:"
{ $subsection register-encoding }
"Exceptions when encodings or names are not found:"
{ $subsection missing-encoding }
{ $subsection missing-name } ;
HELP: missing-encoding
{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;
HELP: missing-name
{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;
HELP: name>encoding HELP: name>encoding
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } } { $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ; { $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
HELP: encoding>name HELP: encoding>name
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } } { $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
{ "Given an encoding descriptor, return the preferred IANA name." } ; { $description "Given an encoding descriptor, return the preferred IANA name." } ;
{ name>encoding encoding>name } related-words { name>encoding encoding>name } related-words
HELP: register-encoding
{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ;

View File

@ -1,5 +1,28 @@
USING: io.encodings.iana io.encodings.ascii tools.test ; USING: io.encodings.iana io.encodings.iana.private
io.encodings.utf8 tools.test assocs namespaces ;
IN: io.encodings.iana.tests
[ ascii ] [ "US-ASCII" name>encoding ] unit-test [ utf8 ] [ "UTF-8" name>encoding ] unit-test
[ ascii ] [ "ASCII" name>encoding ] unit-test [ utf8 ] [ "utf8" name>encoding ] unit-test
[ "US-ASCII" ] [ ascii encoding>name ] unit-test [ "UTF-8" ] [ utf8 encoding>name ] unit-test
! We will never implement EBCDIC-FI-SE-A
SINGLETON: ebcdic-fisea
ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test
! Clean up after myself
[ ] [
"EBCDIC-FI-SE-A" n>e-table get delete-at
"csEBCDICFISEA" n>e-table get delete-at
ebcdic-fisea e>n-table get delete-at
] unit-test
[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
[ "csEBCDICFISEA" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail
[ ebcdic-fisea "foobar" register-encoding ] must-fail
[ "foobar" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail

View File

@ -1,37 +1,24 @@
! Copyright (C) 2008 Daniel Ehrenberg ! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings values io.files assocs USING: kernel strings values io.files assocs
splitting sequences io namespaces sets io.encodings.8-bit splitting sequences io namespaces sets io.encodings.utf8 ;
io.encodings.ascii io.encodings.utf8 io.encodings.utf16 ;
IN: io.encodings.iana IN: io.encodings.iana
<PRIVATE <PRIVATE
VALUE: n>e-table SYMBOL: n>e-table
SYMBOL: e>n-table
: e>n-table H{ SYMBOL: aliases
{ ascii "US-ASCII" }
{ utf8 "UTF-8" }
{ utf16 "UTF-16" }
{ utf16be "UTF-16BE" }
{ utf16le "UTF-16LE" }
{ latin1 "ISO-8859-1" }
{ latin2 "ISO-8859-2" }
{ latin3 "ISO-8859-3" }
{ latin4 "ISO-8859-4" }
{ latin/cyrillic "ISO-8859-5" }
{ latin/arabic "ISO-8859-6" }
{ latin/greek "ISO-8859-7" }
{ latin/hebrew "ISO-8859-8" }
{ latin5 "ISO-8859-9" }
{ latin6 "ISO-8859-10" }
} ;
PRIVATE> PRIVATE>
ERROR: missing-encoding name ;
: name>encoding ( name -- encoding ) : name>encoding ( name -- encoding )
n>e-table at ; dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;
ERROR: missing-name encoding ;
: encoding>name ( encoding -- name ) : encoding>name ( encoding -- name )
e>n-table at ; dup e>n-table get-global at [ ] [ missing-name ] ?if ;
<PRIVATE <PRIVATE
: parse-iana ( stream -- synonym-set ) : parse-iana ( stream -- synonym-set )
@ -39,24 +26,33 @@ PRIVATE>
[ " " split ] map [ " " split ] map
[ first { "Name:" "Alias:" } member? ] filter [ first { "Name:" "Alias:" } member? ] filter
[ second ] map { "None" } diff [ second ] map { "None" } diff
] map ; ] map harvest ;
: more-aliases ( -- assoc ) : make-aliases ( stream -- n>e )
parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ;
: initial-n>e ( -- assoc )
H{ H{
{ "UTF8" utf8 } { "UTF8" utf8 }
{ "utf8" utf8 } { "utf8" utf8 }
{ "utf-8" utf8 } { "utf-8" utf8 }
} ; { "UTF-8" utf8 }
} clone ;
: initial-e>n ( -- assoc )
H{ { utf8 "UTF-8" } } clone ;
: make-n>e ( stream -- n>e )
parse-iana [ [
dup [
e>n-table value-at
[ swap [ set ] with each ]
[ drop ] if*
] with each
] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE> PRIVATE>
"resource:basis/io/encodings/iana/character-sets" "resource:basis/io/encodings/iana/character-sets"
ascii <file-reader> make-n>e to: n>e-table utf8 <file-reader> make-aliases aliases set-global
n>e-table global [ initial-n>e or ] change-at
e>n-table global [ initial-e>n or ] change-at
: register-encoding ( descriptor name -- )
[
aliases get at [
[ n>e-table get-global set-at ] with each
] [ "Bad encoding registration" throw ] if*
] [ swap e>n-table get-global set-at ] 2bi ;

View File

@ -3,19 +3,33 @@
USING: sequences kernel io io.files combinators.short-circuit USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings math.order values assocs io.encodings io.binary fry strings
math io.encodings.ascii arrays accessors splitting math.parser math io.encodings.ascii arrays accessors splitting math.parser
biassocs ; biassocs io.encodings.iana ;
IN: io.encodings.japanese IN: io.encodings.japanese
VALUE: shift-jis SINGLETON: shift-jis
VALUE: windows-31j shift-jis "Shift_JIS" register-encoding
SINGLETON: windows-31j
windows-31j "Windows-31J" register-encoding
<PRIVATE <PRIVATE
VALUE: shift-jis-table
M: shift-jis <encoder> drop shift-jis-table <encoder> ;
M: shift-jis <decoder> drop shift-jis-table <decoder> ;
VALUE: windows-31j-table
M: windows-31j <encoder> drop windows-31j-table <encoder> ;
M: windows-31j <decoder> drop windows-31j-table <decoder> ;
TUPLE: jis assoc ; TUPLE: jis assoc ;
: <jis> ( assoc -- jis ) : <jis> ( assoc -- jis )
[ nip ] assoc-filter H{ } assoc-like [ nip ] assoc-filter
>biassoc jis boa ; >biassoc jis boa ;
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ; : ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
@ -31,10 +45,10 @@ TUPLE: jis assoc ;
ascii file-lines process-jis <jis> ; ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt" "resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j make-jis to: windows-31j-table
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt" "resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
make-jis to: shift-jis make-jis to: shift-jis-table
: small? ( char -- ? ) : small? ( char -- ? )
! ASCII range or single-byte halfwidth katakana ! ASCII range or single-byte halfwidth katakana

View File

@ -1,15 +1,21 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg. ! Copyright (C) 2006, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays ; io.encodings combinators splitting io byte-arrays io.encodings.iana ;
IN: io.encodings.utf16 IN: io.encodings.utf16
SINGLETON: utf16be SINGLETON: utf16be
utf16be "UTF-16BE" register-encoding
SINGLETON: utf16le SINGLETON: utf16le
utf16le "UTF-16LE" register-encoding
SINGLETON: utf16 SINGLETON: utf16
utf16 "UTF-16" register-encoding
ERROR: missing-bom ; ERROR: missing-bom ;
<PRIVATE <PRIVATE

View File

@ -1,15 +1,21 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16 USING: math kernel io.encodings combinators io io.encodings.utf16
sequences io.binary ; sequences io.binary io.encodings.iana ;
IN: io.encodings.utf32 IN: io.encodings.utf32
SINGLETON: utf32be SINGLETON: utf32be
utf32be "UTF-32BE" register-encoding
SINGLETON: utf32le SINGLETON: utf32le
utf32le "UTF-32LE" register-encoding
SINGLETON: utf32 SINGLETON: utf32
utf32 "UTF-32" register-encoding
<PRIVATE <PRIVATE
! Decoding ! Decoding

View File

@ -35,7 +35,7 @@ M: too-many-arguments summary
drop "There must be no more than 4 input and 4 output arguments" ; drop "There must be no more than 4 input and 4 output arguments" ;
: check-memoized ( word -- ) : check-memoized ( word -- )
dup #in 4 > swap #out 4 > or [ too-many-arguments ] when ; [ #in ] [ #out ] bi [ 4 > ] either? [ too-many-arguments ] when ;
: define-memoized ( word quot -- ) : define-memoized ( word quot -- )
over check-memoized over check-memoized

View File

@ -70,7 +70,7 @@ IN: stack-checker.transforms
[ [
[ no-case ] [ no-case ]
] [ ] [
dup peek quotation? [ dup peek callable? [
dup peek swap but-last dup peek swap but-last
] [ ] [
[ no-case ] swap [ no-case ] swap

View File

@ -1 +1,2 @@
Daniel Ehrenberg Daniel Ehrenberg
Slava Pestov

View File

@ -0,0 +1,41 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings math kernel ;
IN: wrap
ABOUT: "wrap"
ARTICLE: "wrap" "Word wrapping"
"The " { $vocab-link "wrap" } " vocabulary implements word wrapping. There is support for simple string wrapping, with the following words:"
{ $subsection wrap-lines }
{ $subsection wrap-string }
{ $subsection wrap-indented-string }
"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
{ $subsection wrap }
{ $subsection word }
{ $subsection <word> } ;
HELP: wrap-lines
{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-string
{ $values { "string" string } { "width" integer } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-indented-string
{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;
HELP: wrap
{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
HELP: word
{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
{ $see-also wrap } ;
HELP: <word>
{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
{ $description "Creates a " { $link word } " object with the given parameters." }
{ $see-also wrap } ;

View File

@ -1,5 +1,7 @@
IN: wrap.tests ! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test wrap multiline sequences ; USING: tools.test wrap multiline sequences ;
IN: wrap.tests
[ [
{ {
@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ;
} 35 wrap [ { } like ] map } 35 wrap [ { } like ] map
] unit-test ] unit-test
[
{
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
}
{
T{ word f 4 10 f }
T{ word f 5 10 f }
}
}
] [
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 4 10 f }
T{ word f 5 10 f }
} 35 wrap [ { } like ] map
] unit-test
[ [
<" This is a <" This is a
long piece long piece
@ -46,3 +74,9 @@ word wrap.">
<" This is a long piece of text that we wish to word wrap."> 12 <" This is a long piece of text that we wish to word wrap."> 12
" " wrap-indented-string " " wrap-indented-string
] unit-test ] unit-test
[ "this text\nhas lots of\nspaces" ]
[ "this text has lots of spaces" 12 wrap-string ] unit-test
[ "hello\nhow\nare\nyou\ntoday?" ]
[ "hello how are you today?" 3 wrap-string ] unit-test

View File

@ -1,3 +1,5 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel namespaces make splitting USING: sequences kernel namespaces make splitting
math math.order fry assocs accessors ; math math.order fry assocs accessors ;
IN: wrap IN: wrap
@ -15,12 +17,25 @@ SYMBOL: width
: break-here? ( column word -- ? ) : break-here? ( column word -- ? )
break?>> not [ width get > ] [ drop f ] if ; break?>> not [ width get > ] [ drop f ] if ;
: walk ( n words -- n )
! If on a break, take the rest of the breaks
! If not on a break, go back until you hit a break
2dup bounds-check? [
2dup nth break?>>
[ [ break?>> not ] find-from drop ]
[ [ break?>> ] find-last-from drop 1+ ] if
] [ drop ] if ;
: find-optimal-break ( words -- n ) : find-optimal-break ( words -- n )
[ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ; [ 0 ] keep
[ [ width>> + dup ] keep break-here? ] find drop nip
[ 1 max swap walk ] [ drop f ] if* ;
: (wrap) ( words -- ) : (wrap) ( words -- )
[
dup find-optimal-break dup find-optimal-break
[ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ; [ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
] unless-empty ;
: intersperse ( seq elt -- seq' ) : intersperse ( seq elt -- seq' )
[ '[ _ , ] [ , ] interleave ] { } make ; [ '[ _ , ] [ , ] interleave ] { } make ;
@ -34,9 +49,7 @@ SYMBOL: width
: join-words ( wrapped-lines -- lines ) : join-words ( wrapped-lines -- lines )
[ [
[ break?>> ] [ break?>> ] trim-slice
[ trim-head-slice ]
[ trim-tail-slice ] bi
[ key>> ] map concat [ key>> ] map concat
] map ; ] map ;

View File

@ -1,4 +1,5 @@
USING: xml xml.data xml.utilities tools.test accessors kernel ; USING: xml xml.data xml.utilities tools.test accessors kernel
io.encodings.8-bit ;
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test [ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test

View File

@ -1,6 +1,6 @@
USING: alien strings kernel math tools.test io prettyprint USING: alien strings kernel math tools.test io prettyprint
namespaces combinators words classes sequences accessors namespaces combinators words classes sequences accessors
math.functions ; math.functions arrays ;
IN: combinators.tests IN: combinators.tests
! Compiled ! Compiled
@ -314,3 +314,13 @@ IN: combinators.tests
\ test-case-7 must-infer \ test-case-7 must-infer
[ "plus" ] [ \ + test-case-7 ] unit-test [ "plus" ] [ \ + test-case-7 ] unit-test
! Some corner cases (no pun intended)
DEFER: corner-case-1
<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >>
[ t ] [ \ corner-case-1 optimized>> ] unit-test
[ 4 ] [ 2 corner-case-1 ] unit-test
[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test

View File

@ -59,13 +59,13 @@ ERROR: no-case ;
] [ ] [
dup wrapper? [ wrapped>> ] when dup wrapper? [ wrapped>> ] when
] if = ] if =
] [ quotation? ] if ] [ callable? ] if
] find nip ; ] find nip ;
: case ( obj assoc -- ) : case ( obj assoc -- )
case-find { case-find {
{ [ dup array? ] [ nip second call ] } { [ dup array? ] [ nip second call ] }
{ [ dup quotation? ] [ call ] } { [ dup callable? ] [ call ] }
{ [ dup not ] [ no-case ] } { [ dup not ] [ no-case ] }
} cond ; } cond ;

View File

@ -246,8 +246,8 @@ HELP: retry
{ $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." } { $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
{ $examples { $examples
"Try to get a 0 as a random number:" "Try to get a 0 as a random number:"
{ $unchecked-example "USING: continuations math prettyprint ;" { $unchecked-example "USING: continuations math prettyprint random ;"
"[ 5 random 0 = ] 5 retry t" "[ 5 random 0 = ] 5 retry"
"t" "t"
} }
} ; } ;

View File

@ -53,7 +53,6 @@ IN: reports.noise
{ nipd 3 } { nipd 3 }
{ nkeep 5 } { nkeep 5 }
{ npick 6 } { npick 6 }
{ nrev 5 }
{ nrot 5 } { nrot 5 }
{ nslip 5 } { nslip 5 }
{ ntuck 6 } { ntuck 6 }