Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2009-02-04 07:42:46 -08:00
commit 7282010cec
29 changed files with 342 additions and 145 deletions

View File

@ -3,17 +3,11 @@
USING: accessors kernel combinators math namespaces make assocs
sequences splitting sorting sets strings vectors hashtables
quotations arrays byte-arrays math.parser calendar
calendar.format present urls
calendar.format present urls fry
io io.encodings io.encodings.iana io.encodings.binary
io.encodings.8-bit io.crlf
unicode.case unicode.categories
http.parsers ;
EXCLUDE: fry => , ;
IN: http
: (read-header) ( -- alist )
@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ;
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
: parse-content-type ( content-type -- type encoding )
";" split1 parse-content-type-attributes "charset" swap at
name>encoding over "text/" head? latin1 binary ? or ;
";" split1
parse-content-type-attributes "charset" swap at
[ name>encoding ]
[ dup "text/" head? latin1 binary ? ] if* ;

View File

@ -1,6 +1,21 @@
USING: http http.server math sequences continuations tools.test ;
USING: http http.server math sequences continuations tools.test
io.encodings.utf8 io.encodings.binary accessors ;
IN: http.server.tests
[ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test
\ make-http-error must-infer
[ "text/plain; charset=UTF-8" ] [
<response>
"text/plain" >>content-type
utf8 >>content-charset
unparse-content-type
] unit-test
[ "text/xml" ] [
<response>
"text/xml" >>content-type
binary >>content-charset
unparse-content-type
] unit-test

View File

@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- )
tri ;
: unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ]
[ content-charset>> encoding>name ]
bi
[ "; charset=" glue ] when* ;
[ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi
dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ;
: ensure-domain ( cookie -- cookie )
[

View File

@ -3,31 +3,33 @@
USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words
words.symbol io io.files splitting namespaces math
compiler.units accessors ;
compiler.units accessors classes.singleton classes.mixin
io.encodings.iana ;
IN: io.encodings.8-bit
<PRIVATE
: mappings {
{ "latin1" "8859-1" }
{ "latin2" "8859-2" }
{ "latin3" "8859-3" }
{ "latin4" "8859-4" }
{ "latin/cyrillic" "8859-5" }
{ "latin/arabic" "8859-6" }
{ "latin/greek" "8859-7" }
{ "latin/hebrew" "8859-8" }
{ "latin5" "8859-9" }
{ "latin6" "8859-10" }
{ "latin/thai" "8859-11" }
{ "latin7" "8859-13" }
{ "latin8" "8859-14" }
{ "latin9" "8859-15" }
{ "latin10" "8859-16" }
{ "koi8-r" "KOI8-R" }
{ "windows-1252" "CP1252" }
{ "ebcdic" "CP037" }
{ "mac-roman" "ROMAN" }
! encoding-name iana-name file-name
{ "latin1" "ISO_8859-1:1987" "8859-1" }
{ "latin2" "ISO_8859-2:1987" "8859-2" }
{ "latin3" "ISO_8859-3:1988" "8859-3" }
{ "latin4" "ISO_8859-4:1988" "8859-4" }
{ "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
{ "latin/arabic" "ISO_8859-6:1987" "8859-6" }
{ "latin/greek" "ISO_8859-7:1987" "8859-7" }
{ "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
{ "latin5" "ISO_8859-9:1989" "8859-9" }
{ "latin6" "ISO-8859-10" "8859-10" }
{ "latin/thai" "TIS-620" "8859-11" }
{ "latin7" "ISO-8859-13" "8859-13" }
{ "latin8" "ISO-8859-14" "8859-14" }
{ "latin9" "ISO-8859-15" "8859-15" }
{ "latin10" "ISO-8859-16" "8859-16" }
{ "koi8-r" "KOI8-R" "KOI8-R" }
{ "windows-1252" "windows-1252" "CP1252" }
{ "ebcdic" "IBM037" "CP037" }
{ "mac-roman" "macintosh" "ROMAN" }
} ;
: encoding-file ( file-name -- stream )
@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;
M: 8-bit decode-char decode>> decode-8-bit ;
PREDICATE: 8-bit-encoding < word
8-bit-encodings get-global key? ;
MIXIN: 8-bit-encoding
M: 8-bit-encoding <encoder>
8-bit-encodings get-global at <encoder> ;
@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
M: 8-bit-encoding <decoder>
8-bit-encodings get-global at <decoder> ;
: create-encoding ( name -- word )
    ! Creates the word named `name` in the io.encodings.8-bit vocabulary,
    ! defines it as a singleton class, adds it to the 8-bit-encoding
    ! mixin, and leaves the word on the stack.
    "io.encodings.8-bit" create
    dup define-singleton-class
    dup 8-bit-encoding add-mixin-instance ;
PRIVATE>
[
mappings [
[ "io.encodings.8-bit" create ]
first3
[ create-encoding ]
[ dupd register-encoding ]
[ encoding-file parse-file 8-bit boa ]
bi*
] assoc-map
[ keys [ define-symbol ] each ]
[ 8-bit-encodings set-global ]
bi
tri*
] H{ } map>assoc
8-bit-encodings set-global
] with-compilation-unit

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ;
USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
IN: io.encodings.ascii
<PRIVATE
@ -20,3 +20,5 @@ M: ascii encode-char
M: ascii decode-char
128 decode-if< ;
ascii "ANSI_X3.4-1968" register-encoding

View File

@ -1,13 +1,15 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml xml.data kernel io io.encodings interval-maps splitting fry
math.parser sequences combinators assocs locals accessors math
arrays values io.encodings.ascii ascii io.files biassocs math.order
combinators.short-circuit io.binary ;
math.parser sequences combinators assocs locals accessors math arrays
byte-arrays values io.encodings.ascii ascii io.files biassocs
math.order combinators.short-circuit io.binary io.encodings.iana ;
IN: io.encodings.chinese
SINGLETON: gb18030
gb18030 "GB18030" register-encoding
<PRIVATE
! GB to mean GB18030 is a terrible abuse of notation
@ -15,6 +17,14 @@ SINGLETON: gb18030
! Resource file from:
! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html
: linear ( bytes -- num )
! Folds a four-byte sequence b1 b2 b3 b4 into one integer:
!   ((b1 * 10 + b2) * 126 + b3) * 10 + b4
! `reverse first4` leaves b1 on top of the stack, so the multiply-add
! chain below consumes the bytes from first to last.
! The raw byte values are used without subtracting a base.
! NOTE(review): presumably only differences between two results are
! meaningful - confirm against the callers.
! This hard-codes bMin and bMax
reverse first4
10 * + 126 * + 10 * + ; foldable
TUPLE: range ufirst ulast bfirst blast ;
: b>byte-array ( string -- byte-array )
@ -25,8 +35,8 @@ TUPLE: range ufirst ulast bfirst blast ;
{
[ "uFirst" attr hex> ]
[ "uLast" attr hex> ]
[ "bFirst" attr b>byte-array ]
[ "bLast" attr b>byte-array ]
[ "bFirst" attr b>byte-array linear ]
[ "bLast" attr b>byte-array linear ]
} cleave range boa
] dip push ;
@ -49,21 +59,13 @@ TUPLE: range ufirst ulast bfirst blast ;
] each-element mapping ranges
] ;
! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html
: linear ( bytes -- num )
! This hard-codes bMin and bMax
reverse first4
10 * + 126 * + 10 * + ;
: unlinear ( num -- bytes )
B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear -
10 /mod swap [ HEX: 30 + ] dip
126 /mod swap [ HEX: 81 + ] dip
10 /mod swap [ HEX: 30 + ] dip
10 /mod HEX: 30 + swap
126 /mod HEX: 81 + swap
10 /mod HEX: 30 + swap
HEX: 81 +
B{ } 4sequence reverse ;
4byte-array dup reverse-here ;
: >interval-map-by ( start-quot end-quot value-quot seq -- interval-map )
'[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline
@ -72,7 +74,7 @@ TUPLE: range ufirst ulast bfirst blast ;
[ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ;
: ranges-gb>u ( ranges -- interval-map )
[ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ;
[ bfirst>> ] [ blast>> ] [ ] >interval-map-by ;
VALUE: gb>u
VALUE: u>gb
@ -85,7 +87,7 @@ ascii <file-reader> xml>gb-data
: lookup-range ( char -- byte-array )
dup u>gb interval-at [
[ ufirst>> - ] [ bfirst>> linear ] bi + unlinear
[ ufirst>> - ] [ bfirst>> ] bi + unlinear
] [ encode-error ] if* ;
M: gb18030 encode-char ( char stream encoding -- )
@ -107,19 +109,19 @@ M: gb18030 encode-char ( char stream encoding -- )
: decode-quad ( byte-array -- char )
dup mapping value-at [ ] [
linear dup gb>u interval-at [
[ bfirst>> linear - ] [ ufirst>> ] bi +
[ bfirst>> - ] [ ufirst>> ] bi +
] [ drop replacement-char ] if*
] ?if ;
: four-byte ( stream byte1 byte2 -- char )
rot 2 swap stream-read dup last-bytes?
[ first2 B{ } 4sequence decode-quad ]
[ first2 4byte-array decode-quad ]
[ 3drop replacement-char ] if ;
: two-byte ( stream byte -- char )
over stream-read1 {
{ [ dup not ] [ 3drop replacement-char ] }
{ [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] }
{ [ dup second-byte? ] [ 2byte-array mapping value-at nip ] }
{ [ dup quad-2/4? ] [ four-byte ] }
[ 3drop replacement-char ]
} cond ;
@ -127,7 +129,7 @@ M: gb18030 encode-char ( char stream encoding -- )
M: gb18030 decode-char ( stream encoding -- char )
drop dup stream-read1 {
{ [ dup not ] [ 2drop f ] }
{ [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] }
{ [ dup ascii? ] [ nip 1byte-array mapping value-at ] }
{ [ dup quad-1/3? ] [ two-byte ] }
[ 2drop replacement-char ]
} cond ;

View File

@ -1,12 +1,35 @@
USING: help.syntax help.markup ;
USING: help.syntax help.markup strings ;
IN: io.encodings.iana
ABOUT: "io.encodings.iana"
ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:"
{ $subsection name>encoding }
{ $subsection encoding>name }
"To let a new encoding be used with the above words, use the following:"
{ $subsection register-encoding }
"Exceptions when encodings or names are not found:"
{ $subsection missing-encoding }
{ $subsection missing-name } ;
HELP: missing-encoding
{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;
HELP: missing-name
{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;
HELP: name>encoding
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
HELP: encoding>name
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
{ "Given an encoding descriptor, return the preferred IANA name." } ;
{ $description "Given an encoding descriptor, return the preferred IANA name." } ;
{ name>encoding encoding>name } related-words
! Help for register-encoding. The file path below uses the "resource:"
! prefix, matching the path the vocabulary actually loads.
HELP: register-encoding
{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resource:basis/io/encodings/iana/character-sets" } " file." }
{ $errors "Throws an error if the name is not the first name of an entry in that file." } ;

View File

@ -1,5 +1,28 @@
USING: io.encodings.iana io.encodings.ascii tools.test ;
USING: io.encodings.iana io.encodings.iana.private
io.encodings.utf8 tools.test assocs namespaces ;
IN: io.encodings.iana.tests
[ ascii ] [ "US-ASCII" name>encoding ] unit-test
[ ascii ] [ "ASCII" name>encoding ] unit-test
[ "US-ASCII" ] [ ascii encoding>name ] unit-test
[ utf8 ] [ "UTF-8" name>encoding ] unit-test
[ utf8 ] [ "utf8" name>encoding ] unit-test
[ "UTF-8" ] [ utf8 encoding>name ] unit-test
! We will never implement EBCDIC-FI-SE-A
SINGLETON: ebcdic-fisea
ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test
! Clean up after myself
[ ] [
"EBCDIC-FI-SE-A" n>e-table get delete-at
"csEBCDICFISEA" n>e-table get delete-at
ebcdic-fisea e>n-table get delete-at
] unit-test
[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
[ "csEBCDICFISEA" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail
[ ebcdic-fisea "foobar" register-encoding ] must-fail
[ "foobar" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail

View File

@ -1,41 +1,24 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings values io.files assocs
splitting sequences io namespaces sets io.encodings.8-bit
io.encodings.ascii io.encodings.utf8 io.encodings.utf16
io.encodings.chinese io.encodings.japanese ;
splitting sequences io namespaces sets io.encodings.utf8 ;
IN: io.encodings.iana
<PRIVATE
VALUE: n>e-table
: e>n-table H{
{ ascii "US-ASCII" }
{ utf8 "UTF-8" }
{ utf16 "UTF-16" }
{ utf16be "UTF-16BE" }
{ utf16le "UTF-16LE" }
{ latin1 "ISO-8859-1" }
{ latin2 "ISO-8859-2" }
{ latin3 "ISO-8859-3" }
{ latin4 "ISO-8859-4" }
{ latin/cyrillic "ISO-8859-5" }
{ latin/arabic "ISO-8859-6" }
{ latin/greek "ISO-8859-7" }
{ latin/hebrew "ISO-8859-8" }
{ latin5 "ISO-8859-9" }
{ latin6 "ISO-8859-10" }
{ shift-jis "Shift_JIS" }
{ windows-31j "Windows-31J" }
{ gb18030 "GB18030" }
} ;
SYMBOL: n>e-table
SYMBOL: e>n-table
SYMBOL: aliases
PRIVATE>
ERROR: missing-encoding name ;
: name>encoding ( name -- encoding )
n>e-table at ;
dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;
ERROR: missing-name encoding ;
: encoding>name ( encoding -- name )
e>n-table at ;
dup e>n-table get-global at [ ] [ missing-name ] ?if ;
<PRIVATE
: parse-iana ( stream -- synonym-set )
@ -43,24 +26,33 @@ PRIVATE>
[ " " split ] map
[ first { "Name:" "Alias:" } member? ] filter
[ second ] map { "None" } diff
] map ;
] map harvest ;
: more-aliases ( -- assoc )
: make-aliases ( stream -- n>e )
    ! Builds a hashtable from the output of parse-iana: each element is
    ! stored under its own first entry, and the value is the whole
    ! element (first entry included).
    parse-iana [ [ first ] keep ] H{ } map>assoc ;
: initial-n>e ( -- assoc )
H{
{ "UTF8" utf8 }
{ "utf8" utf8 }
{ "utf-8" utf8 }
} ;
{ "UTF-8" utf8 }
} clone ;
: initial-e>n ( -- assoc )
H{ { utf8 "UTF-8" } } clone ;
: make-n>e ( stream -- n>e )
parse-iana [ [
dup [
e>n-table value-at
[ swap [ set ] with each ]
[ drop ] if*
] with each
] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE>
"resource:basis/io/encodings/iana/character-sets"
ascii <file-reader> make-n>e to: n>e-table
utf8 <file-reader> make-aliases aliases set-global
n>e-table global [ initial-n>e or ] change-at
e>n-table global [ initial-e>n or ] change-at
: register-encoding ( descriptor name -- )
    ! Registers `descriptor` in the name->encoding table under every
    ! alias listed for `name`, then records `name` as the descriptor's
    ! name in the encoding->name table.
    ! Throws "Bad encoding registration" when `name` is not a key of the
    ! aliases table.
    [
        ! Read the global value explicitly: the table is stored with
        ! set-global and the two lookup tables are read with get-global,
        ! so a dynamically scoped binding of `aliases` must not be used.
        aliases get-global at [
            [ n>e-table get-global set-at ] with each
        ] [ "Bad encoding registration" throw ] if*
    ] [ swap e>n-table get-global set-at ] 2bi ;

View File

@ -4,7 +4,7 @@ USING: help.markup help.syntax ;
IN: io.encodings.japanese
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
"The " { $vocab-link "io.encodings.japanese" } " vocabulary implements Japanese-specific text encodings. Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
{ $subsection shift-jis }
{ $subsection windows-31j } ;

View File

@ -1,17 +1,31 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings
math io.encodings.ascii arrays accessors splitting math.parser
biassocs ;
math.order values assocs io.encodings io.binary fry strings math
io.encodings.ascii arrays byte-arrays accessors splitting
math.parser biassocs io.encodings.iana ;
IN: io.encodings.japanese
VALUE: shift-jis
SINGLETON: shift-jis
VALUE: windows-31j
shift-jis "Shift_JIS" register-encoding
SINGLETON: windows-31j
windows-31j "Windows-31J" register-encoding
<PRIVATE
VALUE: shift-jis-table
M: shift-jis <encoder> drop shift-jis-table <encoder> ;
M: shift-jis <decoder> drop shift-jis-table <decoder> ;
VALUE: windows-31j-table
M: windows-31j <encoder> drop windows-31j-table <encoder> ;
M: windows-31j <decoder> drop windows-31j-table <decoder> ;
TUPLE: jis assoc ;
: <jis> ( assoc -- jis )
@ -31,17 +45,17 @@ TUPLE: jis assoc ;
ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j
make-jis to: windows-31j-table
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
make-jis to: shift-jis
make-jis to: shift-jis-table
: small? ( char -- ? )
! ASCII range or single-byte halfwidth katakana
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
: write-halfword ( stream halfword -- )
h>b/b swap B{ } 2sequence swap stream-write ;
h>b/b swap 2byte-array swap stream-write ;
M: jis encode-char
swapd ch>jis

View File

@ -1,15 +1,21 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! Copyright (C) 2006, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays ;
io.encodings combinators splitting io byte-arrays io.encodings.iana ;
IN: io.encodings.utf16
SINGLETON: utf16be
utf16be "UTF-16BE" register-encoding
SINGLETON: utf16le
utf16le "UTF-16LE" register-encoding
SINGLETON: utf16
utf16 "UTF-16" register-encoding
ERROR: missing-bom ;
<PRIVATE

View File

@ -1,15 +1,21 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16
sequences io.binary ;
sequences io.binary io.encodings.iana ;
IN: io.encodings.utf32
SINGLETON: utf32be
utf32be "UTF-32BE" register-encoding
SINGLETON: utf32le
utf32le "UTF-32LE" register-encoding
SINGLETON: utf32
utf32 "UTF-32" register-encoding
<PRIVATE
! Decoding

View File

@ -70,7 +70,7 @@ IN: stack-checker.transforms
[
[ no-case ]
] [
dup peek quotation? [
dup peek callable? [
dup peek swap but-last
] [
[ no-case ] swap

View File

@ -1 +1,2 @@
Daniel Ehrenberg
Slava Pestov

View File

@ -0,0 +1,41 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings math kernel ;
IN: wrap
ABOUT: "wrap"
ARTICLE: "wrap" "Word wrapping"
"The " { $vocab-link "wrap" } " vocabulary implements word wrapping. There is support for simple string wrapping, with the following words:"
{ $subsection wrap-lines }
{ $subsection wrap-string }
{ $subsection wrap-indented-string }
"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
{ $subsection wrap }
{ $subsection word }
{ $subsection <word> } ;
HELP: wrap-lines
{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-string
{ $values { "string" string } { "width" integer } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-indented-string
{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;
HELP: wrap
{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
HELP: word
{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
{ $see-also wrap } ;
HELP: <word>
{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
{ $description "Creates a " { $link word } " object with the given parameters." }
{ $see-also wrap } ;

View File

@ -1,5 +1,7 @@
IN: wrap.tests
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test wrap multiline sequences ;
IN: wrap.tests
[
{
@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ;
} 35 wrap [ { } like ] map
] unit-test
[
{
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
}
{
T{ word f 4 10 f }
T{ word f 5 10 f }
}
}
] [
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 4 10 f }
T{ word f 5 10 f }
} 35 wrap [ { } like ] map
] unit-test
[
<" This is a
long piece
@ -45,4 +73,10 @@ word wrap.">
] [
<" This is a long piece of text that we wish to word wrap."> 12
" " wrap-indented-string
] unit-test
] unit-test
[ "this text\nhas lots of\nspaces" ]
[ "this text has lots of spaces" 12 wrap-string ] unit-test
[ "hello\nhow\nare\nyou\ntoday?" ]
[ "hello how are you today?" 3 wrap-string ] unit-test

View File

@ -1,3 +1,5 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel namespaces make splitting
math math.order fry assocs accessors ;
IN: wrap
@ -15,12 +17,25 @@ SYMBOL: width
: break-here? ( column word -- ? )
break?>> not [ width get > ] [ drop f ] if ;
: walk ( n words -- n )
    ! Adjusts a candidate cut index n so that a line is cut at a break.
    ! If n is out of bounds for words, it is returned unchanged.
    ! If on a break, take the rest of the breaks
    ! If not on a break, go back until you hit a break
    2dup bounds-check? [
        2dup nth break?>>
        [ [ break?>> not ] find-from drop ]
        [
            ! find-last-from yields f when no break precedes n. Keep n
            ! itself in that case rather than applying 1+ to f, which
            ! would throw.
            dupd [ break?>> ] find-last-from drop
            [ nip 1+ ] when*
        ] if
    ] [ drop ] if ;
: find-optimal-break ( words -- n )
[ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ;
[ 0 ] keep
[ [ width>> + dup ] keep break-here? ] find drop nip
[ 1 max swap walk ] [ drop f ] if* ;
: (wrap) ( words -- )
dup find-optimal-break
[ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ;
[
dup find-optimal-break
[ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
] unless-empty ;
: intersperse ( seq elt -- seq' )
[ '[ _ , ] [ , ] interleave ] { } make ;
@ -34,9 +49,7 @@ SYMBOL: width
: join-words ( wrapped-lines -- lines )
[
[ break?>> ]
[ trim-head-slice ]
[ trim-tail-slice ] bi
[ break?>> ] trim-slice
[ key>> ] map concat
] map ;

View File

@ -1,4 +1,5 @@
USING: xml xml.data xml.utilities tools.test accessors kernel ;
USING: xml xml.data xml.utilities tools.test accessors kernel
io.encodings.8-bit ;
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
@ -11,4 +12,4 @@ USING: xml xml.data xml.utilities tools.test accessors kernel ;
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test

View File

@ -9,3 +9,5 @@ USING: tools.test byte-arrays sequences kernel ;
[ B{ 1 2 } ] [ 2 B{ 1 2 3 4 5 6 7 8 9 } resize-byte-array ] unit-test
[ -10 B{ } resize-byte-array ] must-fail
[ B{ 123 } ] [ 123 1byte-array ] unit-test

View File

@ -1,4 +1,4 @@
! Copyright (C) 2007, 2008 Slava Pestov.
! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel kernel.private alien.accessors sequences
sequences.private math ;
@ -19,7 +19,7 @@ M: byte-array resize
INSTANCE: byte-array sequence
: 1byte-array ( x -- byte-array ) 1 <byte-array> [ set-first ] keep ; inline
: 1byte-array ( x -- byte-array ) B{ } 1sequence ; inline
: 2byte-array ( x y -- byte-array ) B{ } 2sequence ; inline

View File

@ -1,6 +1,6 @@
USING: alien strings kernel math tools.test io prettyprint
namespaces combinators words classes sequences accessors
math.functions ;
math.functions arrays ;
IN: combinators.tests
! Compiled
@ -314,3 +314,13 @@ IN: combinators.tests
\ test-case-7 must-infer
[ "plus" ] [ \ + test-case-7 ] unit-test
! Some corner cases (no pun intended)
DEFER: corner-case-1
<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >>
[ t ] [ \ corner-case-1 optimized>> ] unit-test
[ 4 ] [ 2 corner-case-1 ] unit-test
[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test

View File

@ -59,13 +59,13 @@ ERROR: no-case ;
] [
dup wrapper? [ wrapped>> ] when
] if =
] [ quotation? ] if
] [ callable? ] if
] find nip ;
: case ( obj assoc -- )
case-find {
{ [ dup array? ] [ nip second call ] }
{ [ dup quotation? ] [ call ] }
{ [ dup callable? ] [ call ] }
{ [ dup not ] [ no-case ] }
} cond ;

View File

@ -246,8 +246,8 @@ HELP: retry
{ $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
{ $examples
"Try to get a 0 as a random number:"
{ $unchecked-example "USING: continuations math prettyprint ;"
"[ 5 random 0 = ] 5 retry t"
{ $unchecked-example "USING: continuations math prettyprint random ;"
"[ 5 random 0 = ] 5 retry"
"t"
}
} ;

View File

@ -207,6 +207,10 @@ HELP: first4-unsafe
{ $values { "seq" sequence } { "first" "the first element" } { "second" "the second element" } { "third" "the third element" } { "fourth" "the fourth element" } }
{ $contract "Unsafe variant of " { $link first4 } " that does not perform bounds checks." } ;
HELP: 1sequence
{ $values { "obj" object } { "exemplar" sequence } { "seq" sequence } }
{ $description "Creates a one-element sequence of the same type as " { $snippet "exemplar" } "." } ;
HELP: 2sequence
{ $values { "obj1" object } { "obj2" object } { "exemplar" sequence } { "seq" sequence } }
{ $description "Creates a two-element sequence of the same type as " { $snippet "exemplar" } "." } ;

View File

@ -137,9 +137,12 @@ INSTANCE: iota immutable-sequence
: from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline
: (1sequence) ( obj seq -- seq )
    ! Stores obj at index 0 of seq with set-nth-unsafe and leaves seq on
    ! the stack.
    [ 0 ] dip [ set-nth-unsafe ] keep ; inline
: (2sequence) ( obj1 obj2 seq -- seq )
[ 1 swap set-nth-unsafe ] keep
[ 0 swap set-nth-unsafe ] keep ; inline
(1sequence) ; inline
: (3sequence) ( obj1 obj2 obj3 seq -- seq )
[ 2 swap set-nth-unsafe ] keep
@ -151,6 +154,9 @@ INSTANCE: iota immutable-sequence
PRIVATE>
: 1sequence ( obj exemplar -- seq )
1 swap [ (1sequence) ] new-like ; inline
: 2sequence ( obj1 obj2 exemplar -- seq )
2 swap [ (2sequence) ] new-like ; inline

View File

@ -97,3 +97,5 @@ IN: vectors.tests
[ fixnum ] [ 1 >bignum V{ } new-sequence length class ] unit-test
[ fixnum ] [ 1 >bignum [ ] V{ } map-as length class ] unit-test
[ V{ "lulz" } ] [ "lulz" 1vector ] unit-test

View File

@ -40,7 +40,7 @@ M: sequence new-resizable drop <vector> ;
INSTANCE: vector growable
: 1vector ( x -- vector ) 1array >vector ;
: 1vector ( x -- vector ) V{ } 1sequence ;
: ?push ( elt seq/f -- seq )
[ 1 <vector> ] unless* [ push ] keep ;

View File

@ -53,7 +53,6 @@ IN: reports.noise
{ nipd 3 }
{ nkeep 5 }
{ npick 6 }
{ nrev 5 }
{ nrot 5 }
{ nslip 5 }
{ ntuck 6 }