Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2008-03-26 15:56:28 -05:00
commit 92b912bce9
28 changed files with 349 additions and 165 deletions

2
.gitignore vendored
View File

@ -18,4 +18,4 @@ factor
temp
logs
work
buildsupport/wordsize
build-support/wordsize

View File

@ -1,38 +1,38 @@
#!/bin/sh
if [ \( `uname -s ` = FreeBSD \) -a \( `uname -p` = i386 \) ]
then
echo freebsd-x86-32
elif [ \( `uname -s` = FreeBSD \) -a \( `uname -m` = amd64 \) ]
then
echo freebsd-x86-64
elif [ \( `uname -s` = OpenBSD \) -a \( `uname -m` = i386 \) ]
then
echo openbsd-x86-32
elif [ \( `uname -s` = OpenBSD \) -a \( `uname -m` = amd64 \) ]
then
echo openbsd-x86-64
elif [ \( `uname -s` = NetBSD \) -a \( `uname -p` = i386 \) ]
then
echo netbsd-x86-32
elif [ \( `uname -s` = NetBSD \) -a \( `uname -p` = x86_64 \) ]
then
echo netbsd-x86-64
elif [ \( `uname -s` = Darwin \) -a \( `uname -p` = powerpc \) ]
then
echo macosx-ppc
elif [ `uname -s` = Darwin ]
then
echo macosx-x86-`./build-support/wordsize`
elif [ \( `uname -s` = Linux \) -a \( `uname -m` = i686 \) ]
then
echo linux-x86-32
elif [ \( `uname -s` = Linux \) -a \( `uname -m` = x86_64 \) ]
then
echo linux-x86-64
elif [ \( `uname -o` = Cygwin \) -a \( `uname -m` = i686 \) ]
then
echo winnt-x86-`./build-support/wordsize`
else
echo help
uname_s=`uname -s`
case $uname_s in
CYGWIN_NT-5.2-WOW64) OS=winnt;;
*CYGWIN_NT*) OS=winnt;;
*CYGWIN*) OS=winnt;;
*darwin*) OS=macosx;;
*Darwin*) OS=macosx;;
*linux*) OS=linux;;
*Linux*) OS=linux;;
*NetBSD*) OS=netbsd;;
*FreeBSD*) OS=freebsd;;
*OpenBSD*) OS=openbsd;;
*DragonFly*) OS=dragonflybsd;;
esac
uname_m=`uname -m`
case $uname_m in
i386) ARCH=x86;;
i686) ARCH=x86;;
amd64) ARCH=x86;;
*86) ARCH=x86;;
*86_64) ARCH=x86;;
"Power Macintosh") ARCH=ppc;;
esac
WORD=`./build-support/wordsize`
MAKE_TARGET=$OS-$ARCH-$WORD
if [[ $OS == macosx && $ARCH == ppc ]] ; then
MAKE_TARGET=$OS-$ARCH
fi
if [[ $OS == linux && $ARCH == ppc ]] ; then
MAKE_TARGET=$OS-$ARCH
fi
echo $MAKE_TARGET

View File

@ -2,4 +2,7 @@ USING: help.syntax help.markup ;
IN: io.encodings.binary
HELP: binary
{ $class-description "This is the encoding descriptor for binary I/O. Making an encoded stream with the binary encoding is a no-op; streams with this encoding deal with byte-arrays, not strings." } ;
{ $class-description "This is the encoding descriptor for binary I/O. Making an encoded stream with the binary encoding is a no-op; streams with this encoding deal with byte-arrays, not strings." }
{ $see-also "encodings-introduction" } ;
ABOUT: binary

View File

@ -1,15 +1,16 @@
USING: help.markup help.syntax ;
IN: io.encodings
ABOUT: "encodings"
ABOUT: "io.encodings"
ARTICLE: "io.encodings" "I/O encodings"
"Many streams deal with bytes, rather than Unicode code points, at some level. The translation between these two things is specified by an encoding. To abstract this away from the programmer, Factor provides a system where these streams are associated with an encoding which is always used when the stream is read from or written to. For most purposes, an encoding descriptor consisting of a symbol is all that is needed when initializing a stream."
"Bytes can't be understood in isolation as text. They must be interpreted under a certain encoding. Factor provides utilities for dealing with encoded text by declaring that a stream has a particular encoding, and utilities to encode and decode strings."
{ $subsection "encodings-constructors" }
{ $subsection "encodings-descriptors" }
{ $subsection "encodings-protocol" } ;
ARTICLE: "encodings-constructors" "Constructing an encoded stream"
ARTICLE: "encodings-constructors" "Manually constructing an encoded stream"
"The following words can be used to construct encoded streams. Note that they are usually not used directly, but rather by the stream constructors themselves. Most stream constructors take an encoding descriptor as a parameter and internally call these constructors."
{ $subsection <encoder> }
{ $subsection <decoder> }
{ $subsection <encoder-duplex> } ;
@ -37,19 +38,22 @@ HELP: <encoder-duplex>
ARTICLE: "encodings-descriptors" "Encoding descriptors"
"An encoding descriptor is something which can be used for input or output streams to encode or decode files. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:"
{ $vocab-subsection "io.encodings.utf8" }
{ $vocab-subsection "io.encodings.ascii" }
{ $vocab-subsection "io.encodings.8-bit" }
{ $vocab-subsection "io.encodings.binary" }
{ $vocab-subsection "io.encodings.utf16" } ;
{ $vocab-subsection "ASCII" "io.encodings.ascii" }
{ $vocab-subsection "Binary" "io.encodings.binary" }
{ $vocab-subsection "Strict encodings" "io.encodings.strict" }
{ $vocab-subsection "8-bit encodings" "io.encodings.8-bit" }
{ $vocab-subsection "UTF-8" "io.encodings.utf8" }
{ $vocab-subsection "UTF-16" "io.encodings.utf16" }
{ $see-also "encodings-introduction" } ;
ARTICLE: "encodings-protocol" "Encoding protocol"
"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
"There are two parts to implementing a new encoding. First, methods for creating an encoded or decoded stream must be provided. These have defaults, however, which wrap a stream in an encoder or decoder wrapper with the given encoding descriptor."
{ $subsection <encoder> }
{ $subsection <decoder> }
"If an encoding might be contained in the code slot of an encoder or decoder tuple, then the following methods must be implemented to read or write one code point from a stream:"
{ $subsection decode-char }
{ $subsection encode-char }
"The following methods are optional:"
{ $subsection <encoder> }
{ $subsection <decoder> } ;
{ $see-also "encodings-introduction" } ;
HELP: decode-char
{ $values { "stream" "an underlying input stream" }

View File

@ -1,11 +1,8 @@
USING: help.markup help.syntax io.encodings strings io.files ;
USING: help.markup help.syntax ;
IN: io.encodings.utf8
ARTICLE: "io.encodings.utf8" "Working with UTF8-encoded data"
"The UTF8 encoding is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences. The encoding descriptor for UTF-8:"
{ $subsection utf8 } ;
HELP: utf8
{ $class-description "This is the class of encoding tuples which denote a UTF-8 encoding. This conforms to the " { $link "encodings-protocol" } "." } ;
{ $class-description "This is the encoding descriptor for a UTF-8 encoding. UTF-8 is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences." }
{ $see-also "encodings-introduction" } ;
ABOUT: "io.encodings.utf8"
ABOUT: utf8

View File

@ -178,10 +178,10 @@ io.files.unique sequences strings accessors ;
[ "/lib/bux" ] [ "/usr" "/lib/bux" append-path ] unit-test
[ "/lib/bux/" ] [ "/usr" "/lib/bux/" append-path ] unit-test
[ "foo/" ] [ "foo/bar/." parent-directory ] unit-test
[ "foo/" ] [ "foo/bar/./" parent-directory ] unit-test
[ "foo/" ] [ "foo/bar/baz/.." parent-directory ] unit-test
[ "foo/" ] [ "foo/bar/baz/../" parent-directory ] unit-test
[ "foo/bar/." parent-directory ] must-fail
[ "foo/bar/./" parent-directory ] must-fail
[ "foo/bar/baz/.." parent-directory ] must-fail
[ "foo/bar/baz/../" parent-directory ] must-fail
[ "." parent-directory ] must-fail
[ "./" parent-directory ] must-fail
@ -190,6 +190,8 @@ io.files.unique sequences strings accessors ;
[ "../../" parent-directory ] must-fail
[ "foo/.." parent-directory ] must-fail
[ "foo/../" parent-directory ] must-fail
[ "" parent-directory ] must-fail
[ "." ] [ "boot.x86.64.image" parent-directory ] unit-test
[ "bar/foo" ] [ "bar/baz" "..///foo" append-path ] unit-test
[ "bar/baz/foo" ] [ "bar/baz" ".///foo" append-path ] unit-test

View File

@ -66,14 +66,12 @@ ERROR: no-parent-directory path ;
right-trim-separators
dup last-path-separator [
1+ cut
{
{ "." [ 1 head* parent-directory ] }
{ ".." [
2 head* parent-directory parent-directory
] }
[ drop ]
} case
] [ no-parent-directory ] if
] [
drop "." swap
] if
{ "" "." ".." } member? [
no-parent-directory
] when
] unless ;
<PRIVATE
@ -157,6 +155,8 @@ HOOK: cwd io-backend ( -- path )
SYMBOL: current-directory
M: object cwd ( -- path ) "." ;
[ cwd current-directory set-global ] "current-directory" add-init-hook
: with-directory ( path quot -- )
@ -259,7 +259,7 @@ DEFER: copy-tree-into
prepend-path ;
: ?resource-path ( path -- newpath )
"resource:" ?head [ resource-path ] when ;
"resource:" ?head [ left-trim-separators resource-path ] when ;
: resource-exists? ( path -- ? )
?resource-path exists? ;

View File

@ -9,7 +9,7 @@ IN: io.tests
] unit-test
: <resource-reader> ( resource -- stream )
resource-path iso-8859-1 <file-reader> ;
resource-path latin1 <file-reader> ;
[
"This is a line.\rThis is another line.\r"

View File

@ -170,7 +170,17 @@ ARTICLE: "collections" "Collections"
{ $subsection "graphs" }
{ $subsection "buffers" } ;
USING: io.sockets io.launcher io.mmap io.monitors ;
USING: io.sockets io.launcher io.mmap io.monitors
io.encodings.utf8 io.encodings.binary io.encodings.ascii io.files ;
ARTICLE: "encodings-introduction" "An introduction to encodings"
"In order to express text in terms of binary, some sort of encoding has to be used. In a modern context, this is understood as a two-way mapping between Unicode code points (characters) and some amount of binary. Since English isn't the only language in the world, ASCII is not sufficient as a mapping from binary to Unicode; it can't even express em-dashes or curly quotes. Unicode was designed as a universal character set that could potentially represent everything." $nl
"Not all encodings can represent all Unicode code points, but Unicode can represent basically everything that exists in modern encodings. Some encodings are language-specific, and some can represent everything in Unicode. Though the world is moving toward Unicode and UTF-8, the reality today is that there are several encodings which must be taken into account." $nl
"Factor uses a system of encoding descriptors to denote encodings. Encoding descriptors are objects which describe encodings. Examples are " { $link utf8 } ", " { $link ascii } " and " { $link binary } ". Encoding descriptors can be passed around independently. Each encoding descriptor has some method for constructing an encoded or decoded stream, and the resulting stream has an encoding descriptor stored which has methods for reading or writing characters." $nl
"Constructors for streams which deal with bytes usually take an encoding as an explicit parameter. For example, to open a text file for reading whose contents are in UTF-8, use the following"
{ $code "\"filename\" utf8 <file-reader>" }
"If there is an error in the encoded stream, a replacement character (0xFFFD) will be inserted. To throw an exception upon error, use a strict encoding as follows"
{ $code "\"filename\" utf8 strict <file-reader>" } ;
ARTICLE: "io" "Input and output"
{ $heading "Streams" }
@ -188,6 +198,7 @@ ARTICLE: "io" "Input and output"
{ $subsection "io.mmap" }
{ $subsection "io.monitors" }
{ $heading "Encodings" }
{ $subsection "encodings-introduction" }
{ $subsection "io.encodings" }
{ $subsection "io.encodings.string" }
{ $heading "Other features" }

View File

@ -52,7 +52,7 @@ PRIVATE>
: http-request ( request -- response stream )
dup request [
dup request-addr iso-8859-1 <client>
dup request-addr latin1 <client>
1 minutes over set-timeout
[
write-request flush
@ -82,7 +82,7 @@ PRIVATE>
: download-to ( url file -- )
#! Downloads the contents of a URL to a file.
swap http-get-stream swap check-response
[ swap iso-8859-1 <file-writer> stream-copy ] with-disposal ;
[ swap latin1 <file-writer> stream-copy ] with-disposal ;
: download ( url -- )
dup download-name download-to ;

View File

@ -217,7 +217,7 @@ SYMBOL: exit-continuation
: httpd ( port -- )
internet-server "http.server"
iso-8859-1 [ handle-client ] with-server ;
latin1 [ handle-client ] with-server ;
: httpd-main ( -- ) 8888 httpd ;

View File

@ -0,0 +1,110 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup io.encodings.8-bit.private ;
IN: io.encodings.8-bit
ARTICLE: "io.encodings.8-bit" "8-bit encodings"
"Many encodings are a simple mapping of bytes onto characters. The " { $vocab-link "io.encodings.8-bit" } " vocabulary implements these generically using existing resource files. These encodings should be used with extreme caution, as fully general Unicode encodings like UTF-8 are nearly always more appropriate. The following 8-bit encodings are already defined:"
{ $subsection latin1 }
{ $subsection latin2 }
{ $subsection latin3 }
{ $subsection latin4 }
{ $subsection latin/cyrillic }
{ $subsection latin/arabic }
{ $subsection latin/greek }
{ $subsection latin/hebrew }
{ $subsection latin5 }
{ $subsection latin6 }
{ $subsection latin/thai }
{ $subsection latin7 }
{ $subsection latin8 }
{ $subsection latin9 }
{ $subsection latin10 }
{ $subsection koi8-r }
{ $subsection windows-1252 }
{ $subsection ebcdic }
{ $subsection mac-roman }
"Other encodings can be defined using the following utility"
{ $subsection define-8-bit-encoding } ;
ABOUT: "io.encodings.8-bit"
HELP: define-8-bit-encoding
{ $values { "name" "a string" } { "path" "a path" } }
{ $description "Creates a new encoding with the given name, using the resource file at the path to tell how to encode and decode octets. The resource file should be in a similar format to those at ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/" } ;
HELP: latin1
{ $description "This is the ISO-8859-1 encoding, also called Latin-1: Western European. It is an 8-bit superset of ASCII which is the default for a mimetype starting with 'text' and provides the characters necessary for most western European languages." }
{ $see-also "encodings-introduction" } ;
HELP: latin2
{ $description "This is the ISO-8859-2 encoding, also called Latin-2: Eastern European. It is an 8-bit superset of ASCII and provides the characters necessary for most eastern European languages." }
{ $see-also "encodings-introduction" } ;
HELP: latin3
{ $description "This is the ISO-8859-3 encoding, also called Latin-3: South European. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, Maltese and Esperanto." }
{ $see-also "encodings-introduction" } ;
HELP: latin4
{ $description "This is the ISO-8859-4 encoding, also called Latin-4: North European. It is an 8-bit superset of ASCII and provides the characters necessary for Latvian, Lithuanian, Estonian, Greenlandic and Sami." }
{ $see-also "encodings-introduction" } ;
HELP: latin/cyrillic
{ $description "This is the ISO-8859-5 encoding, also called Latin/Cyrillic. It is an 8-bit superset of ASCII and provides the characters necessary for most languages which use Cyrilic, including Russian, Macedonian, Belarusian, Bulgarian, Serbian, and Ukrainian. KOI8-R is used much more commonly." }
{ $see-also "encodings-introduction" } ;
HELP: latin/arabic
{ $description "This is the ISO-8859-6 encoding, also called Latin/Arabic. It is an 8-bit superset of ASCII and provides the characters necessary for Arabic, though not other languages which use Arabic script." }
{ $see-also "encodings-introduction" } ;
HELP: latin/greek
{ $description "This is the ISO-8859-7 encoding, also called Latin/Greek. It is an 8-bit superset of ASCII and provides the characters necessary for Greek written in modern monotonic orthography, or ancient Greek without accent marks." }
{ $see-also "encodings-introduction" } ;
HELP: latin/hebrew
{ $description "This is the ISO-8859-8 encoding, also called Latin/Hebrew. It is an 8-bit superset of ASCII and provides the characters necessary for modern Hebrew without explicit vowels. Generally, this is interpreted in logical order, making it ISO-8859-8-I, technically." }
{ $see-also "encodings-introduction" } ;
HELP: latin5
{ $description "This is the ISO-8859-9 encoding, also called Latin-5: Turkish. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, similar to Latin-1 but replacing the spots used for Icelandic with characters used in Turkish." }
{ $see-also "encodings-introduction" } ;
HELP: latin6
{ $description "This is the ISO-8859-10 encoding, also called Latin-6: Nordic. It is an 8-bit superset of ASCII containing the same characters as Latin-4, but rearranged to be of better use to nordic languages." }
{ $see-also "encodings-introduction" } ;
HELP: latin/thai
{ $description "This is the ISO-8859-11 encoding, also called Latin/Thai. It is an 8-bit superset of ASCII containing the characters necessary to represent Thai. It is basically identical to TIS-620." }
{ $see-also "encodings-introduction" } ;
HELP: latin7
{ $description "This is the ISO-8859-13 encoding, also called Latin-7: Baltic Rim. It is an 8-bit superset of ASCII containing all characters necesary to represent Baltic Rim languages, as previous character sets were incomplete." }
{ $see-also "encodings-introduction" } ;
HELP: latin8
{ $description "This is the ISO-8859-14 encoding, also called Latin-8: Celtic. It is an 8-bit superset of ASCII designed for Celtic languages like Gaelic and Breton." }
{ $see-also "encodings-introduction" } ;
HELP: latin9
{ $description "This is the ISO-8859-15 encoding, also called Latin-9 and unoffically as Latin-0. It is an 8-bit superset of ASCII designed as a modification of Latin-1, removing little-used characters in favor of the Euro symbol and other characters." }
{ $see-also "encodings-introduction" } ;
HELP: latin10
{ $description "This is the ISO-8859-16 encoding, also called Latin-10: South-Eastern European. It is an 8-bit superset of ASCII." }
{ $see-also "encodings-introduction" } ;
HELP: windows-1252
{ $description "Windows 1252 is an 8-bit superset of ASCII which is closely related to Latin-1. Control characters in the 0x80 to 0x9F range are replaced with printable characters such as the Euro symbol." }
{ $see-also "encodings-introduction" } ;
HELP: ebcdic
{ $description "EBCDIC is an 8-bit legacy encoding designed for IBM mainframes like System/360 in the 1960s. It has since fallen into disuse. It contains large unallocated regions, and the version included here (code page 37) contains auxiliary characters in this region for English- and Portugese-speaking countries." }
{ $see-also "encodings-introduction" } ;
HELP: mac-roman
{ $description "Mac Roman is an 8-bit superset of ASCII which was the standard encoding on Mac OS prior to version 10. It is incompatible with Latin-1 in all but a few places and ASCII, and it is suitable for encoding many Western European languages." }
{ $see-also "encodings-introduction" } ;
HELP: koi8-r
{ $description "KOI8-R is an 8-bit superset of ASCII which encodes the Cyrillic alphabet, as used in Russian and Bulgarian. Characters are in such an order that, if the eight bit is stripped, text is still interpretable as ASCII. Block-building characters also exist." }
{ $see-also "encodings-introduction" } ;

View File

@ -1,10 +1,10 @@
USING: io.encodings.string io.encodings.8-bit tools.test strings arrays ;
IN: io.encodings.8-bit.tests
[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" iso-8859-1 encode ] unit-test
[ { 256 } >string iso-8859-1 encode ] must-fail
[ B{ 255 } ] [ { 255 } iso-8859-1 encode ] unit-test
[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" latin1 encode ] unit-test
[ { 256 } >string latin1 encode ] must-fail
[ B{ 255 } ] [ { 255 } latin1 encode ] unit-test
[ "bar" ] [ "bar" iso-8859-1 decode ] unit-test
[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } iso-8859-1 decode >array ] unit-test
[ "bar" ] [ "bar" latin1 decode ] unit-test
[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } latin1 decode >array ] unit-test
[ { HEX: fffd HEX: 20AC } ] [ { HEX: 81 HEX: 80 } windows-1252 decode >array ] unit-test

View File

@ -9,21 +9,21 @@ IN: io.encodings.8-bit
<PRIVATE
: mappings {
{ "iso-8859-1" "8859-1" }
{ "iso-8859-2" "8859-2" }
{ "iso-8859-3" "8859-3" }
{ "iso-8859-4" "8859-4" }
{ "iso-8859-5" "8859-5" }
{ "iso-8859-6" "8859-6" }
{ "iso-8859-7" "8859-7" }
{ "iso-8859-8" "8859-8" }
{ "iso-8859-9" "8859-9" }
{ "iso-8859-10" "8859-10" }
{ "iso-8859-11" "8859-11" }
{ "iso-8859-13" "8859-13" }
{ "iso-8859-14" "8859-14" }
{ "iso-8859-15" "8859-15" }
{ "iso-8859-16" "8859-16" }
{ "latin1" "8859-1" }
{ "latin2" "8859-2" }
{ "latin3" "8859-3" }
{ "latin4" "8859-4" }
{ "latin/cyrillic" "8859-5" }
{ "latin/arabic" "8859-6" }
{ "latin/greek" "8859-7" }
{ "latin/hebrew" "8859-8" }
{ "latin5" "8859-9" }
{ "latin6" "8859-10" }
{ "latin/thai" "8859-11" }
{ "latin7" "8859-13" }
{ "latin8" "8859-14" }
{ "latin9" "8859-15" }
{ "latin10" "8859-16" }
{ "koi8-r" "KOI8-R" }
{ "windows-1252" "CP1252" }
{ "ebcdic" "CP037" }
@ -50,7 +50,7 @@ IN: io.encodings.8-bit
[ swap ] assoc-map >hashtable ;
: parse-file ( file-name -- byte>ch ch>byte )
full-path ascii file-lines process-contents
ascii file-lines process-contents
[ byte>ch ] [ ch>byte ] bi ;
: empty-tuple-class ( string -- class )
@ -85,9 +85,9 @@ IN: io.encodings.8-bit
: 8-bit-methods ( class byte>ch ch>byte -- )
>r over r> define-encode-char define-decode-char ;
: define-8-bit-encoding ( tuple-name file-name -- )
: define-8-bit-encoding ( name path -- )
>r empty-tuple-class r> parse-file 8-bit-methods ;
PRIVATE>
[ mappings [ define-8-bit-encoding ] assoc-each ] with-compilation-unit
[ mappings [ full-path define-8-bit-encoding ] assoc-each ] with-compilation-unit

View File

@ -0,0 +1,8 @@
USING: help.markup help.syntax ;
IN: io.encodings.ascii
HELP: ascii
{ $class-description "This is the encoding descriptor which denotes an ASCII encoding. By default, if there's a non-ASCII character in an input stream, it will be replaced with a replacement character (U+FFFD), and if a non-ASCII character is used in output, an exception is thrown." }
{ $see-also "encodings-introduction" } ;
ABOUT: ascii

View File

@ -0,0 +1,10 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: io.encodings.strict
HELP: strict ( encoding -- strict-encoding )
{ $values { "encoding" "an encoding descriptor" } { "strict-encoding" "a strict encoding descriptor" } }
{ $description "Makes an encoding strict, that is, in the presence of a malformed code point, an error is thrown. Note that the existence of a replacement character in a file (U+FFFD) also throws an error." } ;
ABOUT: strict

View File

@ -1,22 +1,25 @@
USING: help.markup help.syntax io.encodings strings ;
IN: io.encodings.utf16
ARTICLE: "utf16" "Working with UTF-16-encoded data"
ARTICLE: "io.encodings.utf16" "UTF-16"
"The UTF-16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences. There are three encoding descriptor classes for working with UTF-16, depending on endianness or the presence of a BOM:"
{ $subsection utf16 }
{ $subsection utf16le }
{ $subsection utf16be }
{ $subsection utf16 }
"All of these conform to the " { $link "encodings-protocol" } "." ;
{ $subsection utf16n } ;
ABOUT: "utf16"
ABOUT: "io.encodings.utf16"
HELP: utf16le
{ $class-description "The encoding protocol for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
{ $class-description "The encoding descriptor for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
HELP: utf16be
{ $class-description "The encoding protocol for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
{ $class-description "The encoding descriptor for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
HELP: utf16
{ $class-description "The encoding protocol for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." } ;
{ $class-description "The encoding descriptor for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." } ;
{ utf16 utf16le utf16be } related-words
HELP: utf16n
{ $class-description "The encoding descriptor for UTF-16 without a byte order mark in native endian order. This is useful mostly for FFI calls which take input of strings in of wide_t*." } ;
{ utf16 utf16le utf16be utf16n } related-words

View File

@ -1,5 +1,7 @@
USING: kernel tools.test io.encodings.utf16 arrays sbufs
sequences io.encodings io unicode io.encodings.string ;
io.streams.byte-array sequences io.encodings io unicode
io.encodings.string alien.c-types accessors classes ;
IN: io.encodings.utf16.tests
[ { CHAR: x } ] [ { 0 CHAR: x } utf16be decode >array ] unit-test
[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode >array ] unit-test
@ -20,3 +22,9 @@ sequences io.encodings io unicode io.encodings.string ;
[ { CHAR: x } ] [ { HEX: fe HEX: ff 0 CHAR: x } utf16 decode >array ] unit-test
[ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16 encode >array ] unit-test
: correct-endian
code>> class little-endian? [ utf16le = ] [ utf16be = ] if ;
[ t ] [ B{ } utf16n <byte-reader> correct-endian ] unit-test
[ t ] [ utf16n <byte-writer> correct-endian ] unit-test

View File

@ -1,7 +1,8 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays inspector ;
io.encodings combinators splitting io byte-arrays inspector
alien.c-types ;
IN: io.encodings.utf16
TUPLE: utf16be ;
@ -10,6 +11,8 @@ TUPLE: utf16le ;
TUPLE: utf16 ;
TUPLE: utf16n ;
<PRIVATE
! UTF-16BE decoding
@ -121,4 +124,13 @@ M: utf16 <decoder> ( stream utf16 -- decoder )
M: utf16 <encoder> ( stream utf16 -- encoder )
drop bom-le over stream-write utf16le <encoder> ;
! Native-order UTF-16
: native-utf16 ( -- descriptor )
little-endian? utf16le utf16be ? ;
M: utf16n <decoder> drop native-utf16 <decoder> ;
M: utf16n <encoder> drop native-utf16 <encoder> ;
PRIVATE>

View File

@ -6,7 +6,6 @@ IN: io.unix.files.tests
[ "/" ] [ "/etc/" parent-directory ] unit-test
[ "/" ] [ "/etc" parent-directory ] unit-test
[ "/" ] [ "/" parent-directory ] unit-test
[ "asdf" parent-directory ] must-fail
[ f ] [ "" root-directory? ] unit-test
[ t ] [ "/" root-directory? ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel math sequences vectors classes combinators
arrays words assocs parser namespaces definitions
USING: kernel math sequences vectors classes classes.algebra
combinators arrays words assocs parser namespaces definitions
prettyprint prettyprint.backend quotations arrays.lib
debugger io compiler.units kernel.private effects ;
IN: multi-methods

View File

@ -0,0 +1,10 @@
USING: help.syntax help.markup ;
IN: openssl
ARTICLE: "openssl" "OpenSSL"
"Factor on Windows has been tested with this version of OpenSSL: "
{ $url "http://www.openssl.org/related/binaries.html" } ;

View File

@ -3,7 +3,7 @@
USING: kernel compiler.units parser words arrays strings math.parser sequences
quotations vectors namespaces math assocs continuations peg
peg.parsers unicode.categories multiline combinators.lib
splitting ;
splitting accessors ;
IN: peg.ebnf
TUPLE: ebnf-non-terminal symbol ;
@ -16,7 +16,7 @@ TUPLE: ebnf-choice options ;
TUPLE: ebnf-sequence elements ;
TUPLE: ebnf-repeat0 group ;
TUPLE: ebnf-repeat1 group ;
TUPLE: ebnf-optional elements ;
TUPLE: ebnf-optional group ;
TUPLE: ebnf-rule symbol elements ;
TUPLE: ebnf-action parser code ;
TUPLE: ebnf rules ;
@ -198,7 +198,7 @@ DEFER: 'choice'
: 'rule' ( -- parser )
[
'non-terminal' [ ebnf-non-terminal-symbol ] action ,
'non-terminal' [ symbol>> ] action ,
"=" syntax ,
'choice' ,
] seq* [ first2 <ebnf-rule> ] action ;
@ -215,49 +215,53 @@ SYMBOL: main
H{ } clone dup dup [ parser set swap (transform) main set ] bind ;
M: ebnf (transform) ( ast -- parser )
ebnf-rules [ (transform) ] map peek ;
rules>> [ (transform) ] map peek ;
M: ebnf-rule (transform) ( ast -- parser )
dup ebnf-rule-elements (transform) [
swap ebnf-rule-symbol set
dup elements>> (transform) [
swap symbol>> set
] keep ;
M: ebnf-sequence (transform) ( ast -- parser )
ebnf-sequence-elements [ (transform) ] map seq ;
elements>> [ (transform) ] map seq ;
M: ebnf-choice (transform) ( ast -- parser )
ebnf-choice-options [ (transform) ] map choice ;
options>> [ (transform) ] map choice ;
M: ebnf-any-character (transform) ( ast -- parser )
drop any-char ;
M: ebnf-range (transform) ( ast -- parser )
ebnf-range-pattern range-pattern ;
pattern>> range-pattern ;
: transform-group ( ast -- parser )
#! convert a ast node with groups to a parser for that group
group>> (transform) ;
M: ebnf-ensure (transform) ( ast -- parser )
ebnf-ensure-group (transform) ensure ;
transform-group ensure ;
M: ebnf-ensure-not (transform) ( ast -- parser )
ebnf-ensure-not-group (transform) ensure-not ;
transform-group ensure-not ;
M: ebnf-repeat0 (transform) ( ast -- parser )
ebnf-repeat0-group (transform) repeat0 ;
transform-group repeat0 ;
M: ebnf-repeat1 (transform) ( ast -- parser )
ebnf-repeat1-group (transform) repeat1 ;
transform-group repeat1 ;
M: ebnf-optional (transform) ( ast -- parser )
ebnf-optional-elements (transform) optional ;
transform-group optional ;
M: ebnf-action (transform) ( ast -- parser )
[ ebnf-action-parser (transform) ] keep
ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ;
[ parser>> (transform) ] keep
code>> string-lines [ parse-lines ] with-compilation-unit action ;
M: ebnf-terminal (transform) ( ast -- parser )
ebnf-terminal-symbol token sp ;
symbol>> token sp ;
M: ebnf-non-terminal (transform) ( ast -- parser )
ebnf-non-terminal-symbol [
symbol>> [
, parser get , \ at ,
] [ ] make delay sp ;

View File

@ -48,3 +48,7 @@ IN: peg.parsers.tests
[ V{ } ]
[ "" epsilon parse parse-result-ast ] unit-test
{ "a" } [
"a" "a" token just parse parse-result-ast
] unit-test

View File

@ -3,14 +3,14 @@
USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib math.parser match
unicode.categories sequences.deep peg peg.private
peg.search math.ranges ;
peg.search math.ranges words ;
IN: peg.parsers
TUPLE: just-parser p1 ;
: just-pattern
[
dup [
execute dup [
dup parse-result-remaining empty? [ drop f ] unless
] when
] ;

View File

@ -3,7 +3,7 @@
USING: kernel sequences strings namespaces math assocs shuffle
vectors arrays combinators.lib math.parser match
unicode.categories sequences.lib compiler.units parser
words quotations effects memoize ;
words quotations effects memoize accessors combinators.cleave ;
IN: peg
TUPLE: parse-result remaining ast ;
@ -43,17 +43,16 @@ TUPLE: token-parser symbol ;
MATCH-VARS: ?token ;
: token-pattern ( -- quot )
[
?token 2dup head? [
dup >r length tail-slice r> <parse-result>
] [
2drop f
] if
] ;
: parse-token ( input string -- result )
#! Parse the string, returning a parse result
2dup head? [
dup >r length tail-slice r> <parse-result>
] [
2drop f
] if ;
M: token-parser (compile) ( parser -- quot )
token-parser-symbol \ ?token token-pattern match-replace ;
symbol>> [ parse-token ] curry ;
TUPLE: satisfy-parser quot ;
@ -73,7 +72,7 @@ MATCH-VARS: ?quot ;
] ;
M: satisfy-parser (compile) ( parser -- quot )
satisfy-parser-quot \ ?quot satisfy-pattern match-replace ;
quot>> \ ?quot satisfy-pattern match-replace ;
TUPLE: range-parser min max ;
@ -101,12 +100,12 @@ TUPLE: seq-parser parsers ;
: seq-pattern ( -- quot )
[
dup [
dup parse-result-remaining ?quot [
[ parse-result-remaining swap set-parse-result-remaining ] 2keep
parse-result-ast dup ignore = [
dup remaining>> ?quot [
[ remaining>> swap (>>remaining) ] 2keep
ast>> dup ignore = [
drop
] [
swap [ parse-result-ast push ] keep
swap [ ast>> push ] keep
] if
] [
drop f
@ -119,7 +118,7 @@ TUPLE: seq-parser parsers ;
M: seq-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
seq-parser-parsers [ compiled-parser \ ?quot seq-pattern match-replace % ] each
parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each
] [ ] make ;
TUPLE: choice-parser parsers ;
@ -136,16 +135,16 @@ TUPLE: choice-parser parsers ;
M: choice-parser (compile) ( parser -- quot )
[
f ,
choice-parser-parsers [ compiled-parser \ ?quot choice-pattern match-replace % ] each
parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
\ nip ,
] [ ] make ;
TUPLE: repeat0-parser p1 ;
: (repeat0) ( quot result -- result )
2dup parse-result-remaining swap call [
[ parse-result-remaining swap set-parse-result-remaining ] 2keep
parse-result-ast swap [ parse-result-ast push ] keep
2dup remaining>> swap call [
[ remaining>> swap (>>remaining) ] 2keep
ast>> swap [ ast>> push ] keep
(repeat0)
] [
nip
@ -159,7 +158,7 @@ TUPLE: repeat0-parser p1 ;
M: repeat0-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
repeat0-parser-p1 compiled-parser \ ?quot repeat0-pattern match-replace %
p1>> compiled-parser \ ?quot repeat0-pattern match-replace %
] [ ] make ;
TUPLE: repeat1-parser p1 ;
@ -167,7 +166,7 @@ TUPLE: repeat1-parser p1 ;
: repeat1-pattern ( -- quot )
[
[ ?quot ] swap (repeat0) [
dup parse-result-ast empty? [
dup ast>> empty? [
drop f
] when
] [
@ -178,7 +177,7 @@ TUPLE: repeat1-parser p1 ;
M: repeat1-parser (compile) ( parser -- quot )
[
[ V{ } clone <parse-result> ] %
repeat1-parser-p1 compiled-parser \ ?quot repeat1-pattern match-replace %
p1>> compiled-parser \ ?quot repeat1-pattern match-replace %
] [ ] make ;
TUPLE: optional-parser p1 ;
@ -189,7 +188,7 @@ TUPLE: optional-parser p1 ;
] ;
M: optional-parser (compile) ( parser -- quot )
optional-parser-p1 compiled-parser \ ?quot optional-pattern match-replace ;
p1>> compiled-parser \ ?quot optional-pattern match-replace ;
TUPLE: ensure-parser p1 ;
@ -203,7 +202,7 @@ TUPLE: ensure-parser p1 ;
] ;
M: ensure-parser (compile) ( parser -- quot )
ensure-parser-p1 compiled-parser \ ?quot ensure-pattern match-replace ;
p1>> compiled-parser \ ?quot ensure-pattern match-replace ;
TUPLE: ensure-not-parser p1 ;
@ -217,7 +216,7 @@ TUPLE: ensure-not-parser p1 ;
] ;
M: ensure-not-parser (compile) ( parser -- quot )
ensure-not-parser-p1 compiled-parser \ ?quot ensure-not-pattern match-replace ;
p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ;
TUPLE: action-parser p1 quot ;
@ -226,13 +225,13 @@ MATCH-VARS: ?action ;
: action-pattern ( -- quot )
[
?quot dup [
dup parse-result-ast ?action call
swap [ set-parse-result-ast ] keep
dup ast>> ?action call
>>ast
] when
] ;
M: action-parser (compile) ( parser -- quot )
{ action-parser-p1 action-parser-quot } get-slots [ compiled-parser ] dip
{ [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip
2array { ?quot ?action } action-pattern match-replace ;
: left-trim-slice ( string -- string )
@ -246,7 +245,7 @@ TUPLE: sp-parser p1 ;
M: sp-parser (compile) ( parser -- quot )
[
\ left-trim-slice , sp-parser-p1 compiled-parser ,
\ left-trim-slice , p1>> compiled-parser ,
] [ ] make ;
TUPLE: delay-parser quot ;
@ -256,7 +255,7 @@ M: delay-parser (compile) ( parser -- quot )
#! This way it is run only once and the
#! parser constructed once at run time.
[
delay-parser-quot % \ compile ,
quot>> % \ compile ,
] [ ] make
{ } { "word" } <effect> memoize-quot
[ % \ execute , ] [ ] make ;

View File

@ -62,7 +62,7 @@ M: freetype-renderer free-fonts ( world -- )
} at ;
: ttf-path ( name -- string )
"/fonts/" swap ".ttf" 3append resource-path ;
"resource:fonts/" swap ".ttf" 3append ?resource-path ;
: (open-face) ( path length -- face )
#! We use FT_New_Memory_Face, not FT_New_Face, since

View File

@ -27,5 +27,5 @@ C-STRUCT: stat
FUNCTION: int __xstat ( int ver, char* pathname, stat* buf ) ;
FUNCTION: int __lxstat ( int ver, char* pathname, stat* buf ) ;
: stat ( pathname buf -- int ) 3 -rot __xstat ;
: lstat ( pathname buf -- int ) 3 -rot __lxstat ;
: stat ( pathname buf -- int ) 1 -rot __xstat ;
: lstat ( pathname buf -- int ) 1 -rot __lxstat ;