Merge branch 'master' of git://factorcode.org/git/factor

2008-03-26 15:56:28 -05:00 · 2008-03-26 15:56:28 -05:00 · 92b912bce9
parent 65a12660a7 bc55f67a49
commit 92b912bce9
28 changed files with 349 additions and 165 deletions
--- a/.gitignore
+++ b/.gitignore
@ -18,4 +18,4 @@ factor
 temp
 logs
 work
-buildsupport/wordsize
+build-support/wordsize
--- a/build-support/target
+++ b/build-support/target
@ -1,38 +1,38 @@
 #!/bin/sh

-if [ \( `uname -s ` = FreeBSD \) -a \( `uname -p` = i386 \) ]
-then
-  echo freebsd-x86-32
-elif [ \( `uname -s` = FreeBSD \) -a \( `uname -m` = amd64 \) ]
-then
-  echo freebsd-x86-64
-elif [ \( `uname -s` = OpenBSD \) -a \( `uname -m` = i386 \) ]
-then
-  echo openbsd-x86-32
-elif [ \( `uname -s` = OpenBSD \) -a \( `uname -m` = amd64 \) ]
-then
-  echo openbsd-x86-64
-elif [ \( `uname -s` = NetBSD \) -a \( `uname -p` = i386 \) ]
-then
-  echo netbsd-x86-32
-elif [ \( `uname -s` = NetBSD \) -a \( `uname -p` = x86_64 \) ]
-then
-  echo netbsd-x86-64
-elif [ \( `uname -s` = Darwin \) -a \( `uname -p` = powerpc \) ]
-then
-  echo macosx-ppc
-elif [ `uname -s` = Darwin ]
-then
-  echo macosx-x86-`./build-support/wordsize`
-elif [ \( `uname -s` = Linux \) -a \( `uname -m` = i686 \) ]
-then
-  echo linux-x86-32
-elif [ \( `uname -s` = Linux \) -a \( `uname -m` = x86_64 \) ]
-then
-  echo linux-x86-64
-elif [ \( `uname -o` = Cygwin \) -a \( `uname -m` = i686 \) ]
-then
-  echo winnt-x86-`./build-support/wordsize`
-else
-  echo help
+uname_s=`uname -s`
+case $uname_s in
+	CYGWIN_NT-5.2-WOW64) OS=winnt;;
+	*CYGWIN_NT*) OS=winnt;;
+	*CYGWIN*) OS=winnt;;
+	*darwin*) OS=macosx;;
+	*Darwin*) OS=macosx;;
+	*linux*) OS=linux;;
+	*Linux*) OS=linux;;
+	*NetBSD*) OS=netbsd;;
+	*FreeBSD*) OS=freebsd;;
+	*OpenBSD*) OS=openbsd;;
+	*DragonFly*) OS=dragonflybsd;;
+esac
+
+uname_m=`uname -m`
+case $uname_m in
+   i386) ARCH=x86;;
+   i686) ARCH=x86;;
+   amd64) ARCH=x86;;
+   *86) ARCH=x86;;
+   *86_64) ARCH=x86;;
+   "Power Macintosh") ARCH=ppc;;
+esac
+
+WORD=`./build-support/wordsize`
+
+MAKE_TARGET=$OS-$ARCH-$WORD
+if [[ $OS == macosx && $ARCH == ppc ]] ; then
+	MAKE_TARGET=$OS-$ARCH
 fi
+if [[ $OS == linux && $ARCH == ppc ]] ; then
+	MAKE_TARGET=$OS-$ARCH
+fi
+
+echo $MAKE_TARGET
--- a/core/io/encodings/binary/binary-docs.factor
+++ b/core/io/encodings/binary/binary-docs.factor
@ -2,4 +2,7 @@ USING: help.syntax help.markup ;
 IN: io.encodings.binary

 HELP: binary
-{ $class-description "This is the encoding descriptor for binary I/O. Making an encoded stream with the binary encoding is a no-op; streams with this encoding deal with byte-arrays, not strings." } ;
+{ $class-description "This is the encoding descriptor for binary I/O. Making an encoded stream with the binary encoding is a no-op; streams with this encoding deal with byte-arrays, not strings." }
+{ $see-also "encodings-introduction" } ;
+
+ABOUT: binary
--- a/core/io/encodings/encodings-docs.factor
+++ b/core/io/encodings/encodings-docs.factor
@ -1,15 +1,16 @@
 USING: help.markup help.syntax ;
 IN: io.encodings

-ABOUT: "encodings"
+ABOUT: "io.encodings"

 ARTICLE: "io.encodings" "I/O encodings"
-"Many streams deal with bytes, rather than Unicode code points, at some level. The translation between these two things is specified by an encoding. To abstract this away from the programmer, Factor provides a system where these streams are associated with an encoding which is always used when the stream is read from or written to. For most purposes, an encoding descriptor consisting of a symbol is all that is needed when initializing a stream."
+"Bytes can't be understood in isolation as text. They must be interpreted under a certain encoding. Factor provides utilities for dealing with encoded text by declaring that a stream has a particular encoding, and utilities to encode and decode strings."
 { $subsection "encodings-constructors" }
 { $subsection "encodings-descriptors" }
 { $subsection "encodings-protocol" } ;

-ARTICLE: "encodings-constructors" "Constructing an encoded stream"
+ARTICLE: "encodings-constructors" "Manually constructing an encoded stream"
+"The following words can be used to construct encoded streams. Note that they are usually not used directly, but rather by the stream constructors themselves. Most stream constructors take an encoding descriptor as a parameter and internally call these constructors."
 { $subsection <encoder> }
 { $subsection <decoder> }
 { $subsection <encoder-duplex> } ;
@ -37,19 +38,22 @@ HELP: <encoder-duplex>

 ARTICLE: "encodings-descriptors" "Encoding descriptors"
 "An encoding descriptor is something which can be used for input or output streams to encode or decode files. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:"
-{ $vocab-subsection "io.encodings.utf8" }
-{ $vocab-subsection "io.encodings.ascii" }
-{ $vocab-subsection "io.encodings.8-bit" }
-{ $vocab-subsection "io.encodings.binary" }
-{ $vocab-subsection "io.encodings.utf16" } ;
+{ $vocab-subsection "ASCII" "io.encodings.ascii" }
+{ $vocab-subsection "Binary" "io.encodings.binary" }
+{ $vocab-subsection "Strict encodings" "io.encodings.strict" }
+{ $vocab-subsection "8-bit encodings" "io.encodings.8-bit" }
+{ $vocab-subsection "UTF-8" "io.encodings.utf8" }
+{ $vocab-subsection "UTF-16" "io.encodings.utf16" }
+{ $see-also "encodings-introduction" } ;

 ARTICLE: "encodings-protocol" "Encoding protocol"
-"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
+"There are two parts to implementing a new encoding. First, methods for creating an encoded or decoded stream must be provided. These have defaults, however, which wrap a stream in an encoder or decoder wrapper with the given encoding descriptor."
+{ $subsection <encoder> }
+{ $subsection <decoder> }
+"If an encoding might be contained in the code slot of an encoder or decoder tuple, then the following methods must be implemented to read or write one code point from a stream:"
 { $subsection decode-char }
 { $subsection encode-char }
-"The following methods are optional:"
-{ $subsection <encoder> }
-{ $subsection <decoder> } ;
+{ $see-also "encodings-introduction" } ;

 HELP: decode-char
 { $values { "stream" "an underlying input stream" }
--- a/core/io/encodings/utf8/utf8-docs.factor
+++ b/core/io/encodings/utf8/utf8-docs.factor
@ -1,11 +1,8 @@
-USING: help.markup help.syntax io.encodings strings io.files ;
+USING: help.markup help.syntax ;
 IN: io.encodings.utf8

-ARTICLE: "io.encodings.utf8" "Working with UTF8-encoded data"
-"The UTF8 encoding is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences. The encoding descriptor for UTF-8:"
-{ $subsection utf8 } ;
-
 HELP: utf8
-{ $class-description "This is the class of encoding tuples which denote a UTF-8 encoding. This conforms to the " { $link "encodings-protocol" } "." } ;
+{ $class-description "This is the encoding descriptor for a UTF-8 encoding. UTF-8 is a variable-width encoding. 7-bit ASCII characters are encoded as single bytes, and other Unicode code points are encoded as 2 to 4 byte sequences." }
+{ $see-also "encodings-introduction" } ;

-ABOUT: "io.encodings.utf8"
+ABOUT: utf8
--- a/core/io/files/files-tests.factor
+++ b/core/io/files/files-tests.factor
@ -178,10 +178,10 @@ io.files.unique sequences strings accessors ;
 [ "/lib/bux" ] [ "/usr" "/lib/bux" append-path ] unit-test
 [ "/lib/bux/" ] [ "/usr" "/lib/bux/" append-path ] unit-test

-[ "foo/" ] [ "foo/bar/." parent-directory ] unit-test
-[ "foo/" ] [ "foo/bar/./" parent-directory ] unit-test
-[ "foo/" ] [ "foo/bar/baz/.." parent-directory ] unit-test
-[ "foo/" ] [ "foo/bar/baz/../" parent-directory ] unit-test
+[ "foo/bar/." parent-directory ] must-fail
+[ "foo/bar/./" parent-directory ] must-fail
+[ "foo/bar/baz/.." parent-directory ] must-fail
+[ "foo/bar/baz/../" parent-directory ] must-fail

 [ "." parent-directory ] must-fail
 [ "./" parent-directory ] must-fail
@ -190,6 +190,8 @@ io.files.unique sequences strings accessors ;
 [ "../../" parent-directory ] must-fail
 [ "foo/.." parent-directory ] must-fail
 [ "foo/../" parent-directory ] must-fail
+[ "" parent-directory ] must-fail
+[ "." ] [ "boot.x86.64.image" parent-directory ] unit-test

 [ "bar/foo" ] [ "bar/baz" "..///foo" append-path ] unit-test
 [ "bar/baz/foo" ] [ "bar/baz" ".///foo" append-path ] unit-test
--- a/core/io/files/files.factor
+++ b/core/io/files/files.factor
@ -66,14 +66,12 @@ ERROR: no-parent-directory path ;
        right-trim-separators
        dup last-path-separator [
            1+ cut
-            {
-                { "." [ 1 head* parent-directory ] }
-                { ".." [
-                    2 head* parent-directory parent-directory
-                ] }
-                [ drop ]
-            } case
-        ] [ no-parent-directory ] if
+        ] [
+            drop "." swap
+        ] if
+        { "" "." ".." } member? [
+            no-parent-directory
+        ] when
    ] unless ;

 <PRIVATE
@ -157,6 +155,8 @@ HOOK: cwd io-backend ( -- path )

 SYMBOL: current-directory

+M: object cwd ( -- path ) "." ;
+
 [ cwd current-directory set-global ] "current-directory" add-init-hook

 : with-directory ( path quot -- )
@ -259,7 +259,7 @@ DEFER: copy-tree-into
    prepend-path ;

 : ?resource-path ( path -- newpath )
-    "resource:" ?head [ resource-path ] when ;
+    "resource:" ?head [ left-trim-separators resource-path ] when ;

 : resource-exists? ( path -- ? )
    ?resource-path exists? ;
--- a/core/io/io-tests.factor
+++ b/core/io/io-tests.factor
@ -9,7 +9,7 @@ IN: io.tests
 ] unit-test

 : <resource-reader> ( resource -- stream )
-    resource-path iso-8859-1 <file-reader> ;
+    resource-path latin1 <file-reader> ;

 [
    "This is a line.\rThis is another line.\r"
--- a/extra/help/handbook/handbook.factor
+++ b/extra/help/handbook/handbook.factor
@ -170,7 +170,17 @@ ARTICLE: "collections" "Collections"
 { $subsection "graphs" }
 { $subsection "buffers" } ;

-USING: io.sockets io.launcher io.mmap io.monitors ;
+USING: io.sockets io.launcher io.mmap io.monitors
+io.encodings.utf8 io.encodings.binary io.encodings.ascii io.files ;
+
+ARTICLE: "encodings-introduction" "An introduction to encodings"
+"In order to express text in terms of binary, some sort of encoding has to be used. In a modern context, this is understood as a two-way mapping between Unicode code points (characters) and some amount of binary. Since English isn't the only language in the world, ASCII is not sufficient as a mapping from binary to Unicode; it can't even express em-dashes or curly quotes. Unicode was designed as a universal character set that could potentially represent everything." $nl
+"Not all encodings can represent all Unicode code points, but Unicode can represent basically everything that exists in modern encodings. Some encodings are language-specific, and some can represent everything in Unicode. Though the world is moving toward Unicode and UTF-8, the reality today is that there are several encodings which must be taken into account." $nl
+"Factor uses a system of encoding descriptors to denote encodings. Encoding descriptors are objects which describe encodings. Examples are " { $link utf8 } ", " { $link ascii } " and " { $link binary } ". Encoding descriptors can be passed around independently. Each encoding descriptor has some method for constructing an encoded or decoded stream, and the resulting stream has an encoding descriptor stored which has methods for reading or writing characters." $nl
+"Constructors for streams which deal with bytes usually take an encoding as an explicit parameter. For example, to open a text file for reading whose contents are in UTF-8, use the following"
+{ $code "\"filename\" utf8 <file-reader>" }
+"If there is an error in the encoded stream, a replacement character (0xFFFD) will be inserted. To throw an exception upon error, use a strict encoding as follows"
+{ $code "\"filename\" utf8 strict <file-reader>" } ;

 ARTICLE: "io" "Input and output"
 { $heading "Streams" }
@ -188,6 +198,7 @@ ARTICLE: "io" "Input and output"
 { $subsection "io.mmap" }
 { $subsection "io.monitors" }
 { $heading "Encodings" }
+{ $subsection "encodings-introduction" }
 { $subsection "io.encodings" }
 { $subsection "io.encodings.string" }
 { $heading "Other features" }
--- a/extra/http/client/client.factor
+++ b/extra/http/client/client.factor
@ -52,7 +52,7 @@ PRIVATE>

 : http-request ( request -- response stream )
    dup request [
-        dup request-addr iso-8859-1 <client>
+        dup request-addr latin1 <client>
        1 minutes over set-timeout
        [
            write-request flush
@ -82,7 +82,7 @@ PRIVATE>
 : download-to ( url file -- )
    #! Downloads the contents of a URL to a file.
    swap http-get-stream swap check-response
-    [ swap iso-8859-1 <file-writer> stream-copy ] with-disposal ;
+    [ swap latin1 <file-writer> stream-copy ] with-disposal ;

 : download ( url -- )
    dup download-name download-to ;
--- a/extra/http/server/server.factor
+++ b/extra/http/server/server.factor
@ -217,7 +217,7 @@ SYMBOL: exit-continuation

 : httpd ( port -- )
    internet-server "http.server"
-    iso-8859-1 [ handle-client ] with-server ;
+    latin1 [ handle-client ] with-server ;

 : httpd-main ( -- ) 8888 httpd ;

--- a/extra/io/encodings/8-bit/8-bit-docs.factor
+++ b/extra/io/encodings/8-bit/8-bit-docs.factor
@ -0,0 +1,110 @@
+! Copyright (C) 2008 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup io.encodings.8-bit.private ;
+IN: io.encodings.8-bit
+
+ARTICLE: "io.encodings.8-bit" "8-bit encodings"
+"Many encodings are a simple mapping of bytes onto characters. The " { $vocab-link "io.encodings.8-bit" } " vocabulary implements these generically using existing resource files. These encodings should be used with extreme caution, as fully general Unicode encodings like UTF-8 are nearly always more appropriate. The following 8-bit encodings are already defined:"
+{ $subsection latin1 }
+{ $subsection latin2 }
+{ $subsection latin3 }
+{ $subsection latin4 }
+{ $subsection latin/cyrillic }
+{ $subsection latin/arabic }
+{ $subsection latin/greek }
+{ $subsection latin/hebrew }
+{ $subsection latin5 }
+{ $subsection latin6 }
+{ $subsection latin/thai }
+{ $subsection latin7 }
+{ $subsection latin8 }
+{ $subsection latin9 }
+{ $subsection latin10 }
+{ $subsection koi8-r }
+{ $subsection windows-1252 }
+{ $subsection ebcdic }
+{ $subsection mac-roman }
+"Other encodings can be defined using the following utility"
+{ $subsection define-8-bit-encoding } ;
+
+ABOUT: "io.encodings.8-bit"
+
+HELP: define-8-bit-encoding
+{ $values { "name" "a string" } { "path" "a path" } }
+{ $description "Creates a new encoding with the given name, using the resource file at the path to tell how to encode and decode octets. The resource file should be in a similar format to those at ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/" } ;
+
+HELP: latin1
+{ $description "This is the ISO-8859-1 encoding, also called Latin-1: Western European. It is an 8-bit superset of ASCII which is the default for a mimetype starting with 'text' and provides the characters necessary for most western European languages." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin2
+{ $description "This is the ISO-8859-2 encoding, also called Latin-2: Eastern European. It is an 8-bit superset of ASCII and provides the characters necessary for most eastern European languages." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin3
+{ $description "This is the ISO-8859-3 encoding, also called Latin-3: South European. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, Maltese and Esperanto." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin4
+{ $description "This is the ISO-8859-4 encoding, also called Latin-4: North European. It is an 8-bit superset of ASCII and provides the characters necessary for Latvian, Lithuanian, Estonian, Greenlandic and Sami." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin/cyrillic
+{ $description "This is the ISO-8859-5 encoding, also called Latin/Cyrillic. It is an 8-bit superset of ASCII and provides the characters necessary for most languages which use Cyrilic, including Russian, Macedonian, Belarusian, Bulgarian, Serbian, and Ukrainian. KOI8-R is used much more commonly." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin/arabic
+{ $description "This is the ISO-8859-6 encoding, also called Latin/Arabic. It is an 8-bit superset of ASCII and provides the characters necessary for Arabic, though not other languages which use Arabic script." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin/greek
+{ $description "This is the ISO-8859-7 encoding, also called Latin/Greek. It is an 8-bit superset of ASCII and provides the characters necessary for Greek written in modern monotonic orthography, or ancient Greek without accent marks." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin/hebrew
+{ $description "This is the ISO-8859-8 encoding, also called Latin/Hebrew. It is an 8-bit superset of ASCII and provides the characters necessary for modern Hebrew without explicit vowels. Generally, this is interpreted in logical order, making it ISO-8859-8-I, technically." }
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin5
+{ $description "This is the ISO-8859-9 encoding, also called Latin-5: Turkish. It is an 8-bit superset of ASCII and provides the characters necessary for Turkish, similar to Latin-1 but replacing the spots used for Icelandic with characters used in Turkish." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin6
+{ $description "This is the ISO-8859-10 encoding, also called Latin-6: Nordic. It is an 8-bit superset of ASCII containing the same characters as Latin-4, but rearranged to be of better use to nordic languages." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin/thai
+{ $description "This is the ISO-8859-11 encoding, also called Latin/Thai. It is an 8-bit superset of ASCII containing the characters necessary to represent Thai. It is basically identical to TIS-620." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin7
+{ $description "This is the ISO-8859-13 encoding, also called Latin-7: Baltic Rim. It is an 8-bit superset of ASCII containing all characters necesary to represent Baltic Rim languages, as previous character sets were incomplete." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin8
+{ $description "This is the ISO-8859-14 encoding, also called Latin-8: Celtic. It is an 8-bit superset of ASCII designed for Celtic languages like Gaelic and Breton." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin9
+{ $description "This is the ISO-8859-15 encoding, also called Latin-9 and unoffically as Latin-0. It is an 8-bit superset of ASCII designed as a modification of Latin-1, removing little-used characters in favor of the Euro symbol and other characters." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: latin10
+{ $description "This is the ISO-8859-16 encoding, also called Latin-10: South-Eastern European. It is an 8-bit superset of ASCII." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: windows-1252
+{ $description "Windows 1252 is an 8-bit superset of ASCII which is closely related to Latin-1. Control characters in the 0x80 to 0x9F range are replaced with printable characters such as the Euro symbol." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: ebcdic
+{ $description "EBCDIC is an 8-bit legacy encoding designed for IBM mainframes like System/360 in the 1960s. It has since fallen into disuse. It contains large unallocated regions, and the version included here (code page 37) contains auxiliary characters in this region for English- and Portugese-speaking countries." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: mac-roman
+{ $description "Mac Roman is an 8-bit superset of ASCII which was the standard encoding on Mac OS prior to version 10. It is incompatible with Latin-1 in all but a few places and ASCII, and it is suitable for encoding many Western European languages." } 
+{ $see-also "encodings-introduction" } ;
+
+HELP: koi8-r
+{ $description "KOI8-R is an 8-bit superset of ASCII which encodes the Cyrillic alphabet, as used in Russian and Bulgarian. Characters are in such an order that, if the eight bit is stripped, text is still interpretable as ASCII. Block-building characters also exist." } 
+{ $see-also "encodings-introduction" } ;
--- a/extra/io/encodings/8-bit/8-bit-tests.factor
+++ b/extra/io/encodings/8-bit/8-bit-tests.factor
@ -1,10 +1,10 @@
 USING: io.encodings.string io.encodings.8-bit tools.test strings arrays ;
 IN: io.encodings.8-bit.tests

-[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" iso-8859-1 encode ] unit-test
-[ { 256 } >string iso-8859-1 encode ] must-fail
-[ B{ 255 } ] [ { 255 } iso-8859-1 encode ] unit-test
+[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" latin1 encode ] unit-test
+[ { 256 } >string latin1 encode ] must-fail
+[ B{ 255 } ] [ { 255 } latin1 encode ] unit-test

-[ "bar" ] [ "bar" iso-8859-1 decode ] unit-test
-[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } iso-8859-1 decode >array ] unit-test
+[ "bar" ] [ "bar" latin1 decode ] unit-test
+[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } latin1 decode >array ] unit-test
 [ { HEX: fffd HEX: 20AC } ] [ { HEX: 81 HEX: 80 } windows-1252 decode >array ] unit-test
--- a/extra/io/encodings/8-bit/8-bit.factor
+++ b/extra/io/encodings/8-bit/8-bit.factor
@ -9,21 +9,21 @@ IN: io.encodings.8-bit
 <PRIVATE

 : mappings {
-    { "iso-8859-1" "8859-1" }
-    { "iso-8859-2" "8859-2" }
-    { "iso-8859-3" "8859-3" }
-    { "iso-8859-4" "8859-4" }
-    { "iso-8859-5" "8859-5" }
-    { "iso-8859-6" "8859-6" }
-    { "iso-8859-7" "8859-7" }
-    { "iso-8859-8" "8859-8" }
-    { "iso-8859-9" "8859-9" }
-    { "iso-8859-10" "8859-10" }
-    { "iso-8859-11" "8859-11" }
-    { "iso-8859-13" "8859-13" }
-    { "iso-8859-14" "8859-14" }
-    { "iso-8859-15" "8859-15" }
-    { "iso-8859-16" "8859-16" }
+    { "latin1" "8859-1" }
+    { "latin2" "8859-2" }
+    { "latin3" "8859-3" }
+    { "latin4" "8859-4" }
+    { "latin/cyrillic" "8859-5" }
+    { "latin/arabic" "8859-6" }
+    { "latin/greek" "8859-7" }
+    { "latin/hebrew" "8859-8" }
+    { "latin5" "8859-9" }
+    { "latin6" "8859-10" }
+    { "latin/thai" "8859-11" }
+    { "latin7" "8859-13" }
+    { "latin8" "8859-14" }
+    { "latin9" "8859-15" }
+    { "latin10" "8859-16" }
    { "koi8-r" "KOI8-R" }
    { "windows-1252" "CP1252" }
    { "ebcdic" "CP037" }
@ -50,7 +50,7 @@ IN: io.encodings.8-bit
    [ swap ] assoc-map >hashtable ;

 : parse-file ( file-name -- byte>ch ch>byte )
-    full-path ascii file-lines process-contents
+    ascii file-lines process-contents
    [ byte>ch ] [ ch>byte ] bi ;

 : empty-tuple-class ( string -- class )
@ -85,9 +85,9 @@ IN: io.encodings.8-bit
 : 8-bit-methods ( class byte>ch ch>byte -- )
    >r over r> define-encode-char define-decode-char ;

-: define-8-bit-encoding ( tuple-name file-name -- )
+: define-8-bit-encoding ( name path -- )
    >r empty-tuple-class r> parse-file 8-bit-methods ;

 PRIVATE>

-[ mappings [ define-8-bit-encoding ] assoc-each ] with-compilation-unit
+[ mappings [ full-path define-8-bit-encoding ] assoc-each ] with-compilation-unit
--- a/extra/io/encodings/ascii/ascii-docs.factor
+++ b/extra/io/encodings/ascii/ascii-docs.factor
@ -0,0 +1,8 @@
+USING: help.markup help.syntax ;
+IN: io.encodings.ascii
+
+HELP: ascii
+{ $class-description "This is the encoding descriptor which denotes an ASCII encoding. By default, if there's a non-ASCII character in an input stream, it will be replaced with a replacement character (U+FFFD), and if a non-ASCII character is used in output, an exception is thrown." }
+{ $see-also "encodings-introduction" } ;
+
+ABOUT: ascii
--- a/extra/io/encodings/strict/strict-docs.factor
+++ b/extra/io/encodings/strict/strict-docs.factor
@ -0,0 +1,10 @@
+! Copyright (C) 2008 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup ;
+IN: io.encodings.strict
+
+HELP: strict ( encoding -- strict-encoding )
+{ $values { "encoding" "an encoding descriptor" } { "strict-encoding" "a strict encoding descriptor" } }
+{ $description "Makes an encoding strict, that is, in the presence of a malformed code point, an error is thrown. Note that the existence of a replacement character in a file (U+FFFD) also throws an error." } ;
+
+ABOUT: strict
--- a/extra/io/encodings/utf16/utf16-docs.factor
+++ b/extra/io/encodings/utf16/utf16-docs.factor
@ -1,22 +1,25 @@
 USING: help.markup help.syntax io.encodings strings ;
 IN: io.encodings.utf16

-ARTICLE: "utf16" "Working with UTF-16-encoded data"
+ARTICLE: "io.encodings.utf16" "UTF-16"
 "The UTF-16 encoding is a variable-width encoding. Unicode code points are encoded as 2 or 4 byte sequences. There are three encoding descriptor classes for working with UTF-16, depending on endianness or the presence of a BOM:"
+{ $subsection utf16 }
 { $subsection utf16le }
 { $subsection utf16be }
-{ $subsection utf16 }
-"All of these conform to the " { $link "encodings-protocol" } "." ;
+{ $subsection utf16n } ;

-ABOUT: "utf16"
+ABOUT: "io.encodings.utf16"

 HELP: utf16le
-{ $class-description "The encoding protocol for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
+{ $class-description "The encoding descriptor for UTF-16LE, that is, UTF-16 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;

 HELP: utf16be
-{ $class-description "The encoding protocol for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;
+{ $class-description "The encoding descriptor for UTF-16BE, that is, UTF-16 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." } ;

 HELP: utf16
-{ $class-description "The encoding protocol for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." } ;
+{ $class-description "The encoding descriptor for UTF-16, that is, UTF-16 with a byte order mark. This is the most useful for general input and output in UTF-16. Streams can be made which read or write wth this encoding." } ;

-{ utf16 utf16le utf16be } related-words
+HELP: utf16n
+{ $class-description "The encoding descriptor for UTF-16 without a byte order mark in native endian order. This is useful mostly for FFI calls which take input of strings in of wide_t*." } ;
+
+{ utf16 utf16le utf16be utf16n } related-words
--- a/extra/io/encodings/utf16/utf16-tests.factor
+++ b/extra/io/encodings/utf16/utf16-tests.factor
@ -1,5 +1,7 @@
 USING: kernel tools.test io.encodings.utf16 arrays sbufs
-sequences io.encodings io unicode io.encodings.string ;
+io.streams.byte-array sequences io.encodings io unicode
+io.encodings.string alien.c-types accessors classes ;
+IN: io.encodings.utf16.tests

 [ { CHAR: x } ] [ { 0 CHAR: x } utf16be decode >array ] unit-test
 [ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } utf16be decode >array ] unit-test
@ -20,3 +22,9 @@ sequences io.encodings io unicode io.encodings.string ;
 [ { CHAR: x } ] [ { HEX: fe HEX: ff 0 CHAR: x } utf16 decode >array ] unit-test

 [ { HEX: ff HEX: fe 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16 encode >array ] unit-test
+
+: correct-endian
+    code>> class little-endian? [ utf16le = ] [ utf16be = ] if ;
+
+[ t ] [ B{ } utf16n <byte-reader> correct-endian ] unit-test
+[ t ] [ utf16n <byte-writer> correct-endian ] unit-test
--- a/extra/io/encodings/utf16/utf16.factor
+++ b/extra/io/encodings/utf16/utf16.factor
@ -1,7 +1,8 @@
 ! Copyright (C) 2006, 2008 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: math kernel sequences sbufs vectors namespaces io.binary
-io.encodings combinators splitting io byte-arrays inspector ;
+io.encodings combinators splitting io byte-arrays inspector
+alien.c-types ;
 IN: io.encodings.utf16

 TUPLE: utf16be ;
@ -10,6 +11,8 @@ TUPLE: utf16le ;

 TUPLE: utf16 ;

+TUPLE: utf16n ;
+
 <PRIVATE

 ! UTF-16BE decoding
@ -121,4 +124,13 @@ M: utf16 <decoder> ( stream utf16 -- decoder )
 M: utf16 <encoder> ( stream utf16 -- encoder )
    drop bom-le over stream-write utf16le <encoder> ;

+! Native-order UTF-16
+
+: native-utf16 ( -- descriptor )
+    little-endian? utf16le utf16be ? ;
+
+M: utf16n <decoder> drop native-utf16 <decoder> ;
+
+M: utf16n <encoder> drop native-utf16 <encoder> ;
+
 PRIVATE>
--- a/extra/io/unix/files/files-tests.factor
+++ b/extra/io/unix/files/files-tests.factor
@ -6,7 +6,6 @@ IN: io.unix.files.tests
 [ "/" ] [ "/etc/" parent-directory ] unit-test
 [ "/" ] [ "/etc" parent-directory ] unit-test
 [ "/" ] [ "/" parent-directory ] unit-test
-[ "asdf" parent-directory ] must-fail

 [ f ] [ "" root-directory? ] unit-test
 [ t ] [ "/" root-directory? ] unit-test
--- a/extra/multi-methods/multi-methods.factor
+++ b/extra/multi-methods/multi-methods.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2008 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel math sequences vectors classes combinators
-arrays words assocs parser namespaces definitions
+USING: kernel math sequences vectors classes classes.algebra
+combinators arrays words assocs parser namespaces definitions
 prettyprint prettyprint.backend quotations arrays.lib
 debugger io compiler.units kernel.private effects ;
 IN: multi-methods
--- a/extra/openssl/openssl-docs.factor
+++ b/extra/openssl/openssl-docs.factor
@ -0,0 +1,10 @@
+
+USING: help.syntax help.markup ;
+
+IN: openssl
+
+ARTICLE: "openssl" "OpenSSL"
+
+"Factor on Windows has been tested with this version of OpenSSL: "
+
+{ $url "http://www.openssl.org/related/binaries.html" } ;
--- a/extra/peg/ebnf/ebnf.factor
+++ b/extra/peg/ebnf/ebnf.factor
@ -3,7 +3,7 @@
 USING: kernel compiler.units parser words arrays strings math.parser sequences 
       quotations vectors namespaces math assocs continuations peg
       peg.parsers unicode.categories multiline combinators.lib 
-       splitting ;
+       splitting accessors ;
 IN: peg.ebnf

 TUPLE: ebnf-non-terminal symbol ;
@ -16,7 +16,7 @@ TUPLE: ebnf-choice options ;
 TUPLE: ebnf-sequence elements ;
 TUPLE: ebnf-repeat0 group ;
 TUPLE: ebnf-repeat1 group ;
-TUPLE: ebnf-optional elements ;
+TUPLE: ebnf-optional group ;
 TUPLE: ebnf-rule symbol elements ;
 TUPLE: ebnf-action parser code ;
 TUPLE: ebnf rules ;
@ -198,7 +198,7 @@ DEFER: 'choice'
 
 : 'rule' ( -- parser )
  [
-    'non-terminal' [ ebnf-non-terminal-symbol ] action  ,
+    'non-terminal' [ symbol>> ] action  ,
    "=" syntax  ,
    'choice'  ,
  ] seq* [ first2 <ebnf-rule> ] action ;
@ -215,49 +215,53 @@ SYMBOL: main
  H{ } clone dup dup [ parser set swap (transform) main set ] bind ;

 M: ebnf (transform) ( ast -- parser )
-  ebnf-rules [ (transform) ] map peek ;
+  rules>> [ (transform) ] map peek ;
  
 M: ebnf-rule (transform) ( ast -- parser )
-  dup ebnf-rule-elements (transform) [
-    swap ebnf-rule-symbol set
+  dup elements>> (transform) [
+    swap symbol>> set
  ] keep ;

 M: ebnf-sequence (transform) ( ast -- parser )
-  ebnf-sequence-elements [ (transform) ] map seq ;
+  elements>> [ (transform) ] map seq ;

 M: ebnf-choice (transform) ( ast -- parser )
-  ebnf-choice-options [ (transform) ] map choice ;
+  options>> [ (transform) ] map choice ;

 M: ebnf-any-character (transform) ( ast -- parser )
  drop any-char ;

 M: ebnf-range (transform) ( ast -- parser )
-  ebnf-range-pattern range-pattern ;
+  pattern>> range-pattern ;
+
+: transform-group ( ast -- parser ) 
+  #! convert a ast node with groups to a parser for that group
+  group>> (transform) ;

 M: ebnf-ensure (transform) ( ast -- parser )
-  ebnf-ensure-group (transform) ensure ;
+  transform-group ensure ;

 M: ebnf-ensure-not (transform) ( ast -- parser )
-  ebnf-ensure-not-group (transform) ensure-not ;
+  transform-group ensure-not ;

 M: ebnf-repeat0 (transform) ( ast -- parser )
-  ebnf-repeat0-group (transform) repeat0 ;
+  transform-group repeat0 ;

 M: ebnf-repeat1 (transform) ( ast -- parser )
-  ebnf-repeat1-group (transform) repeat1 ;
+  transform-group repeat1 ;

 M: ebnf-optional (transform) ( ast -- parser )
-  ebnf-optional-elements (transform) optional ;
+  transform-group optional ;

 M: ebnf-action (transform) ( ast -- parser )
-  [ ebnf-action-parser (transform) ] keep
-  ebnf-action-code string-lines [ parse-lines ] with-compilation-unit action ;
+  [ parser>> (transform) ] keep
+  code>> string-lines [ parse-lines ] with-compilation-unit action ;

 M: ebnf-terminal (transform) ( ast -- parser )
-  ebnf-terminal-symbol token sp ;
+  symbol>> token sp ;

 M: ebnf-non-terminal (transform) ( ast -- parser )
-  ebnf-non-terminal-symbol  [
+  symbol>>  [
    , parser get , \ at ,  
  ] [ ] make delay sp ;

--- a/extra/peg/parsers/parsers-tests.factor
+++ b/extra/peg/parsers/parsers-tests.factor
@ -48,3 +48,7 @@ IN: peg.parsers.tests

 [ V{ } ]
 [ "" epsilon parse parse-result-ast ] unit-test
+
+{ "a" } [
+  "a" "a" token just parse parse-result-ast
+] unit-test
--- a/extra/peg/parsers/parsers.factor
+++ b/extra/peg/parsers/parsers.factor
@ -3,14 +3,14 @@
 USING: kernel sequences strings namespaces math assocs shuffle 
     vectors arrays combinators.lib math.parser match
     unicode.categories sequences.deep peg peg.private 
-     peg.search math.ranges ;
+     peg.search math.ranges words ;
 IN: peg.parsers

 TUPLE: just-parser p1 ;

 : just-pattern
  [
-    dup [
+    execute dup [
      dup parse-result-remaining empty? [ drop f ] unless
    ] when
  ] ;
--- a/extra/peg/peg.factor
+++ b/extra/peg/peg.factor
@ -3,7 +3,7 @@
 USING: kernel sequences strings namespaces math assocs shuffle 
       vectors arrays combinators.lib math.parser match
       unicode.categories sequences.lib compiler.units parser
-       words quotations effects memoize ;
+       words quotations effects memoize accessors combinators.cleave ;
 IN: peg

 TUPLE: parse-result remaining ast ;
@ -43,17 +43,16 @@ TUPLE: token-parser symbol ;

 MATCH-VARS: ?token ;

-: token-pattern ( -- quot )
-  [
-    ?token 2dup head? [
-      dup >r length tail-slice r> <parse-result>
-    ] [
-      2drop f
-    ] if 
-  ] ;
-  
+: parse-token ( input string -- result )
+  #! Parse the string, returning a parse result
+  2dup head? [
+    dup >r length tail-slice r> <parse-result>
+  ] [
+    2drop f
+  ] if ;
+
 M: token-parser (compile) ( parser -- quot )
-  token-parser-symbol \ ?token token-pattern match-replace ;
+  symbol>> [ parse-token ] curry ;
      
 TUPLE: satisfy-parser quot ;

@ -73,7 +72,7 @@ MATCH-VARS: ?quot ;
  ] ;

 M: satisfy-parser (compile) ( parser -- quot )
-  satisfy-parser-quot \ ?quot satisfy-pattern match-replace ;
+  quot>> \ ?quot satisfy-pattern match-replace ;

 TUPLE: range-parser min max ;

@ -101,12 +100,12 @@ TUPLE: seq-parser parsers ;
 : seq-pattern ( -- quot )
  [
    dup [
-      dup parse-result-remaining ?quot [
-        [ parse-result-remaining swap set-parse-result-remaining ] 2keep
-        parse-result-ast dup ignore = [ 
+      dup remaining>> ?quot [
+        [ remaining>> swap (>>remaining) ] 2keep
+        ast>> dup ignore = [ 
          drop  
        ] [ 
-          swap [ parse-result-ast push ] keep 
+          swap [ ast>> push ] keep 
        ] if
      ] [
        drop f 
@ -119,7 +118,7 @@ TUPLE: seq-parser parsers ;
 M: seq-parser (compile) ( parser -- quot )
  [
    [ V{ } clone <parse-result> ] %
-    seq-parser-parsers [ compiled-parser \ ?quot seq-pattern match-replace % ] each 
+    parsers>> [ compiled-parser \ ?quot seq-pattern match-replace % ] each 
  ] [ ] make ;

 TUPLE: choice-parser parsers ;
@ -136,16 +135,16 @@ TUPLE: choice-parser parsers ;
 M: choice-parser (compile) ( parser -- quot )
  [
    f ,
-    choice-parser-parsers [ compiled-parser \ ?quot choice-pattern match-replace % ] each
+    parsers>> [ compiled-parser \ ?quot choice-pattern match-replace % ] each
    \ nip ,
  ] [ ] make ;

 TUPLE: repeat0-parser p1 ;

 : (repeat0) ( quot result -- result )
-  2dup parse-result-remaining swap call [
-    [ parse-result-remaining swap set-parse-result-remaining ] 2keep 
-    parse-result-ast swap [ parse-result-ast push ] keep
+  2dup remaining>> swap call [
+    [ remaining>> swap (>>remaining) ] 2keep 
+    ast>> swap [ ast>> push ] keep
    (repeat0) 
 ] [
    nip
@ -159,7 +158,7 @@ TUPLE: repeat0-parser p1 ;
 M: repeat0-parser (compile) ( parser -- quot )
  [
    [ V{ } clone <parse-result> ] %
-    repeat0-parser-p1 compiled-parser \ ?quot repeat0-pattern match-replace %        
+    p1>> compiled-parser \ ?quot repeat0-pattern match-replace %        
  ] [ ] make ;

 TUPLE: repeat1-parser p1 ;
@ -167,7 +166,7 @@ TUPLE: repeat1-parser p1 ;
 : repeat1-pattern ( -- quot )
  [
    [ ?quot ] swap (repeat0) [
-      dup parse-result-ast empty? [
+      dup ast>> empty? [
        drop f
      ] when  
    ] [
@ -178,7 +177,7 @@ TUPLE: repeat1-parser p1 ;
 M: repeat1-parser (compile) ( parser -- quot )
  [
    [ V{ } clone <parse-result> ] %
-    repeat1-parser-p1 compiled-parser \ ?quot repeat1-pattern match-replace % 
+    p1>> compiled-parser \ ?quot repeat1-pattern match-replace % 
  ] [ ] make ;

 TUPLE: optional-parser p1 ;
@ -189,7 +188,7 @@ TUPLE: optional-parser p1 ;
  ] ;

 M: optional-parser (compile) ( parser -- quot )
-  optional-parser-p1 compiled-parser \ ?quot optional-pattern match-replace ;
+  p1>> compiled-parser \ ?quot optional-pattern match-replace ;

 TUPLE: ensure-parser p1 ;

@ -203,7 +202,7 @@ TUPLE: ensure-parser p1 ;
  ] ;

 M: ensure-parser (compile) ( parser -- quot )
-  ensure-parser-p1 compiled-parser \ ?quot ensure-pattern match-replace ;
+  p1>> compiled-parser \ ?quot ensure-pattern match-replace ;

 TUPLE: ensure-not-parser p1 ;

@ -217,7 +216,7 @@ TUPLE: ensure-not-parser p1 ;
  ] ;

 M: ensure-not-parser (compile) ( parser -- quot )
-  ensure-not-parser-p1 compiled-parser \ ?quot ensure-not-pattern match-replace ;
+  p1>> compiled-parser \ ?quot ensure-not-pattern match-replace ;

 TUPLE: action-parser p1 quot ;

@ -226,13 +225,13 @@ MATCH-VARS: ?action ;
 : action-pattern ( -- quot )
  [
    ?quot dup [ 
-      dup parse-result-ast ?action call
-      swap [ set-parse-result-ast ] keep
+      dup ast>> ?action call
+      >>ast
    ] when 
  ] ;

 M: action-parser (compile) ( parser -- quot )
-  { action-parser-p1 action-parser-quot } get-slots [ compiled-parser ] dip 
+  { [ p1>> ] [ quot>> ] } cleave [ compiled-parser ] dip 
  2array { ?quot ?action } action-pattern match-replace ;

 : left-trim-slice ( string -- string )
@ -246,7 +245,7 @@ TUPLE: sp-parser p1 ;

 M: sp-parser (compile) ( parser -- quot )
  [
-    \ left-trim-slice , sp-parser-p1 compiled-parser , 
+    \ left-trim-slice , p1>> compiled-parser , 
  ] [ ] make ;

 TUPLE: delay-parser quot ;
@ -256,7 +255,7 @@ M: delay-parser (compile) ( parser -- quot )
  #! This way it is run only once and the 
  #! parser constructed once at run time.
  [
-    delay-parser-quot % \ compile ,
+    quot>> % \ compile ,
  ] [ ] make 
  { } { "word" } <effect> memoize-quot 
  [ % \ execute , ] [ ] make ;
--- a/extra/ui/freetype/freetype.factor
+++ b/extra/ui/freetype/freetype.factor
@ -62,7 +62,7 @@ M: freetype-renderer free-fonts ( world -- )
    } at ;

 : ttf-path ( name -- string )
-    "/fonts/" swap ".ttf" 3append resource-path ;
+    "resource:fonts/" swap ".ttf" 3append ?resource-path ;

 : (open-face) ( path length -- face )
    #! We use FT_New_Memory_Face, not FT_New_Face, since
--- a/extra/unix/stat/linux/64/64.factor
+++ b/extra/unix/stat/linux/64/64.factor
@ -27,5 +27,5 @@ C-STRUCT: stat
 FUNCTION: int __xstat  ( int ver, char* pathname, stat* buf ) ;
 FUNCTION: int __lxstat ( int ver, char* pathname, stat* buf ) ;

-:  stat ( pathname buf -- int ) 3 -rot __xstat ;
-: lstat ( pathname buf -- int ) 3 -rot __lxstat ;
+:  stat ( pathname buf -- int ) 1 -rot __xstat ;
+: lstat ( pathname buf -- int ) 1 -rot __lxstat ;