From 0ab7ed6979f024482c72c13eb2285381e78742a8 Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Wed, 28 Feb 2018 08:55:15 -0800 Subject: [PATCH] io.encodings: use new latin1 instead of 8-bit encoding from file. --- basis/ftp/server/server.factor | 2 +- basis/http/client/client-docs.factor | 2 +- basis/http/http-tests.factor | 2 +- basis/io/encodings/8-bit/8-bit-tests.factor | 2 + basis/io/encodings/8-bit/8-bit.factor | 37 ++++++------------- .../io/sockets/secure/openssl/openssl.factor | 2 +- basis/io/streams/limited/limited-tests.factor | 2 +- .../quoted-printable-tests.factor | 2 +- basis/tools/deploy/test/4/4.factor | 2 +- core/alien/strings/strings-tests.factor | 2 +- core/alien/strings/strings.factor | 10 ++--- core/io/files/files-tests.factor | 4 +- extra/geobytes/geobytes.factor | 2 +- extra/graphviz/graphviz-tests.factor | 4 +- extra/graphviz/render/render-docs.factor | 2 +- extra/graphviz/render/render.factor | 2 +- extra/images/bitmap/bitmap.factor | 2 +- extra/images/png/png.factor | 8 ++-- extra/io/encodings/detect/detect-tests.factor | 4 +- extra/io/encodings/detect/detect.factor | 17 ++++----- 20 files changed, 49 insertions(+), 61 deletions(-) diff --git a/basis/ftp/server/server.factor b/basis/ftp/server/server.factor index dbf7a6affb..1bd87be393 100644 --- a/basis/ftp/server/server.factor +++ b/basis/ftp/server/server.factor @@ -3,7 +3,7 @@ USING: accessors calendar calendar.format classes combinators combinators.short-circuit concurrency.promises continuations destructors ftp io io.directories io.encodings -io.encodings.8-bit io.encodings.binary io.encodings.utf8 +io.encodings.binary io.encodings.latin1 io.encodings.utf8 io.files io.files.info io.pathnames io.servers io.sockets io.streams.string io.timeouts kernel logging math math.bitwise math.parser namespaces sequences simple-tokenizer splitting diff --git a/basis/http/client/client-docs.factor b/basis/http/client/client-docs.factor index a6e7996fd5..d846693adb 100644 --- a/basis/http/client/client-docs.factor +++ b/basis/http/client/client-docs.factor @@ -1,6 +1,6 @@ USING: assocs byte-arrays destructors help.markup help.syntax http http.client.post-data.private http.client.private -io.encodings.8-bit io.encodings.binary io.pathnames kernel +io.encodings.binary io.encodings.latin1 io.pathnames kernel sequences strings urls urls.encoding ; IN: http.client diff --git a/basis/http/http-tests.factor b/basis/http/http-tests.factor index ab83ba1f86..fc6a896c65 100644 --- a/basis/http/http-tests.factor +++ b/basis/http/http-tests.factor @@ -1,6 +1,6 @@ USING: destructors http http.server http.server.requests http.client http.client.private tools.test multiline fry io.streams.string io.crlf -io.encodings.utf8 io.encodings.8-bit io.encodings.binary io.encodings.string +io.encodings.utf8 io.encodings.latin1 io.encodings.binary io.encodings.string io.encodings.ascii kernel arrays splitting sequences assocs io.sockets db db.sqlite make continuations urls hashtables accessors namespaces xml.data random combinators.short-circuit literals ; diff --git a/basis/io/encodings/8-bit/8-bit-tests.factor b/basis/io/encodings/8-bit/8-bit-tests.factor index 758b0ef48b..d6309b5656 100644 --- a/basis/io/encodings/8-bit/8-bit-tests.factor +++ b/basis/io/encodings/8-bit/8-bit-tests.factor @@ -11,3 +11,5 @@ io.encodings.8-bit.private strings tools.test ; { t } [ \ latin1 8-bit-encoding? ] unit-test { "bar" } [ "bar" \ latin1 decode ] unit-test + +{ { 0x221a 0x00b1 0x0040 } } [ B{ 0xfb 0xf1 0x40 } cp437 decode >array ] unit-test diff --git a/basis/io/encodings/8-bit/8-bit.factor b/basis/io/encodings/8-bit/8-bit.factor index e2ac549f8c..73e17ba46c 100644 --- a/basis/io/encodings/8-bit/8-bit.factor +++ b/basis/io/encodings/8-bit/8-bit.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs biassocs classes.mixin classes.singleton -fry io io.encodings io.encodings.iana kernel lexer namespaces -parser sequences simple-flat-file ; +USING: accessors assocs biassocs classes.singleton generic io +io.encodings io.encodings.iana kernel lexer parser sequences +simple-flat-file words ; IN: io.encodings.8-bit << @@ -11,41 +11,28 @@ IN: io.encodings.8-bit : encoding-file ( file-name -- stream ) "vocab:io/encodings/8-bit/" ".TXT" surround ; -SYMBOL: 8-bit-encodings -8-bit-encodings [ H{ } clone ] initialize - -TUPLE: 8-bit { biassoc biassoc read-only } ; +TUPLE: 8-bit { table biassoc read-only } ; : 8-bit-encode ( char 8-bit -- byte ) - biassoc>> value-at [ encode-error ] unless* ; inline + table>> value-at [ encode-error ] unless* ; inline M: 8-bit encode-char swap [ 8-bit-encode ] dip stream-write1 ; M: 8-bit decode-char - swap stream-read1 - [ swap biassoc>> at [ replacement-char ] unless* ] - [ drop f ] if* ; - -MIXIN: 8-bit-encoding - -M: 8-bit-encoding - 8-bit-encodings get-global at ; - -M: 8-bit-encoding - 8-bit-encodings get-global at ; + swap stream-read1 [ + swap table>> at [ replacement-char ] unless* + ] [ drop f ] if* ; : create-encoding ( name -- word ) - create-word-in - [ define-singleton-class ] - [ 8-bit-encoding add-mixin-instance ] - [ ] tri ; + create-word-in dup define-singleton-class ; : load-encoding ( name iana-name file-name -- ) [ create-encoding dup ] [ register-encoding ] [ encoding-file load-codetable-file 8-bit boa ] tri* - swap 8-bit-encodings get-global set-at ; + [ [ \ create-method ] dip [ nip ] curry define ] + [ [ \ create-method ] dip [ nip ] curry define ] 2bi ; PRIVATE> @@ -75,7 +62,7 @@ SYNTAX: 8-BIT: scan-token scan-token scan-token load-encoding ; 8-BIT: latin/greek ISO_8859-7:1987 8859-7 8-BIT: latin/hebrew ISO_8859-8:1988 8859-8 8-BIT: latin/thai TIS-620 8859-11 -8-BIT: latin1 ISO_8859-1:1987 8859-1 +! 8-BIT: latin1 ISO_8859-1:1987 8859-1 8-BIT: latin2 ISO_8859-2:1987 8859-2 8-BIT: latin3 ISO_8859-3:1988 8859-3 8-BIT: latin4 ISO_8859-4:1988 8859-4 diff --git a/basis/io/sockets/secure/openssl/openssl.factor b/basis/io/sockets/secure/openssl/openssl.factor index 9d1fb96f77..7ee03d3adf 100644 --- a/basis/io/sockets/secure/openssl/openssl.factor +++ b/basis/io/sockets/secure/openssl/openssl.factor @@ -3,7 +3,7 @@ USING: accessors alien alien.c-types alien.data alien.enums alien.strings assocs byte-arrays classes.struct combinators combinators.short-circuit destructors fry io io.backend -io.binary io.buffers io.encodings.8-bit io.encodings.utf8 +io.binary io.buffers io.encodings.latin1 io.encodings.utf8 io.files io.pathnames io.ports io.sockets io.sockets.secure io.timeouts kernel libc locals math math.functions math.order math.parser memoize namespaces openssl openssl.libcrypto diff --git a/basis/io/streams/limited/limited-tests.factor b/basis/io/streams/limited/limited-tests.factor index 59a7530c1f..6437761079 100644 --- a/basis/io/streams/limited/limited-tests.factor +++ b/basis/io/streams/limited/limited-tests.factor @@ -1,4 +1,4 @@ -USING: destructors io io.encodings io.encodings.8-bit +USING: destructors io io.encodings io.encodings.latin1 io.encodings.ascii io.encodings.binary io.encodings.string io.encodings.utf8 io.files io.pipes io.streams.byte-array io.streams.duplex io.streams.limited io.streams.string kernel diff --git a/basis/quoted-printable/quoted-printable-tests.factor b/basis/quoted-printable/quoted-printable-tests.factor index 2e51891fbb..c389045aba 100644 --- a/basis/quoted-printable/quoted-printable-tests.factor +++ b/basis/quoted-printable/quoted-printable-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: tools.test quoted-printable io.encodings.string -sequences splitting kernel io.encodings.8-bit ; +sequences splitting kernel io.encodings.latin1 ; IN: quoted-printable.tests { "José was the diff --git a/basis/tools/deploy/test/4/4.factor b/basis/tools/deploy/test/4/4.factor index 2527dd5a17..30f5b8c4da 100644 --- a/basis/tools/deploy/test/4/4.factor +++ b/basis/tools/deploy/test/4/4.factor @@ -2,6 +2,6 @@ USING: io.encodings.string kernel io.encodings.8-bit ; IN: tools.deploy.test.4 : deploy-test-4 ( -- ) - "xyzthg" \ latin7 encode drop ; + "xyzthg" latin7 encode drop ; MAIN: deploy-test-4 diff --git a/core/alien/strings/strings-tests.factor b/core/alien/strings/strings-tests.factor index dda7aafd58..03d29738be 100644 --- a/core/alien/strings/strings-tests.factor +++ b/core/alien/strings/strings-tests.factor @@ -1,6 +1,6 @@ USING: alien.strings alien.c-types alien.data tools.test kernel libc io.encodings.utf8 io.encodings.utf16 io.encodings.utf16n -io.encodings.ascii alien io.encodings.string io.encodings.8-bit ; +io.encodings.ascii alien io.encodings.string io.encodings.latin1 ; { "\u0000ff" } [ "\u0000ff" latin1 string>alien latin1 alien>string ] diff --git a/core/alien/strings/strings.factor b/core/alien/strings/strings.factor index 74fdfb5637..e2bb70bcd4 100644 --- a/core/alien/strings/strings.factor +++ b/core/alien/strings/strings.factor @@ -1,10 +1,10 @@ ! Copyright (C) 2008, 2011 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien arrays byte-arrays byte-vectors init io -io.encodings io.encodings.ascii io.encodings.utf16n -io.encodings.utf8 io.streams.memory kernel kernel.private math -namespaces sequences sequences.private strings strings.private -system system.private ; +io.encodings io.encodings.ascii io.encodings.latin1 +io.encodings.utf16n io.encodings.utf8 io.streams.memory kernel +kernel.private math namespaces sequences sequences.private +strings strings.private system system.private ; IN: alien.strings GENERIC#: alien>string 1 ( c-ptr encoding -- string/f ) @@ -31,7 +31,7 @@ M: c-ptr string>alien drop ; > not [ { ascii utf8 } member-eq? ] [ drop f ] if ; inline + swap aux>> not [ { ascii latin1 utf8 } member-eq? ] [ drop f ] if ; inline : string>alien-fast ( string encoding -- byte-array ) { string object } declare ! aux>> must be f diff --git a/core/io/files/files-tests.factor b/core/io/files/files-tests.factor index ba880b1b32..4024e5ed5c 100644 --- a/core/io/files/files-tests.factor +++ b/core/io/files/files-tests.factor @@ -1,7 +1,7 @@ USING: alien alien.c-types alien.data arrays classes.struct compiler.units continuations destructors fry generic.single io -io.backend io.directories io.encodings io.encodings.8-bit -io.encodings.ascii io.encodings.binary io.encodings.string +io.backend io.directories io.encodings io.encodings.ascii +io.encodings.binary io.encodings.latin1 io.encodings.string io.encodings.utf16 io.encodings.utf8 io.files io.files.private io.pathnames kernel locals make math sequences specialized-arrays system threads tools.test vocabs ; diff --git a/extra/geobytes/geobytes.factor b/extra/geobytes/geobytes.factor index 567523eb7f..2859c862b8 100644 --- a/extra/geobytes/geobytes.factor +++ b/extra/geobytes/geobytes.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: combinators combinators.smart csv io.encodings.8-bit +USING: combinators combinators.smart csv io.encodings.latin1 kernel math.parser memoize money sequences unicode ; IN: geobytes diff --git a/extra/graphviz/graphviz-tests.factor b/extra/graphviz/graphviz-tests.factor index 4d37f6f9a2..b5b58fb465 100644 --- a/extra/graphviz/graphviz-tests.factor +++ b/extra/graphviz/graphviz-tests.factor @@ -1,7 +1,7 @@ USING: accessors arrays assocs continuations formatting graphviz graphviz.notation graphviz.render graphviz.render.private -images.loader.private io.directories io.encodings.8-bit -io.encodings.ascii io.encodings.utf8 io.files io.launcher kernel +images.loader.private io.directories io.encodings.ascii +io.encodings.latin1 io.encodings.utf8 io.files io.launcher kernel locals make math math.combinatorics math.parser namespaces sequences sequences.extras sets splitting system tools.test ; IN: graphviz.tests diff --git a/extra/graphviz/render/render-docs.factor b/extra/graphviz/render/render-docs.factor index 230bcb0ed8..2f75dcb8bd 100644 --- a/extra/graphviz/render/render-docs.factor +++ b/extra/graphviz/render/render-docs.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2012 Alex Vondrak. ! See http://factorcode.org/license.txt for BSD license. USING: graphviz help.markup help.syntax images.viewer -io.encodings.8-bit io.encodings.utf8 io.launcher kernel +io.encodings.latin1 io.encodings.utf8 io.launcher kernel strings ; IN: graphviz.render diff --git a/extra/graphviz/render/render.factor b/extra/graphviz/render/render.factor index 2b96d8a441..ea077a15ec 100644 --- a/extra/graphviz/render/render.factor +++ b/extra/graphviz/render/render.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: calendar combinators compiler.units continuations graphviz.dot images.viewer io.backend io.directories -io.encodings.8-bit io.encodings.utf8 io.files +io.encodings.latin1 io.encodings.utf8 io.files io.files.temp io.files.unique io.launcher io.standard-paths kernel locals make namespaces sequences summary system threads unicode vocabs webbrowser words ; diff --git a/extra/images/bitmap/bitmap.factor b/extra/images/bitmap/bitmap.factor index ee514bc9dc..8c9de5005b 100644 --- a/extra/images/bitmap/bitmap.factor +++ b/extra/images/bitmap/bitmap.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types alien.data arrays byte-arrays combinators compression.run-length fry grouping images images.loader images.normalization io io.binary -io.encodings.8-bit io.encodings.string io.streams.throwing +io.encodings.latin1 io.encodings.string io.streams.throwing kernel math math.bitwise sequences specialized-arrays summary ; QUALIFIED-WITH: bitstreams b SPECIALIZED-ARRAYS: uint ushort ; diff --git a/extra/images/png/png.factor b/extra/images/png/png.factor index eff32f4736..c5a195d546 100644 --- a/extra/images/png/png.factor +++ b/extra/images/png/png.factor @@ -2,10 +2,10 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs checksums checksums.crc32 combinators compression.inflate fry grouping images -images.loader io io.binary io.encodings.8-bit io.encodings.ascii -io.encodings.binary io.encodings.string io.streams.byte-array -io.streams.throwing kernel locals math math.bitwise -math.functions sequences sorting ; +images.loader io io.binary io.encodings.ascii +io.encodings.binary io.encodings.latin1 io.encodings.string +io.streams.byte-array io.streams.throwing kernel locals math +math.bitwise math.functions sequences sorting ; QUALIFIED: bitstreams IN: images.png diff --git a/extra/io/encodings/detect/detect-tests.factor b/extra/io/encodings/detect/detect-tests.factor index 61e4a4ff41..5a8c00fcb2 100644 --- a/extra/io/encodings/detect/detect-tests.factor +++ b/extra/io/encodings/detect/detect-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2010 Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: byte-arrays io.encodings.8-bit io.encodings.binary -io.encodings.detect io.encodings.utf16 io.encodings.utf32 +USING: byte-arrays io.encodings.binary io.encodings.detect +io.encodings.latin1 io.encodings.utf16 io.encodings.utf32 io.encodings.utf8 namespaces tools.test ; ! UTF encodings with BOMs diff --git a/extra/io/encodings/detect/detect.factor b/extra/io/encodings/detect/detect.factor index 370100d9d5..2f1ab0572f 100644 --- a/extra/io/encodings/detect/detect.factor +++ b/extra/io/encodings/detect/detect.factor @@ -1,15 +1,14 @@ ! Copyright (C) 2010 Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays combinators continuations fry io -io.encodings io.encodings.8-bit io.encodings.ascii -io.encodings.binary io.encodings.iana io.encodings.string -io.encodings.utf16 io.encodings.utf32 io.encodings.utf8 -io.files io.streams.string kernel literals math namespaces -sequences strings ; +USING: byte-arrays combinators continuations fry io io.encodings +io.encodings.ascii io.encodings.binary io.encodings.iana +io.encodings.latin1 io.encodings.string io.encodings.utf16 +io.encodings.utf32 io.encodings.utf8 io.files kernel literals +math namespaces sequences strings ; IN: io.encodings.detect -SYMBOL: default-8bit-encoding -default-8bit-encoding [ latin1 ] initialize +SYMBOL: default-encoding +default-encoding [ latin1 ] initialize { [ 0 over member? ] [ drop binary ] } { [ dup empty? ] [ drop utf8 ] } { [ dup valid-utf8? ] [ drop utf8 ] } - [ drop default-8bit-encoding get ] + [ drop default-encoding get ] } cond ; : detect-stream ( stream -- sample encoding )