Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2009-03-20 22:01:14 -05:00
commit 50535b1a74
21 changed files with 13587 additions and 83 deletions

View File

@ -1,22 +1,32 @@
USING: help.markup help.syntax ;
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax assocs kernel sequences ;
IN: interval-maps
HELP: interval-at*
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;
HELP: interval-at
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
{ $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;
HELP: interval-key?
{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
{ $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;
HELP: <interval-map>
{ $values { "specification" "an assoc" } { "map" "an interval map" } }
{ $values { "specification" assoc } { "map" interval-map } }
{ $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;
HELP: interval-values
{ $values { "map" interval-map } { "values" sequence } }
{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
HELP: coalesce
{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
ARTICLE: "interval-maps" "Interval maps"
"The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
$nl
@ -24,7 +34,9 @@ $nl
{ $subsection interval-at* }
{ $subsection interval-at }
{ $subsection interval-key? }
{ $subsection interval-values }
"Use the following to construct interval maps"
{ $subsection <interval-map> } ;
{ $subsection <interval-map> }
{ $subsection coalesce } ;
ABOUT: "interval-maps"

View File

@ -8,17 +8,21 @@ TUPLE: interval-map array ;
<PRIVATE
ALIAS: start first
ALIAS: end second
ALIAS: value third
: find-interval ( key interval-map -- interval-node )
[ first <=> ] with search nip ;
array>> [ start <=> ] with search nip ;
: interval-contains? ( key interval-node -- ? )
first2 between? ;
[ start ] [ end ] bi between? ;
: all-intervals ( sequence -- intervals )
[ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;
: disjoint? ( node1 node2 -- ? )
[ second ] [ first ] bi* < ;
[ end ] [ start ] bi* < ;
: ensure-disjoint ( intervals -- intervals )
dup [ disjoint? ] monotonic?
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
PRIVATE>
: interval-at* ( key map -- value ? )
[ drop ] [ array>> find-interval ] 2bi
[ drop ] [ find-interval ] 2bi
[ nip ] [ interval-contains? ] 2bi
[ third t ] [ drop f f ] if ;
[ value t ] [ drop f f ] if ;
: interval-at ( key map -- value ) interval-at* drop ;
: interval-key? ( key map -- ? ) interval-at* nip ;
: interval-values ( map -- values )
array>> [ value ] map ;
: <interval-map> ( specification -- map )
all-intervals [ [ first second ] compare ] sort
>intervals ensure-disjoint interval-map boa ;

View File

@ -0,0 +1,208 @@
#
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
# Unicode version: 1.1
# Table version: 0.9
# Table format: Format A
# Date: 8 March 1994
#
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
#
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
# No claims are made as to fitness for any particular purpose. No
# warranties of any kind are expressed or implied. The recipient
# agrees to determine applicability of information provided. If this
# file has been provided on magnetic media by Unicode, Inc., the sole
# remedy for any claim will be exchange of defective media within 90
# days of receipt.
#
# Recipient is granted the right to make copies in any form for
# internal distribution and to freely use the information supplied
# in the creation of products supporting Unicode. Unicode, Inc.
# specifically excludes the right to re-distribute this file directly
# to third parties or other organizations whether for profit or not.
#
# General notes:
#
#
# This table contains one set of mappings from JIS X 0201 into Unicode.
# Note that these data are *possible* mappings only and may not be the
# same as those used by actual products, nor may they be the best suited
# for all uses. For more information on the mappings between various code
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
# VENDORS mapping data. Normative information on the mapping between
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
# latest Unicode Character Database.
#
# If you have carefully considered the fact that the mappings in
# this table are only one possible set of mappings between JIS X 0201 and
# Unicode and have no normative status, but still feel that you
# have located an error in the table that requires fixing, you may
# report any such error to errata@unicode.org.
#
#
# Format: Three tab-separated columns
# Column #1 is the shift JIS code (in hex as 0xXX)
# Column #2 is the Unicode (in hex as 0xXXXX)
# Column #3 the Unicode (ISO 10646) name (follows a comment sign)
#
# The entries are in JIS order
#
#
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
0x23 0x0023 # NUMBER SIGN
0x24 0x0024 # DOLLAR SIGN
0x25 0x0025 # PERCENT SIGN
0x26 0x0026 # AMPERSAND
0x27 0x0027 # APOSTROPHE
0x28 0x0028 # LEFT PARENTHESIS
0x29 0x0029 # RIGHT PARENTHESIS
0x2A 0x002A # ASTERISK
0x2B 0x002B # PLUS SIGN
0x2C 0x002C # COMMA
0x2D 0x002D # HYPHEN-MINUS
0x2E 0x002E # FULL STOP
0x2F 0x002F # SOLIDUS
0x30 0x0030 # DIGIT ZERO
0x31 0x0031 # DIGIT ONE
0x32 0x0032 # DIGIT TWO
0x33 0x0033 # DIGIT THREE
0x34 0x0034 # DIGIT FOUR
0x35 0x0035 # DIGIT FIVE
0x36 0x0036 # DIGIT SIX
0x37 0x0037 # DIGIT SEVEN
0x38 0x0038 # DIGIT EIGHT
0x39 0x0039 # DIGIT NINE
0x3A 0x003A # COLON
0x3B 0x003B # SEMICOLON
0x3C 0x003C # LESS-THAN SIGN
0x3D 0x003D # EQUALS SIGN
0x3E 0x003E # GREATER-THAN SIGN
0x3F 0x003F # QUESTION MARK
0x40 0x0040 # COMMERCIAL AT
0x41 0x0041 # LATIN CAPITAL LETTER A
0x42 0x0042 # LATIN CAPITAL LETTER B
0x43 0x0043 # LATIN CAPITAL LETTER C
0x44 0x0044 # LATIN CAPITAL LETTER D
0x45 0x0045 # LATIN CAPITAL LETTER E
0x46 0x0046 # LATIN CAPITAL LETTER F
0x47 0x0047 # LATIN CAPITAL LETTER G
0x48 0x0048 # LATIN CAPITAL LETTER H
0x49 0x0049 # LATIN CAPITAL LETTER I
0x4A 0x004A # LATIN CAPITAL LETTER J
0x4B 0x004B # LATIN CAPITAL LETTER K
0x4C 0x004C # LATIN CAPITAL LETTER L
0x4D 0x004D # LATIN CAPITAL LETTER M
0x4E 0x004E # LATIN CAPITAL LETTER N
0x4F 0x004F # LATIN CAPITAL LETTER O
0x50 0x0050 # LATIN CAPITAL LETTER P
0x51 0x0051 # LATIN CAPITAL LETTER Q
0x52 0x0052 # LATIN CAPITAL LETTER R
0x53 0x0053 # LATIN CAPITAL LETTER S
0x54 0x0054 # LATIN CAPITAL LETTER T
0x55 0x0055 # LATIN CAPITAL LETTER U
0x56 0x0056 # LATIN CAPITAL LETTER V
0x57 0x0057 # LATIN CAPITAL LETTER W
0x58 0x0058 # LATIN CAPITAL LETTER X
0x59 0x0059 # LATIN CAPITAL LETTER Y
0x5A 0x005A # LATIN CAPITAL LETTER Z
0x5B 0x005B # LEFT SQUARE BRACKET
0x5C 0x00A5 # YEN SIGN
0x5D 0x005D # RIGHT SQUARE BRACKET
0x5E 0x005E # CIRCUMFLEX ACCENT
0x5F 0x005F # LOW LINE
0x60 0x0060 # GRAVE ACCENT
0x61 0x0061 # LATIN SMALL LETTER A
0x62 0x0062 # LATIN SMALL LETTER B
0x63 0x0063 # LATIN SMALL LETTER C
0x64 0x0064 # LATIN SMALL LETTER D
0x65 0x0065 # LATIN SMALL LETTER E
0x66 0x0066 # LATIN SMALL LETTER F
0x67 0x0067 # LATIN SMALL LETTER G
0x68 0x0068 # LATIN SMALL LETTER H
0x69 0x0069 # LATIN SMALL LETTER I
0x6A 0x006A # LATIN SMALL LETTER J
0x6B 0x006B # LATIN SMALL LETTER K
0x6C 0x006C # LATIN SMALL LETTER L
0x6D 0x006D # LATIN SMALL LETTER M
0x6E 0x006E # LATIN SMALL LETTER N
0x6F 0x006F # LATIN SMALL LETTER O
0x70 0x0070 # LATIN SMALL LETTER P
0x71 0x0071 # LATIN SMALL LETTER Q
0x72 0x0072 # LATIN SMALL LETTER R
0x73 0x0073 # LATIN SMALL LETTER S
0x74 0x0074 # LATIN SMALL LETTER T
0x75 0x0075 # LATIN SMALL LETTER U
0x76 0x0076 # LATIN SMALL LETTER V
0x77 0x0077 # LATIN SMALL LETTER W
0x78 0x0078 # LATIN SMALL LETTER X
0x79 0x0079 # LATIN SMALL LETTER Y
0x7A 0x007A # LATIN SMALL LETTER Z
0x7B 0x007B # LEFT CURLY BRACKET
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x203E # OVERLINE
0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP
0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET
0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET
0xA4 0xFF64 # HALFWIDTH IDEOGRAPHIC COMMA
0xA5 0xFF65 # HALFWIDTH KATAKANA MIDDLE DOT
0xA6 0xFF66 # HALFWIDTH KATAKANA LETTER WO
0xA7 0xFF67 # HALFWIDTH KATAKANA LETTER SMALL A
0xA8 0xFF68 # HALFWIDTH KATAKANA LETTER SMALL I
0xA9 0xFF69 # HALFWIDTH KATAKANA LETTER SMALL U
0xAA 0xFF6A # HALFWIDTH KATAKANA LETTER SMALL E
0xAB 0xFF6B # HALFWIDTH KATAKANA LETTER SMALL O
0xAC 0xFF6C # HALFWIDTH KATAKANA LETTER SMALL YA
0xAD 0xFF6D # HALFWIDTH KATAKANA LETTER SMALL YU
0xAE 0xFF6E # HALFWIDTH KATAKANA LETTER SMALL YO
0xAF 0xFF6F # HALFWIDTH KATAKANA LETTER SMALL TU
0xB0 0xFF70 # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
0xB1 0xFF71 # HALFWIDTH KATAKANA LETTER A
0xB2 0xFF72 # HALFWIDTH KATAKANA LETTER I
0xB3 0xFF73 # HALFWIDTH KATAKANA LETTER U
0xB4 0xFF74 # HALFWIDTH KATAKANA LETTER E
0xB5 0xFF75 # HALFWIDTH KATAKANA LETTER O
0xB6 0xFF76 # HALFWIDTH KATAKANA LETTER KA
0xB7 0xFF77 # HALFWIDTH KATAKANA LETTER KI
0xB8 0xFF78 # HALFWIDTH KATAKANA LETTER KU
0xB9 0xFF79 # HALFWIDTH KATAKANA LETTER KE
0xBA 0xFF7A # HALFWIDTH KATAKANA LETTER KO
0xBB 0xFF7B # HALFWIDTH KATAKANA LETTER SA
0xBC 0xFF7C # HALFWIDTH KATAKANA LETTER SI
0xBD 0xFF7D # HALFWIDTH KATAKANA LETTER SU
0xBE 0xFF7E # HALFWIDTH KATAKANA LETTER SE
0xBF 0xFF7F # HALFWIDTH KATAKANA LETTER SO
0xC0 0xFF80 # HALFWIDTH KATAKANA LETTER TA
0xC1 0xFF81 # HALFWIDTH KATAKANA LETTER TI
0xC2 0xFF82 # HALFWIDTH KATAKANA LETTER TU
0xC3 0xFF83 # HALFWIDTH KATAKANA LETTER TE
0xC4 0xFF84 # HALFWIDTH KATAKANA LETTER TO
0xC5 0xFF85 # HALFWIDTH KATAKANA LETTER NA
0xC6 0xFF86 # HALFWIDTH KATAKANA LETTER NI
0xC7 0xFF87 # HALFWIDTH KATAKANA LETTER NU
0xC8 0xFF88 # HALFWIDTH KATAKANA LETTER NE
0xC9 0xFF89 # HALFWIDTH KATAKANA LETTER NO
0xCA 0xFF8A # HALFWIDTH KATAKANA LETTER HA
0xCB 0xFF8B # HALFWIDTH KATAKANA LETTER HI
0xCC 0xFF8C # HALFWIDTH KATAKANA LETTER HU
0xCD 0xFF8D # HALFWIDTH KATAKANA LETTER HE
0xCE 0xFF8E # HALFWIDTH KATAKANA LETTER HO
0xCF 0xFF8F # HALFWIDTH KATAKANA LETTER MA
0xD0 0xFF90 # HALFWIDTH KATAKANA LETTER MI
0xD1 0xFF91 # HALFWIDTH KATAKANA LETTER MU
0xD2 0xFF92 # HALFWIDTH KATAKANA LETTER ME
0xD3 0xFF93 # HALFWIDTH KATAKANA LETTER MO
0xD4 0xFF94 # HALFWIDTH KATAKANA LETTER YA
0xD5 0xFF95 # HALFWIDTH KATAKANA LETTER YU
0xD6 0xFF96 # HALFWIDTH KATAKANA LETTER YO
0xD7 0xFF97 # HALFWIDTH KATAKANA LETTER RA
0xD8 0xFF98 # HALFWIDTH KATAKANA LETTER RI
0xD9 0xFF99 # HALFWIDTH KATAKANA LETTER RU
0xDA 0xFF9A # HALFWIDTH KATAKANA LETTER RE
0xDB 0xFF9B # HALFWIDTH KATAKANA LETTER RO
0xDC 0xFF9C # HALFWIDTH KATAKANA LETTER WA
0xDD 0xFF9D # HALFWIDTH KATAKANA LETTER N
0xDE 0xFF9E # HALFWIDTH KATAKANA VOICED SOUND MARK
0xDF 0xFF9F # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1,13 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: io.encodings.iso2022
HELP: iso2022
{ $class-description "This encoding class implements ISO 2022-JP-1, a Japanese text encoding commonly used for email." }
{ $see-also "encodings-introduction" } ;
ARTICLE: "io.encodings.iso2022" "ISO 2022-JP-1 encoding"
{ $subsection iso2022 } ;
ABOUT: "io.encodings.iso2022"

View File

@ -0,0 +1,36 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.string io.encodings.iso2022 tools.test
io.encodings.iso2022.private literals strings byte-arrays ;
IN: io.encodings.iso2022
[ "hello" ] [ "hello" >byte-array iso2022 decode ] unit-test
[ "hello" ] [ "hello" iso2022 encode >string ] unit-test
[ "hi" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h CHAR: i $ ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i $ ESC CHAR: ( } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i $ ESC } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
[ "h\u00ff98" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
[ "h\u007126" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
[ "h\u0058ce" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
[ "\u{syriac-music}" iso2022 encode ] must-fail

View File

@ -0,0 +1,107 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings kernel sequences io simple-flat-file sets math
combinators.short-circuit io.binary values arrays assocs
locals accessors combinators literals biassocs byte-arrays ;
IN: io.encodings.iso2022
SINGLETON: iso2022
<PRIVATE
VALUE: jis201
VALUE: jis208
VALUE: jis212
"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc to: jis201
"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc to: jis208
"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc to: jis212
VALUE: ascii
128 unique >biassoc to: ascii
TUPLE: iso2022-state type ;
: make-iso-coder ( encoding -- state )
drop ascii iso2022-state boa ;
M: iso2022 <encoder>
make-iso-coder <encoder> ;
M: iso2022 <decoder>
make-iso-coder <decoder> ;
CONSTANT: ESC HEX: 16
CONSTANT: switch-ascii B{ $ ESC CHAR: ( CHAR: B }
CONSTANT: switch-jis201 B{ $ ESC CHAR: ( CHAR: J }
CONSTANT: switch-jis208 B{ $ ESC CHAR: $ CHAR: B }
CONSTANT: switch-jis212 B{ $ ESC CHAR: $ CHAR: ( CHAR: D }
: find-type ( char -- code type )
{
{ [ dup ascii value? ] [ drop switch-ascii ascii ] }
{ [ dup jis201 value? ] [ drop switch-jis201 jis201 ] }
{ [ dup jis208 value? ] [ drop switch-jis208 jis208 ] }
{ [ dup jis212 value? ] [ drop switch-jis212 jis212 ] }
[ encode-error ]
} cond ;
: stream-write-num ( num stream -- )
over 256 >=
[ [ h>b/b swap 2byte-array ] dip stream-write ]
[ stream-write1 ] if ;
M:: iso2022-state encode-char ( char stream encoding -- )
char encoding type>> value? [
char find-type
[ stream stream-write ]
[ encoding (>>type) ] bi*
] unless
char encoding type>> value-at stream stream-write-num ;
: read-escape ( stream -- type/f )
dup stream-read1 {
{ CHAR: ( [
stream-read1 {
{ CHAR: B [ ascii ] }
{ CHAR: J [ jis201 ] }
[ drop f ]
} case
] }
{ CHAR: $ [
dup stream-read1 {
{ CHAR: @ [ drop jis208 ] } ! want: JIS X 0208-1978
{ CHAR: B [ drop jis208 ] }
{ CHAR: ( [
stream-read1 CHAR: D = jis212 f ?
] }
[ 2drop f ]
} case
] }
[ 2drop f ]
} case ;
: double-width? ( type -- ? )
{ [ jis208 eq? ] [ jis212 eq? ] } 1|| ;
: finish-decode ( num encoding -- char )
type>> at replacement-char or ;
M:: iso2022-state decode-char ( stream encoding -- char )
stream stream-read1 {
{ ESC [
stream read-escape [
encoding (>>type)
stream encoding decode-char
] [ replacement-char ] if*
] }
{ f [ f ] }
[
encoding type>> double-width? [
stream stream-read1
[ 2byte-array be> encoding finish-decode ]
[ drop replacement-char ] if*
] [ encoding finish-decode ] if
]
} case ;

View File

@ -0,0 +1 @@
ISO-2022-JP-1 text encoding

View File

@ -76,6 +76,18 @@ ERROR: end-of-stream multipart ;
: empty-name? ( string -- ? )
{ "''" "\"\"" "" f } member? ;
: quote? ( ch -- ? ) "'\"" member? ;
: quoted? ( str -- ? )
{
[ length 1 > ]
[ first quote? ]
[ [ first ] [ peek ] bi = ]
} 1&& ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;
: save-uploaded-file ( multipart -- )
dup filename>> empty-name? [
drop

View File

@ -1 +0,0 @@
Doug Coleman

View File

@ -1,32 +0,0 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax strings ;
IN: quoting
HELP: quote?
{ $values
{ "ch" "a character" }
{ "?" "a boolean" }
}
{ $description "Returns true if the character is a single or double quote." } ;
HELP: quoted?
{ $values
{ "str" string }
{ "?" "a boolean" }
}
{ $description "Returns true if a string is surrounded by matching single or double quotes as the first and last characters." } ;
HELP: unquote
{ $values
{ "str" string }
{ "newstr" string }
}
{ $description "Removes a pair of matching single or double quotes from a string." } ;
ARTICLE: "quoting" "Quotation marks"
"The " { $vocab-link "quoting" } " vocabulary is for removing quotes from a string." $nl
"Removing quotes:"
{ $subsection unquote } ;
ABOUT: "quoting"

View File

@ -1,10 +0,0 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test quoting ;
IN: quoting.tests
[ "abc" ] [ "'abc'" unquote ] unit-test
[ "abc" ] [ "\"abc\"" unquote ] unit-test
[ "'abc" ] [ "'abc" unquote ] unit-test
[ "abc'" ] [ "abc'" unquote ] unit-test

View File

@ -1,16 +0,0 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators.short-circuit kernel math sequences strings ;
IN: quoting
: quote? ( ch -- ? ) "'\"" member? ;
: quoted? ( str -- ? )
{
[ length 1 > ]
[ first quote? ]
[ [ first ] [ peek ] bi = ]
} 1&& ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;

View File

@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )
M: f class-member? 2drop f ;
: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
bi* = ; inline
M: script-class class-member?
[ script-of ] [ script>> ] bi* = ;
[ script-of ] [ script>> ] same? ;
M: category-class class-member?
[ category# ] [ category>> ] bi* = ;
[ category ] [ category>> ] same? ;
M: category-range-class class-member?
[ category first ] [ category>> ] bi* = ;
[ category first ] [ category>> ] same? ;
TUPLE: not-class class ;
PREDICATE: not-integer < not-class class>> integer? ;
UNION: simple-class
primitive-class range-class category-class category-range-class dot ;
primitive-class range-class dot ;
PREDICATE: not-simple < not-class class>> simple-class? ;
M: not-class class-member?

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
combinators regexp.classes strings splitting peg locals accessors
regexp.ast unicode.case ;
regexp.ast unicode.case unicode.script.private unicode.categories
memoize interval-maps sets unicode.data combinators.short-circuit ;
IN: regexp.parser
: allowed-char? ( ch -- ? )
@ -18,15 +19,41 @@ ERROR: bad-number ;
ERROR: bad-class name ;
: simple ( str -- simple )
! Alternatively, first collation key level?
>case-fold [ " \t_" member? not ] filter ;
: simple-table ( seq -- table )
[ [ simple ] keep ] H{ } map>assoc ;
MEMO: simple-script-table ( -- table )
script-table interval-values prune simple-table ;
MEMO: simple-category-table ( -- table )
categories simple-table ;
: parse-unicode-class ( name -- class )
! Implement this!
drop f ;
{
{ [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
>upper first
<category-range-class>
] }
{ [ dup >title categories member? ] [
simple-category-table at <category-class>
] }
{ [ "script=" ?head ] [
dup simple-script-table at
[ <script-class> ]
[ "script=" prepend bad-class ] ?if
] }
[ bad-class ]
} cond ;
: unicode-class ( name -- class )
dup parse-unicode-class [ ] [ bad-class ] ?if ;
: name>class ( name -- class )
>string >case-fold {
>string simple {
{ "lower" letter-class }
{ "upper" LETTER-class }
{ "alpha" Letter-class }

View File

@ -72,8 +72,10 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ { $snippet "\\p{blank}" } "Non-newline whitespace" }
{ { $snippet "\\p{cntrl}" } "Control character" }
{ { $snippet "\\p{space}" } "Whitespace" }
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
"Full unicode properties are not yet supported."
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" }
{ { $snippet "\\p{Nd}" } "Character in Unicode category Nd" }
{ { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" }
{ { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
{ $heading "Boundaries" }
"Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
{ $table

View File

@ -480,3 +480,31 @@ IN: regexp-tests
[ f ] [ "a\r" R/ a$./mds matches? ] unit-test
[ t ] [ "a\n" R/ a$./ms matches? ] unit-test
[ t ] [ "a\n" R/ a$./mds matches? ] unit-test
! Unicode categories
[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
[ f ] [ " " R/ \p{L}/ matches? ] unit-test
[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
[ t ] [ " " R/ \P{L}/ matches? ] unit-test
[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
! These should be case-insensitive
[ f ] [ " " R/ \p{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test

View File

@ -139,10 +139,10 @@ check_library_exists() {
}
check_X11_libraries() {
check_library_exists freetype
check_library_exists GLU
check_library_exists GL
check_library_exists X11
check_library_exists pango
}
check_libraries() {
@ -502,7 +502,7 @@ make_boot_image() {
}
install_build_system_apt() {
sudo apt-get --yes install libc6-dev libpango-1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
sudo apt-get --yes install libc6-dev libpango1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
check_ret sudo
}

View File

@ -48,6 +48,6 @@ IN: benchmark.spectral-norm
HINTS: spectral-norm fixnum ;
: spectral-norm-main ( -- )
5500 spectral-norm . ;
2000 spectral-norm . ;
MAIN: spectral-norm-main