Merge branch 'master' of git://factorcode.org/git/factor into clean-linux-x86-32

2009-03-23 03:22:13 -04:00 · 2009-03-23 03:22:13 -04:00 · 2c19aa1f25
parent 83592b7d84 a38e9c2e79
commit 2c19aa1f25
130 changed files with 14201 additions and 618 deletions
--- a/basis/alien/destructors/destructors.factor
+++ b/basis/alien/destructors/destructors.factor
@ -26,4 +26,4 @@ M: F-destructor dispose* alien>> F ;

 ;FUNCTOR

-: DESTRUCTOR: scan-word define-destructor ; parsing
+SYNTAX: DESTRUCTOR: scan-word define-destructor ;
--- a/basis/alien/fortran/fortran.factor
+++ b/basis/alien/fortran/fortran.factor
@ -416,7 +416,7 @@ PRIVATE>
 : define-fortran-record ( name vocab fields -- )
    [ >lower ] [ ] [ fortran-record>c-struct ] tri* define-struct ;

-: RECORD: scan in get parse-definition define-fortran-record ; parsing
+SYNTAX: RECORD: scan in get parse-definition define-fortran-record ;

 : set-fortran-abi ( library -- )
    library-fortran-abis get-global at fortran-abi set ;
@ -437,16 +437,16 @@ MACRO: fortran-invoke ( return library function parameters -- )
    return library function parameters return [ "void" ] unless* parse-arglist
    [ \ fortran-invoke 5 [ ] nsequence ] dip define-declared ;

-: SUBROUTINE: 
+SYNTAX: SUBROUTINE: 
    f "c-library" get scan ";" parse-tokens
-    [ "()" subseq? not ] filter define-fortran-function ; parsing
+    [ "()" subseq? not ] filter define-fortran-function ;

-: FUNCTION:
+SYNTAX: FUNCTION:
    scan "c-library" get scan ";" parse-tokens
-    [ "()" subseq? not ] filter define-fortran-function ; parsing
+    [ "()" subseq? not ] filter define-fortran-function ;

-: LIBRARY:
+SYNTAX: LIBRARY:
    scan
    [ "c-library" set ]
-    [ set-fortran-abi ] bi  ; parsing
+    [ set-fortran-abi ] bi ;

--- a/basis/alien/syntax/syntax.factor
+++ b/basis/alien/syntax/syntax.factor
@ -7,35 +7,34 @@ effects assocs combinators lexer strings.parser alien.parser
 fry vocabs.parser words.constant ;
 IN: alien.syntax

-: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
+SYNTAX: DLL" lexer get skip-blank parse-string dlopen parsed ;

-: ALIEN: scan string>number <alien> parsed ; parsing
+SYNTAX: ALIEN: scan string>number <alien> parsed ;

-: BAD-ALIEN <bad-alien> parsed ; parsing
+SYNTAX: BAD-ALIEN <bad-alien> parsed ;

-: LIBRARY: scan "c-library" set ; parsing
+SYNTAX: LIBRARY: scan "c-library" set ;

-: FUNCTION:
+SYNTAX: FUNCTION:
    scan "c-library" get scan ";" parse-tokens
    [ "()" subseq? not ] filter
-    define-function ; parsing
+    define-function ;

-: TYPEDEF:
-    scan scan typedef ; parsing
+SYNTAX: TYPEDEF:
+    scan scan typedef ;

-: C-STRUCT:
-    scan in get parse-definition define-struct ; parsing
+SYNTAX: C-STRUCT:
+    scan in get parse-definition define-struct ;

-: C-UNION:
-    scan parse-definition define-union ; parsing
+SYNTAX: C-UNION:
+    scan parse-definition define-union ;

-: C-ENUM:
+SYNTAX: C-ENUM:
    ";" parse-tokens
    [ [ create-in ] dip define-constant ] each-index ;
-    parsing

 : address-of ( name library -- value )
    load-library dlsym [ "No such symbol" throw ] unless* ;

-: &:
-    scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
+SYNTAX: &:
+    scan "c-library" get '[ _ _ address-of ] over push-all ;
--- a/basis/bit-arrays/bit-arrays.factor
+++ b/basis/bit-arrays/bit-arrays.factor
@ -68,7 +68,7 @@ M: bit-array resize

 M: bit-array byte-length length 7 + -3 shift ;

-: ?{ \ } [ >bit-array ] parse-literal ; parsing
+SYNTAX: ?{ \ } [ >bit-array ] parse-literal ;

 : integer>bit-array ( n -- bit-array )
    dup 0 = [
--- a/basis/bit-vectors/bit-vectors.factor
+++ b/basis/bit-vectors/bit-vectors.factor
@ -31,7 +31,7 @@ M: bit-array new-resizable drop <bit-vector> ;

 INSTANCE: bit-vector growable

-: ?V{ \ } [ >bit-vector ] parse-literal ; parsing
+SYNTAX: ?V{ \ } [ >bit-vector ] parse-literal ;

 M: bit-vector >pprint-sequence ;
 M: bit-vector pprint-delims drop \ ?V{ \ } ;
--- a/basis/byte-vectors/byte-vectors.factor
+++ b/basis/byte-vectors/byte-vectors.factor
@ -42,7 +42,7 @@ M: byte-array like

 M: byte-array new-resizable drop <byte-vector> ;

-: BV{ \ } [ >byte-vector ] parse-literal ; parsing
+SYNTAX: BV{ \ } [ >byte-vector ] parse-literal ;

 M: byte-vector pprint* pprint-object ;
 M: byte-vector pprint-delims drop \ BV{ \ } ;
--- a/basis/cocoa/cocoa.factor
+++ b/basis/cocoa/cocoa.factor
@ -14,18 +14,14 @@ SYMBOL: sent-messages
 : remember-send ( selector -- )
    sent-messages (remember-send) ;

-: ->
-    scan dup remember-send parsed \ send parsed ;
-    parsing
+SYNTAX: -> scan dup remember-send parsed \ send parsed ;

 SYMBOL: super-sent-messages

 : remember-super-send ( selector -- )
    super-sent-messages (remember-send) ;

-: SUPER->
-    scan dup remember-super-send parsed \ super-send parsed ;
-    parsing
+SYNTAX: SUPER-> scan dup remember-super-send parsed \ super-send parsed ;

 SYMBOL: frameworks

@ -33,9 +29,9 @@ frameworks [ V{ } clone ] initialize

 [ frameworks get [ load-framework ] each ] "cocoa.messages" add-init-hook

-: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ; parsing
+SYNTAX: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ;

-: IMPORT: scan [ ] import-objc-class ; parsing
+SYNTAX: IMPORT: scan [ ] import-objc-class ;

 "Compiling Objective C bridge..." print

--- a/basis/cocoa/subclassing/subclassing.factor
+++ b/basis/cocoa/subclassing/subclassing.factor
@ -76,6 +76,6 @@ SYMBOL: +superclass+
        import-objc-class
    ] bind ;

-: CLASS:
+SYNTAX: CLASS:
    parse-definition unclip
-    >hashtable define-objc-class ; parsing
+    >hashtable define-objc-class ;
--- a/basis/colors/constants/constants.factor
+++ b/basis/colors/constants/constants.factor
@ -30,4 +30,4 @@ ERROR: no-such-color name ;
 : named-color ( name -- color )
    dup rgb.txt at [ ] [ no-such-color ] ?if ;

-: COLOR: scan named-color parsed ; parsing
+SYNTAX: COLOR: scan named-color parsed ;
--- a/basis/compiler/cfg/instructions/syntax/syntax.factor
+++ b/basis/compiler/cfg/instructions/syntax/syntax.factor
@ -13,10 +13,10 @@ IN: compiler.cfg.instructions.syntax
 : insn-effect ( word -- effect )
    boa-effect in>> but-last f <effect> ;

-: INSN:
+SYNTAX: INSN:
    parse-tuple-definition "regs" suffix
    [ dup tuple eq? [ drop insn-word ] when ] dip
    [ define-tuple-class ]
    [ 2drop save-location ]
    [ 2drop [ ] [ '[ f _ boa , ] ] [ insn-effect ] tri define-inline ]
-    3tri ; parsing
+    3tri ;
--- a/basis/compiler/cfg/registers/registers.factor
+++ b/basis/compiler/cfg/registers/registers.factor
@ -17,6 +17,6 @@ C: <ds-loc> ds-loc
 TUPLE: rs-loc < loc ;
 C: <rs-loc> rs-loc

-: V scan-word scan-word vreg boa parsed ; parsing
-: D scan-word <ds-loc> parsed ; parsing
-: R scan-word <rs-loc> parsed ; parsing
+SYNTAX: V scan-word scan-word vreg boa parsed ;
+SYNTAX: D scan-word <ds-loc> parsed ;
+SYNTAX: R scan-word <rs-loc> parsed ;
--- a/basis/constructors/constructors.factor
+++ b/basis/constructors/constructors.factor
@ -16,8 +16,8 @@ MACRO: set-slots ( slots -- quot )
    [ [ in>> '[ _ _ construct ] ] dip compose ] [ drop ] 2bi
    define-declared ;

-: CONSTRUCTOR:
+SYNTAX: CONSTRUCTOR:
    scan-word [ name>> "<" ">" surround create-in ] keep
    "(" expect ")" parse-effect
    parse-definition
-    define-constructor ; parsing
+    define-constructor ;
--- a/basis/core-text/utilities/utilities.factor
+++ b/basis/core-text/utilities/utilities.factor
@ -3,7 +3,7 @@
 USING: words parser alien alien.c-types kernel fry accessors ;
 IN: core-text.utilities

-: C-GLOBAL:
+SYNTAX: C-GLOBAL:
    CREATE-WORD
    dup name>> '[ _ f dlsym *void* ]
-    (( -- value )) define-declared ; parsing
+    (( -- value )) define-declared ;
--- a/basis/cpu/ppc/assembler/backend/backend.factor
+++ b/basis/cpu/ppc/assembler/backend/backend.factor
@ -21,7 +21,7 @@ IN: cpu.ppc.assembler.backend
 : define-d-insn ( word opcode -- )
    [ d-insn ] curry (( d a simm -- )) define-declared ;

-: D: CREATE scan-word define-d-insn ; parsing
+SYNTAX: D: CREATE scan-word define-d-insn ;

 : sd-insn ( d a simm opcode -- )
    [ s>u16 { 0 21 16 } bitfield ] dip insn ;
@ -29,7 +29,7 @@ IN: cpu.ppc.assembler.backend
 : define-sd-insn ( word opcode -- )
    [ sd-insn ] curry (( d a simm -- )) define-declared ;

-: SD: CREATE scan-word define-sd-insn ; parsing
+SYNTAX: SD: CREATE scan-word define-sd-insn ;

 : i-insn ( li aa lk opcode -- )
    [ { 0 1 0 } bitfield ] dip insn ;
@ -40,26 +40,26 @@ IN: cpu.ppc.assembler.backend
 : (X) ( -- word quot )
    CREATE scan-word scan-word scan-word [ x-insn ] 3curry ;

-: X: (X) (( a s b -- )) define-declared ; parsing
+SYNTAX: X: (X) (( a s b -- )) define-declared ;

 : (1) ( quot -- quot' ) [ 0 ] prepose ;

-: X1: (X) (1) (( a s -- )) define-declared ; parsing
+SYNTAX: X1: (X) (1) (( a s -- )) define-declared ;

 : xfx-insn ( d spr xo opcode -- )
    [ { 1 11 21 } bitfield ] dip insn ;

 : CREATE-MF ( -- word ) scan "MF" prepend create-in ;

-: MFSPR:
+SYNTAX: MFSPR:
    CREATE-MF scan-word 5 shift [ 339 31 xfx-insn ] curry
-    (( d -- )) define-declared ; parsing
+    (( d -- )) define-declared ;

 : CREATE-MT ( -- word ) scan "MT" prepend create-in ;

-: MTSPR:
+SYNTAX: MTSPR:
    CREATE-MT scan-word 5 shift [ 467 31 xfx-insn ] curry
-    (( d -- )) define-declared ; parsing
+    (( d -- )) define-declared ;

 : xo-insn ( d a b oe rc xo opcode -- )
    [ { 1 0 10 11 16 21 } bitfield ] dip insn ;
@ -68,9 +68,9 @@ IN: cpu.ppc.assembler.backend
    CREATE scan-word scan-word scan-word scan-word
    [ xo-insn ] 2curry 2curry ;

-: XO: (XO) (( a s b -- )) define-declared ; parsing
+SYNTAX: XO: (XO) (( a s b -- )) define-declared ;

-: XO1: (XO) (1) (( a s -- )) define-declared ; parsing
+SYNTAX: XO1: (XO) (1) (( a s -- )) define-declared ;

 GENERIC# (B) 2 ( dest aa lk -- )
 M: integer (B) 18 i-insn ;
@ -84,11 +84,11 @@ M: label BC [ 0 BC ] dip rc-relative-ppc-2 label-fixup ;

 : CREATE-B ( -- word ) scan "B" prepend create-in ;

-: BC:
+SYNTAX: BC:
    CREATE-B scan-word scan-word
-    [ rot BC ] 2curry (( c -- )) define-declared ; parsing
+    [ rot BC ] 2curry (( c -- )) define-declared ;

-: B:
+SYNTAX: B:
    CREATE-B scan-word scan-word scan-word scan-word scan-word
    [ b-insn ] curry curry curry curry curry
-    (( bo -- )) define-declared ; parsing
+    (( bo -- )) define-declared ;
--- a/basis/cpu/x86/assembler/syntax/syntax.factor
+++ b/basis/cpu/x86/assembler/syntax/syntax.factor
@ -11,5 +11,4 @@ IN: cpu.x86.assembler.syntax
 : define-registers ( names size -- )
    '[ _ define-register ] each-index ;

-: REGISTERS: ( -- )
-    scan-word ";" parse-tokens swap define-registers ; parsing
+SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ;
--- a/basis/definitions/icons/icons.factor
+++ b/basis/definitions/icons/icons.factor
@ -14,10 +14,10 @@ GENERIC: definition-icon ( definition -- path )

 <<

-: ICON:
+SYNTAX: ICON:
    scan-word \ definition-icon create-method
    scan '[ drop _ definition-icon-path ]
-    define ; parsing
+    define ;

 >>

--- a/basis/delegate/delegate.factor
+++ b/basis/delegate/delegate.factor
@ -85,9 +85,9 @@ PRIVATE>
 : define-consult ( consultation -- )
    [ register-consult ] [ consult-methods ] bi ;

-: CONSULT:
+SYNTAX: CONSULT:
    scan-word scan-word parse-definition <consultation>
-    [ save-location ] [ define-consult ] bi ; parsing
+    [ save-location ] [ define-consult ] bi ;

 M: consultation where loc>> ;

@ -144,8 +144,8 @@ PRIVATE>
        [ initialize-protocol-props ] 2tri
    ] 2bi ;

-: PROTOCOL:
-    CREATE-WORD parse-definition define-protocol ; parsing
+SYNTAX: PROTOCOL:
+    CREATE-WORD parse-definition define-protocol ;

 PREDICATE: protocol < word protocol-words ; ! Subclass of symbol?

@ -159,7 +159,7 @@ M: protocol definer drop \ PROTOCOL: \ ; ;

 M: protocol group-words protocol-words ;

-: SLOT-PROTOCOL:
+SYNTAX: SLOT-PROTOCOL:
    CREATE-WORD ";" parse-tokens
    [ [ reader-word ] [ writer-word ] bi 2array ] map concat
-    define-protocol ; parsing
+    define-protocol ;
--- a/basis/fry/fry.factor
+++ b/basis/fry/fry.factor
@ -53,4 +53,4 @@ M: callable deep-fry

 M: object deep-fry , ;

-: '[ parse-quotation fry over push-all ; parsing
+SYNTAX: '[ parse-quotation fry over push-all ;
--- a/basis/functors/functors.factor
+++ b/basis/functors/functors.factor
@ -16,6 +16,8 @@ IN: functors

 : define* ( word def effect -- ) pick set-word define-declared ;

+: define-syntax* ( word def -- ) over set-word define-syntax ;
+
 TUPLE: fake-quotation seq ;

 GENERIC: >fake-quotations ( quot -- fake )
@ -41,7 +43,7 @@ M: object fake-quotations> ;

 : DEFINE* ( accum -- accum ) effect get parsed \ define* parsed ;

-: `TUPLE:
+SYNTAX: `TUPLE:
    scan-param parsed
    scan {
        { ";" [ tuple parsed f parsed ] }
@ -52,40 +54,44 @@ M: object fake-quotations> ;
            make parsed
        ]
    } case
-    \ define-tuple-class parsed ; parsing
+    \ define-tuple-class parsed ;

-: `M:
+SYNTAX: `M:
    effect off
    scan-param parsed
    scan-param parsed
    \ create-method-in parsed
    parse-definition*
-    DEFINE* ; parsing
+    DEFINE* ;

-: `C:
+SYNTAX: `C:
    effect off
    scan-param parsed
    scan-param parsed
    [ [ boa ] curry ] over push-all
-    DEFINE* ; parsing
+    DEFINE* ;

-: `:
+SYNTAX: `:
    effect off
    scan-param parsed
    parse-definition*
-    DEFINE* ; parsing
+    DEFINE* ;

-: `INSTANCE:
+SYNTAX: `SYNTAX:
+    effect off
+    scan-param parsed
+    parse-definition*
+    \ define-syntax* parsed ;
+
+SYNTAX: `INSTANCE:
    scan-param parsed
    scan-param parsed
-    \ add-mixin-instance parsed ; parsing
+    \ add-mixin-instance parsed ;

-: `inline [ word make-inline ] over push-all ; parsing
+SYNTAX: `inline [ word make-inline ] over push-all ;

-: `parsing [ word make-parsing ] over push-all ; parsing
-
-: `(
-    ")" parse-effect effect set ; parsing
+SYNTAX: `(
+    ")" parse-effect effect set ;

 : (INTERPOLATE) ( accum quot -- accum )
    [ scan interpolate-locals ] dip
@ -93,11 +99,11 @@ M: object fake-quotations> ;

 PRIVATE>

-: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ; parsing
+SYNTAX: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ;

-: DEFINES [ create-in ] (INTERPOLATE) ; parsing
+SYNTAX: DEFINES [ create-in ] (INTERPOLATE) ;

-: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ; parsing
+SYNTAX: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ;

 DEFER: ;FUNCTOR delimiter

@ -110,8 +116,8 @@ DEFER: ;FUNCTOR delimiter
        { "C:" POSTPONE: `C: }
        { ":" POSTPONE: `: }
        { "INSTANCE:" POSTPONE: `INSTANCE: }
+        { "SYNTAX:" POSTPONE: `SYNTAX: }
        { "inline" POSTPONE: `inline }
-        { "parsing" POSTPONE: `parsing }
        { "(" POSTPONE: `( }
    } ;

@ -132,4 +138,4 @@ DEFER: ;FUNCTOR delimiter

 PRIVATE>

-: FUNCTOR: (FUNCTOR:) define ; parsing
+SYNTAX: FUNCTOR: (FUNCTOR:) define ;
--- a/basis/help/syntax/syntax.factor
+++ b/basis/help/syntax/syntax.factor
@ -5,19 +5,19 @@ help.topics namespaces vocabs definitions compiler.units
 vocabs.parser ;
 IN: help.syntax

-: HELP:
+SYNTAX: HELP:
    scan-word bootstrap-word
    dup set-word
    dup >link save-location
-    \ ; parse-until >array swap set-word-help ; parsing
+    \ ; parse-until >array swap set-word-help ;

-: ARTICLE:
+SYNTAX: ARTICLE:
    location [
        \ ; parse-until >array [ first2 ] keep 2 tail <article>
        over add-article >link
-    ] dip remember-definition ; parsing
+    ] dip remember-definition ;

-: ABOUT:
+SYNTAX: ABOUT:
    in get vocab
    dup changed-definition
-    scan-object >>help drop ; parsing
+    scan-object >>help drop ;
--- a/basis/hints/hints.factor
+++ b/basis/hints/hints.factor
@ -59,12 +59,11 @@ M: object specializer-declaration class ;
 : specialized-length ( specializer -- n )
    dup [ array? ] all? [ first ] when length ;

-: HINTS:
+SYNTAX: HINTS:
    scan-object
    dup method-spec? [ first2 method ] when
    [ redefined ]
    [ parse-definition "specializer" set-word-prop ] bi ;
-    parsing

 ! Default specializers
 { first first2 first3 first4 }
--- a/basis/html/templates/chloe/components/components.factor
+++ b/basis/html/templates/chloe/components/components.factor
@ -25,8 +25,7 @@ M: tuple-class component-tag ( tag class -- )
    [ compile-component-attrs ] 2bi
    [ render ] [code] ;

-: COMPONENT:
+SYNTAX: COMPONENT:
    scan-word
    [ name>> ] [ '[ _ component-tag ] ] bi
    define-chloe-tag ;
-    parsing
--- a/basis/html/templates/chloe/syntax/syntax.factor
+++ b/basis/html/templates/chloe/syntax/syntax.factor
@ -15,8 +15,8 @@ tags [ H{ } clone ] initialize

 : define-chloe-tag ( name quot -- ) swap tags get set-at ;

-: CHLOE:
-    scan parse-definition define-chloe-tag ; parsing
+SYNTAX: CHLOE:
+    scan parse-definition define-chloe-tag ;

 CONSTANT: chloe-ns "http://factorcode.org/chloe/1.0"

--- a/basis/html/templates/fhtml/fhtml.factor
+++ b/basis/html/templates/fhtml/fhtml.factor
@ -49,7 +49,7 @@ DEFER: <% delimiter
        drop
    ] if ;

-: %> lexer get parse-%> ; parsing
+SYNTAX: %> lexer get parse-%> ;

 : parse-template-lines ( lines -- quot )
    <template-lexer> [
--- a/basis/interpolate/interpolate.factor
+++ b/basis/interpolate/interpolate.factor
@ -38,6 +38,6 @@ MACRO: interpolate ( string -- )
 : interpolate-locals ( string -- quot )
    [ search [ ] ] (interpolate) ;

-: I[
+SYNTAX: I[
    "]I" parse-multiline-string
-    interpolate-locals over push-all ; parsing
+    interpolate-locals over push-all ;
--- a/basis/interval-maps/interval-maps-docs.factor
+++ b/basis/interval-maps/interval-maps-docs.factor
@ -1,22 +1,32 @@
-USING: help.markup help.syntax ;
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax assocs kernel sequences ;
 IN: interval-maps

 HELP: interval-at*
-{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
+{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
 { $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;

 HELP: interval-at
-{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
+{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
 { $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;

 HELP: interval-key?
-{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
+{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
 { $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;

 HELP: <interval-map>
-{ $values { "specification" "an assoc" } { "map" "an interval map" } }
+{ $values { "specification" assoc } { "map" interval-map } }
 { $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;

+HELP: interval-values
+{ $values { "map" interval-map } { "values" sequence } }
+{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
+
+HELP: coalesce
+{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
+{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
+
 ARTICLE: "interval-maps" "Interval maps"
 "The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
 $nl
@ -24,7 +34,9 @@ $nl
 { $subsection interval-at* }
 { $subsection interval-at }
 { $subsection interval-key? }
+{ $subsection interval-values }
 "Use the following to construct interval maps"
-{ $subsection <interval-map> } ;
+{ $subsection <interval-map> }
+{ $subsection coalesce } ;

 ABOUT: "interval-maps"
--- a/basis/interval-maps/interval-maps.factor
+++ b/basis/interval-maps/interval-maps.factor
@ -8,17 +8,21 @@ TUPLE: interval-map array ;

 <PRIVATE

+ALIAS: start first
+ALIAS: end second
+ALIAS: value third
+
 : find-interval ( key interval-map -- interval-node )
-    [ first <=> ] with search nip ;
+    array>> [ start <=> ] with search nip ;

 : interval-contains? ( key interval-node -- ? )
-    first2 between? ;
+    [ start ] [ end ] bi between? ;

 : all-intervals ( sequence -- intervals )
    [ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;

 : disjoint? ( node1 node2 -- ? )
-    [ second ] [ first ] bi* < ;
+    [ end ] [ start ] bi* < ;

 : ensure-disjoint ( intervals -- intervals )
    dup [ disjoint? ] monotonic?
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
 PRIVATE>

 : interval-at* ( key map -- value ? )
-    [ drop ] [ array>> find-interval ] 2bi
+    [ drop ] [ find-interval ] 2bi
    [ nip ] [ interval-contains? ] 2bi
-    [ third t ] [ drop f f ] if ;
+    [ value t ] [ drop f f ] if ;

 : interval-at ( key map -- value ) interval-at* drop ;

 : interval-key? ( key map -- ? ) interval-at* nip ;

+: interval-values ( map -- values )
+    array>> [ value ] map ;
+
 : <interval-map> ( specification -- map )
    all-intervals [ [ first second ] compare ] sort
    >intervals ensure-disjoint interval-map boa ;
--- a/basis/io/encodings/euc/euc.factor
+++ b/basis/io/encodings/euc/euc.factor
@ -63,6 +63,6 @@ SYMBOL: euc-table

 PRIVATE>

-: EUC:
+SYNTAX: EUC:
    ! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
-    CREATE-CLASS scan-object define-euc ; parsing
+    CREATE-CLASS scan-object define-euc ;
--- a/basis/io/encodings/iso2022/201.txt
+++ b/basis/io/encodings/iso2022/201.txt
@ -0,0 +1,208 @@
+#
+#	Name:             JIS X 0201 (1976) to Unicode 1.1 Table
+#	Unicode version:  1.1
+#	Table version:    0.9
+#	Table format:     Format A
+#	Date:             8 March 1994
+#
+#	Copyright (c) 1991-1994 Unicode, Inc.  All Rights reserved.
+#
+#	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+#	No claims are made as to fitness for any particular purpose.  No
+#	warranties of any kind are expressed or implied.  The recipient
+#	agrees to determine applicability of information provided.  If this
+#	file has been provided on magnetic media by Unicode, Inc., the sole
+#	remedy for any claim will be exchange of defective media within 90
+#	days of receipt.
+#
+#	Recipient is granted the right to make copies in any form for
+#	internal distribution and to freely use the information supplied
+#	in the creation of products supporting Unicode.  Unicode, Inc.
+#	specifically excludes the right to re-distribute this file directly
+#	to third parties or other organizations whether for profit or not.
+#
+#	General notes:
+#
+#
+# This table contains one set of mappings from JIS X 0201 into Unicode.
+# Note that these data are *possible* mappings only and may not be the
+# same as those used by actual products, nor may they be the best suited
+# for all uses.  For more information on the mappings between various code
+# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
+# VENDORS mapping data.  Normative information on the mapping between
+# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
+# latest Unicode Character Database.
+#
+# If you have carefully considered the fact that the mappings in
+# this table are only one possible set of mappings between JIS X 0201 and
+# Unicode and have no normative status, but still feel that you
+# have located an error in the table that requires fixing, you may
+# report any such error to errata@unicode.org.
+#
+#
+#	Format:  Three tab-separated columns
+#		Column #1 is the shift JIS code (in hex as 0xXX)
+#		Column #2 is the Unicode (in hex as 0xXXXX)
+#		Column #3 the Unicode (ISO 10646) name (follows a comment sign)
+#
+#	The entries are in JIS order
+#
+#
+0x20	0x0020	# SPACE
+0x21	0x0021	# EXCLAMATION MARK
+0x22	0x0022	# QUOTATION MARK
+0x23	0x0023	# NUMBER SIGN
+0x24	0x0024	# DOLLAR SIGN
+0x25	0x0025	# PERCENT SIGN
+0x26	0x0026	# AMPERSAND
+0x27	0x0027	# APOSTROPHE
+0x28	0x0028	# LEFT PARENTHESIS
+0x29	0x0029	# RIGHT PARENTHESIS
+0x2A	0x002A	# ASTERISK
+0x2B	0x002B	# PLUS SIGN
+0x2C	0x002C	# COMMA
+0x2D	0x002D	# HYPHEN-MINUS
+0x2E	0x002E	# FULL STOP
+0x2F	0x002F	# SOLIDUS
+0x30	0x0030	# DIGIT ZERO
+0x31	0x0031	# DIGIT ONE
+0x32	0x0032	# DIGIT TWO
+0x33	0x0033	# DIGIT THREE
+0x34	0x0034	# DIGIT FOUR
+0x35	0x0035	# DIGIT FIVE
+0x36	0x0036	# DIGIT SIX
+0x37	0x0037	# DIGIT SEVEN
+0x38	0x0038	# DIGIT EIGHT
+0x39	0x0039	# DIGIT NINE
+0x3A	0x003A	# COLON
+0x3B	0x003B	# SEMICOLON
+0x3C	0x003C	# LESS-THAN SIGN
+0x3D	0x003D	# EQUALS SIGN
+0x3E	0x003E	# GREATER-THAN SIGN
+0x3F	0x003F	# QUESTION MARK
+0x40	0x0040	# COMMERCIAL AT
+0x41	0x0041	# LATIN CAPITAL LETTER A
+0x42	0x0042	# LATIN CAPITAL LETTER B
+0x43	0x0043	# LATIN CAPITAL LETTER C
+0x44	0x0044	# LATIN CAPITAL LETTER D
+0x45	0x0045	# LATIN CAPITAL LETTER E
+0x46	0x0046	# LATIN CAPITAL LETTER F
+0x47	0x0047	# LATIN CAPITAL LETTER G
+0x48	0x0048	# LATIN CAPITAL LETTER H
+0x49	0x0049	# LATIN CAPITAL LETTER I
+0x4A	0x004A	# LATIN CAPITAL LETTER J
+0x4B	0x004B	# LATIN CAPITAL LETTER K
+0x4C	0x004C	# LATIN CAPITAL LETTER L
+0x4D	0x004D	# LATIN CAPITAL LETTER M
+0x4E	0x004E	# LATIN CAPITAL LETTER N
+0x4F	0x004F	# LATIN CAPITAL LETTER O
+0x50	0x0050	# LATIN CAPITAL LETTER P
+0x51	0x0051	# LATIN CAPITAL LETTER Q
+0x52	0x0052	# LATIN CAPITAL LETTER R
+0x53	0x0053	# LATIN CAPITAL LETTER S
+0x54	0x0054	# LATIN CAPITAL LETTER T
+0x55	0x0055	# LATIN CAPITAL LETTER U
+0x56	0x0056	# LATIN CAPITAL LETTER V
+0x57	0x0057	# LATIN CAPITAL LETTER W
+0x58	0x0058	# LATIN CAPITAL LETTER X
+0x59	0x0059	# LATIN CAPITAL LETTER Y
+0x5A	0x005A	# LATIN CAPITAL LETTER Z
+0x5B	0x005B	# LEFT SQUARE BRACKET
+0x5C	0x00A5	# YEN SIGN
+0x5D	0x005D	# RIGHT SQUARE BRACKET
+0x5E	0x005E	# CIRCUMFLEX ACCENT
+0x5F	0x005F	# LOW LINE
+0x60	0x0060	# GRAVE ACCENT
+0x61	0x0061	# LATIN SMALL LETTER A
+0x62	0x0062	# LATIN SMALL LETTER B
+0x63	0x0063	# LATIN SMALL LETTER C
+0x64	0x0064	# LATIN SMALL LETTER D
+0x65	0x0065	# LATIN SMALL LETTER E
+0x66	0x0066	# LATIN SMALL LETTER F
+0x67	0x0067	# LATIN SMALL LETTER G
+0x68	0x0068	# LATIN SMALL LETTER H
+0x69	0x0069	# LATIN SMALL LETTER I
+0x6A	0x006A	# LATIN SMALL LETTER J
+0x6B	0x006B	# LATIN SMALL LETTER K
+0x6C	0x006C	# LATIN SMALL LETTER L
+0x6D	0x006D	# LATIN SMALL LETTER M
+0x6E	0x006E	# LATIN SMALL LETTER N
+0x6F	0x006F	# LATIN SMALL LETTER O
+0x70	0x0070	# LATIN SMALL LETTER P
+0x71	0x0071	# LATIN SMALL LETTER Q
+0x72	0x0072	# LATIN SMALL LETTER R
+0x73	0x0073	# LATIN SMALL LETTER S
+0x74	0x0074	# LATIN SMALL LETTER T
+0x75	0x0075	# LATIN SMALL LETTER U
+0x76	0x0076	# LATIN SMALL LETTER V
+0x77	0x0077	# LATIN SMALL LETTER W
+0x78	0x0078	# LATIN SMALL LETTER X
+0x79	0x0079	# LATIN SMALL LETTER Y
+0x7A	0x007A	# LATIN SMALL LETTER Z
+0x7B	0x007B	# LEFT CURLY BRACKET
+0x7C	0x007C	# VERTICAL LINE
+0x7D	0x007D	# RIGHT CURLY BRACKET
+0x7E	0x203E	# OVERLINE
+0xA1	0xFF61	# HALFWIDTH IDEOGRAPHIC FULL STOP
+0xA2	0xFF62	# HALFWIDTH LEFT CORNER BRACKET
+0xA3	0xFF63	# HALFWIDTH RIGHT CORNER BRACKET
+0xA4	0xFF64	# HALFWIDTH IDEOGRAPHIC COMMA
+0xA5	0xFF65	# HALFWIDTH KATAKANA MIDDLE DOT
+0xA6	0xFF66	# HALFWIDTH KATAKANA LETTER WO
+0xA7	0xFF67	# HALFWIDTH KATAKANA LETTER SMALL A
+0xA8	0xFF68	# HALFWIDTH KATAKANA LETTER SMALL I
+0xA9	0xFF69	# HALFWIDTH KATAKANA LETTER SMALL U
+0xAA	0xFF6A	# HALFWIDTH KATAKANA LETTER SMALL E
+0xAB	0xFF6B	# HALFWIDTH KATAKANA LETTER SMALL O
+0xAC	0xFF6C	# HALFWIDTH KATAKANA LETTER SMALL YA
+0xAD	0xFF6D	# HALFWIDTH KATAKANA LETTER SMALL YU
+0xAE	0xFF6E	# HALFWIDTH KATAKANA LETTER SMALL YO
+0xAF	0xFF6F	# HALFWIDTH KATAKANA LETTER SMALL TU
+0xB0	0xFF70	# HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+0xB1	0xFF71	# HALFWIDTH KATAKANA LETTER A
+0xB2	0xFF72	# HALFWIDTH KATAKANA LETTER I
+0xB3	0xFF73	# HALFWIDTH KATAKANA LETTER U
+0xB4	0xFF74	# HALFWIDTH KATAKANA LETTER E
+0xB5	0xFF75	# HALFWIDTH KATAKANA LETTER O
+0xB6	0xFF76	# HALFWIDTH KATAKANA LETTER KA
+0xB7	0xFF77	# HALFWIDTH KATAKANA LETTER KI
+0xB8	0xFF78	# HALFWIDTH KATAKANA LETTER KU
+0xB9	0xFF79	# HALFWIDTH KATAKANA LETTER KE
+0xBA	0xFF7A	# HALFWIDTH KATAKANA LETTER KO
+0xBB	0xFF7B	# HALFWIDTH KATAKANA LETTER SA
+0xBC	0xFF7C	# HALFWIDTH KATAKANA LETTER SI
+0xBD	0xFF7D	# HALFWIDTH KATAKANA LETTER SU
+0xBE	0xFF7E	# HALFWIDTH KATAKANA LETTER SE
+0xBF	0xFF7F	# HALFWIDTH KATAKANA LETTER SO
+0xC0	0xFF80	# HALFWIDTH KATAKANA LETTER TA
+0xC1	0xFF81	# HALFWIDTH KATAKANA LETTER TI
+0xC2	0xFF82	# HALFWIDTH KATAKANA LETTER TU
+0xC3	0xFF83	# HALFWIDTH KATAKANA LETTER TE
+0xC4	0xFF84	# HALFWIDTH KATAKANA LETTER TO
+0xC5	0xFF85	# HALFWIDTH KATAKANA LETTER NA
+0xC6	0xFF86	# HALFWIDTH KATAKANA LETTER NI
+0xC7	0xFF87	# HALFWIDTH KATAKANA LETTER NU
+0xC8	0xFF88	# HALFWIDTH KATAKANA LETTER NE
+0xC9	0xFF89	# HALFWIDTH KATAKANA LETTER NO
+0xCA	0xFF8A	# HALFWIDTH KATAKANA LETTER HA
+0xCB	0xFF8B	# HALFWIDTH KATAKANA LETTER HI
+0xCC	0xFF8C	# HALFWIDTH KATAKANA LETTER HU
+0xCD	0xFF8D	# HALFWIDTH KATAKANA LETTER HE
+0xCE	0xFF8E	# HALFWIDTH KATAKANA LETTER HO
+0xCF	0xFF8F	# HALFWIDTH KATAKANA LETTER MA
+0xD0	0xFF90	# HALFWIDTH KATAKANA LETTER MI
+0xD1	0xFF91	# HALFWIDTH KATAKANA LETTER MU
+0xD2	0xFF92	# HALFWIDTH KATAKANA LETTER ME
+0xD3	0xFF93	# HALFWIDTH KATAKANA LETTER MO
+0xD4	0xFF94	# HALFWIDTH KATAKANA LETTER YA
+0xD5	0xFF95	# HALFWIDTH KATAKANA LETTER YU
+0xD6	0xFF96	# HALFWIDTH KATAKANA LETTER YO
+0xD7	0xFF97	# HALFWIDTH KATAKANA LETTER RA
+0xD8	0xFF98	# HALFWIDTH KATAKANA LETTER RI
+0xD9	0xFF99	# HALFWIDTH KATAKANA LETTER RU
+0xDA	0xFF9A	# HALFWIDTH KATAKANA LETTER RE
+0xDB	0xFF9B	# HALFWIDTH KATAKANA LETTER RO
+0xDC	0xFF9C	# HALFWIDTH KATAKANA LETTER WA
+0xDD	0xFF9D	# HALFWIDTH KATAKANA LETTER N
+0xDE	0xFF9E	# HALFWIDTH KATAKANA VOICED SOUND MARK
+0xDF	0xFF9F	# HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
--- a/basis/io/encodings/iso2022/208.txt
+++ b/basis/io/encodings/iso2022/208.txt
--- a/basis/io/encodings/iso2022/212.txt
+++ b/basis/io/encodings/iso2022/212.txt
--- a/basis/io/encodings/iso2022/authors.txt
+++ b/basis/io/encodings/iso2022/authors.txt
--- a/basis/io/encodings/iso2022/iso2022-docs.factor
+++ b/basis/io/encodings/iso2022/iso2022-docs.factor
@ -0,0 +1,13 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup ;
+IN: io.encodings.iso2022
+
+HELP: iso2022
+{ $class-description "This encoding class implements ISO 2022-JP-1, a Japanese text encoding commonly used for email." }
+{ $see-also "encodings-introduction" } ;
+
+ARTICLE: "io.encodings.iso2022" "ISO 2022-JP-1 encoding"
+{ $subsection iso2022 } ;
+
+ABOUT: "io.encodings.iso2022"
--- a/basis/io/encodings/iso2022/iso2022-tests.factor
+++ b/basis/io/encodings/iso2022/iso2022-tests.factor
@ -0,0 +1,36 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings.string io.encodings.iso2022 tools.test
+io.encodings.iso2022.private literals strings byte-arrays ;
+IN: io.encodings.iso2022
+
+[ "hello" ] [ "hello" >byte-array iso2022 decode ] unit-test
+[ "hello" ] [ "hello" iso2022 encode >string ] unit-test
+
+[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
+[ "hi" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
+[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( } iso2022 decode ] unit-test
+[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC } iso2022 decode ] unit-test
+
+[ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
+[ "h\u00ff98" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
+[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
+[ "h" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
+
+[ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
+[ "h\u007126" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
+[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
+
+[ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
+[ "h\u0058ce" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
+[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
+[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
+
+[ "\u{syriac-music}" iso2022 encode ] must-fail
--- a/basis/io/encodings/iso2022/iso2022.factor
+++ b/basis/io/encodings/iso2022/iso2022.factor
@ -0,0 +1,107 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings kernel sequences io simple-flat-file sets math
+combinators.short-circuit io.binary values arrays assocs
+locals accessors combinators biassocs byte-arrays parser ;
+IN: io.encodings.iso2022
+
+SINGLETON: iso2022
+
+<PRIVATE
+
+VALUE: jis201
+VALUE: jis208
+VALUE: jis212
+
+"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc to: jis201
+"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc to: jis208
+"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc to: jis212
+
+VALUE: ascii
+128 unique >biassoc to: ascii
+
+TUPLE: iso2022-state type ;
+
+: make-iso-coder ( encoding -- state )
+    drop ascii iso2022-state boa ;
+
+M: iso2022 <encoder>
+    make-iso-coder <encoder> ;
+
+M: iso2022 <decoder>
+    make-iso-coder <decoder> ;
+
+<< SYNTAX: ESC HEX: 16 parsed ; >>
+
+CONSTANT: switch-ascii B{ ESC CHAR: ( CHAR: B }
+CONSTANT: switch-jis201 B{ ESC CHAR: ( CHAR: J }
+CONSTANT: switch-jis208 B{ ESC CHAR: $ CHAR: B }
+CONSTANT: switch-jis212 B{ ESC CHAR: $ CHAR: ( CHAR: D }
+
+: find-type ( char -- code type )
+    {
+        { [ dup ascii value? ] [ drop switch-ascii ascii ] }
+        { [ dup jis201 value? ] [ drop switch-jis201 jis201 ] }
+        { [ dup jis208 value? ] [ drop switch-jis208 jis208 ] }
+        { [ dup jis212 value? ] [ drop switch-jis212 jis212 ] }
+        [ encode-error ]
+    } cond ;
+
+: stream-write-num ( num stream -- )
+    over 256 >=
+    [ [ h>b/b swap 2byte-array ] dip stream-write ]
+    [ stream-write1 ] if ;
+
+M:: iso2022-state encode-char ( char stream encoding -- )
+    char encoding type>> value? [
+        char find-type
+        [ stream stream-write ]
+        [ encoding (>>type) ] bi*
+    ] unless
+    char encoding type>> value-at stream stream-write-num ;
+
+: read-escape ( stream -- type/f )
+    dup stream-read1 {
+        { CHAR: ( [
+            stream-read1 {
+                { CHAR: B [ ascii ] }
+                { CHAR: J [ jis201 ] }
+                [ drop f ]
+            } case
+        ] }
+        { CHAR: $ [
+            dup stream-read1 {
+                { CHAR: @ [ drop jis208 ] } ! want: JIS X 0208-1978 
+                { CHAR: B [ drop jis208 ] }
+                { CHAR: ( [
+                    stream-read1 CHAR: D = jis212 f ?
+                ] }
+                [ 2drop f ]
+            } case
+        ] }
+        [ 2drop f ]
+    } case ;
+
+: double-width? ( type -- ? )
+    { [ jis208 eq? ] [ jis212 eq? ] } 1|| ;
+
+: finish-decode ( num encoding -- char )
+    type>> at replacement-char or ;
+
+M:: iso2022-state decode-char ( stream encoding -- char )
+    stream stream-read1 {
+        { ESC [
+            stream read-escape [
+                encoding (>>type)
+                stream encoding decode-char
+            ] [ replacement-char ] if*
+        ] }
+        { f [ f ] }
+        [
+            encoding type>> double-width? [
+                stream stream-read1
+                [ 2byte-array be> encoding finish-decode ]
+                [ drop replacement-char ] if*
+            ] [ encoding finish-decode ] if
+        ]
+    } case ;
--- a/basis/io/encodings/iso2022/summary.txt
+++ b/basis/io/encodings/iso2022/summary.txt
@ -0,0 +1 @@
+ISO-2022-JP-1 text encoding
--- a/basis/listener/listener-tests.factor
+++ b/basis/listener/listener-tests.factor
@ -3,7 +3,7 @@ tools.test parser math namespaces continuations vocabs kernel
 compiler.units eval vocabs.parser ;
 IN: listener.tests

-: hello "Hi" print ; parsing
+SYNTAX: hello "Hi" print ;

 : parse-interactive ( string -- quot )
    <string-reader> stream-read-quot ;
--- a/basis/locals/locals.factor
+++ b/basis/locals/locals.factor
@ -5,27 +5,25 @@ vocabs.loader words kernel namespaces locals.parser locals.types
 locals.errors ;
 IN: locals

-: :>
+SYNTAX: :>
    scan locals get [ :>-outside-lambda-error ] unless*
-    [ make-local ] bind <def> parsed ; parsing
+    [ make-local ] bind <def> parsed ;

-: [| parse-lambda over push-all ; parsing
+SYNTAX: [| parse-lambda over push-all ;

-: [let parse-let over push-all ; parsing
+SYNTAX: [let parse-let over push-all ;

-: [let* parse-let* over push-all ; parsing
+SYNTAX: [let* parse-let* over push-all ;

-: [wlet parse-wlet over push-all ; parsing
+SYNTAX: [wlet parse-wlet over push-all ;

-: :: (::) define ; parsing
+SYNTAX: :: (::) define ;

-: M:: (M::) define ; parsing
+SYNTAX: M:: (M::) define ;

-: MACRO:: (::) define-macro ; parsing
+SYNTAX: MACRO:: (::) define-macro ;

-: MEMO:: (::) define-memoized ; parsing
-
-USE: syntax
+SYNTAX: MEMO:: (::) define-memoized ;

 {
    "locals.macros"
--- a/basis/logging/logging.factor
+++ b/basis/logging/logging.factor
@ -135,11 +135,11 @@ PRIVATE>
    [ [ input-logging-quot ] 2keep drop error-logging-quot ]
    (define-logging) ;

-: LOG:
+SYNTAX: LOG:
    #! Syntax: name level
    CREATE-WORD dup scan-word
    '[ 1array stack>message _ _ log-message ]
-    (( message -- )) define-declared ; parsing
+    (( message -- )) define-declared ;

 USE: vocabs.loader

--- a/basis/macros/macros.factor
+++ b/basis/macros/macros.factor
@ -16,7 +16,7 @@ PRIVATE>
    [ over real-macro-effect memoize-quot [ call ] append define ]
    2bi ;

-: MACRO: (:) define-macro ; parsing
+SYNTAX: MACRO: (:) define-macro ;

 PREDICATE: macro < word "macro" word-prop >boolean ;

--- a/basis/match/match.factor
+++ b/basis/match/match.factor
@ -16,8 +16,8 @@ SYMBOL: _
 : define-match-vars ( seq -- )
    [ define-match-var ] each ;

-: MATCH-VARS: ! vars ...
-    ";" parse-tokens define-match-vars ; parsing
+SYNTAX: MATCH-VARS: ! vars ...
+    ";" parse-tokens define-match-vars ;

 : match-var? ( symbol -- bool )
    dup word? [ "match-var" word-prop ] [ drop f ] if ;
--- a/basis/math/blas/matrices/matrices.factor
+++ b/basis/math/blas/matrices/matrices.factor
@ -289,7 +289,7 @@ M: MATRIX n*V(*)V+M!
 M: MATRIX n*V(*)Vconj+M!
    (prepare-ger) [ XGERC ] dip ;

-: XMATRIX{ \ } [ >MATRIX ] parse-literal ; parsing
+SYNTAX: XMATRIX{ \ } [ >MATRIX ] parse-literal ;

 M: MATRIX pprint-delims
    drop \ XMATRIX{ \ } ;
--- a/basis/math/blas/vectors/vectors.factor
+++ b/basis/math/blas/vectors/vectors.factor
@ -179,7 +179,7 @@ M: VECTOR n*V+V!
 M: VECTOR n*V!
    (prepare-scal) [ XSCAL ] dip ;

-: XVECTOR{ \ } [ >VECTOR ] parse-literal ; parsing
+SYNTAX: XVECTOR{ \ } [ >VECTOR ] parse-literal ;

 M: VECTOR pprint-delims
    drop \ XVECTOR{ \ } ;
--- a/basis/math/complex/complex.factor
+++ b/basis/math/complex/complex.factor
@ -31,7 +31,7 @@ M: complex sqrt >polar [ fsqrt ] [ 2.0 / ] bi* polar> ;

 IN: syntax

-: C{ \ } [ first2 rect> ] parse-literal ; parsing
+SYNTAX: C{ \ } [ first2 rect> ] parse-literal ;

 USE: prettyprint.custom

--- a/basis/memoize/memoize.factor
+++ b/basis/memoize/memoize.factor
@ -41,7 +41,7 @@ PRIVATE>
    [ drop "memoize" set-word-prop ]
    3tri ;

-: MEMO: (:) define-memoized ; parsing
+SYNTAX: MEMO: (:) define-memoized ;

 PREDICATE: memoized < word "memoize" word-prop ;

--- a/basis/multiline/multiline.factor
+++ b/basis/multiline/multiline.factor
@ -20,10 +20,10 @@ PRIVATE>
    [ (parse-here) ] "" make but-last
    lexer get next-line ;

-: STRING:
+SYNTAX: STRING:
    CREATE-WORD
    parse-here 1quotation
-    (( -- string )) define-inline ; parsing
+    (( -- string )) define-inline ;

 <PRIVATE

@ -48,16 +48,16 @@ PRIVATE>
        change-column drop
    ] "" make ;

-: <"
-    "\">" parse-multiline-string parsed ; parsing
+SYNTAX: <"
+    "\">" parse-multiline-string parsed ;

-: <'
-    "'>" parse-multiline-string parsed ; parsing
+SYNTAX: <'
+    "'>" parse-multiline-string parsed ;

-: {'
-    "'}" parse-multiline-string parsed ; parsing
+SYNTAX: {'
+    "'}" parse-multiline-string parsed ;

-: {"
-    "\"}" parse-multiline-string parsed ; parsing
+SYNTAX: {"
+    "\"}" parse-multiline-string parsed ;

-: /* "*/" parse-multiline-string drop ; parsing
+SYNTAX: /* "*/" parse-multiline-string drop ;
--- a/basis/nibble-arrays/nibble-arrays.factor
+++ b/basis/nibble-arrays/nibble-arrays.factor
@ -62,7 +62,7 @@ M: nibble-array resize

 M: nibble-array byte-length length nibbles>bytes ;

-: N{ \ } [ >nibble-array ] parse-literal ; parsing
+SYNTAX: N{ \ } [ >nibble-array ] parse-literal ;

 INSTANCE: nibble-array sequence

--- a/basis/opengl/gl/extensions/extensions.factor
+++ b/basis/opengl/gl/extensions/extensions.factor
@ -47,7 +47,7 @@ reset-gl-function-number-counter
    parameters return parse-arglist [ abi indirect-quot ] dip
    define-declared ;

-: GL-FUNCTION:
+SYNTAX: GL-FUNCTION:
    gl-function-calling-convention
    scan
    scan dup
@ -55,5 +55,4 @@ reset-gl-function-number-counter
    gl-function-number
    [ gl-function-pointer ] 2curry swap
    ";" parse-tokens [ "()" subseq? not ] filter
-    define-indirect
-    ; parsing
+    define-indirect ;
--- a/basis/openssl/libssl/libssl.factor
+++ b/basis/openssl/libssl/libssl.factor
@ -279,12 +279,12 @@ H{ } clone verify-messages set-global

 : verify-message ( n -- word ) verify-messages get-global at ;

-: X509_V_:
+SYNTAX: X509_V_:
    scan "X509_V_" prepend create-in
    scan-word
    [ 1quotation (( -- value )) define-inline ]
    [ verify-messages get set-at ]
-    2bi ; parsing
+    2bi ;

 >>

--- a/basis/peg/ebnf/ebnf.factor
+++ b/basis/peg/ebnf/ebnf.factor
@ -34,9 +34,9 @@ TUPLE: tokenizer any one many ;
 : reset-tokenizer ( -- )
  default-tokenizer \ tokenizer set-global ;

-: TOKENIZER: 
+SYNTAX: TOKENIZER: 
  scan search [ "Tokenizer not found" throw ] unless*
-  execute( -- tokenizer ) \ tokenizer set-global ; parsing
+  execute( -- tokenizer ) \ tokenizer set-global ;

 TUPLE: ebnf-non-terminal symbol ;
 TUPLE: ebnf-terminal symbol ;
@ -522,16 +522,14 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
  parse-ebnf dup dup parser [ main swap at compile ] with-variable
  [ compiled-parse ] curry [ with-scope ast>> ] curry ;

-: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at  
-  parsed reset-tokenizer ; parsing
+SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at  
+  parsed reset-tokenizer ;

-: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip 
-  parsed \ call parsed reset-tokenizer ; parsing
+SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip 
+  parsed \ call parsed reset-tokenizer ;

-: EBNF: 
+SYNTAX: EBNF: 
  reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string  
  ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop 
-  reset-tokenizer ; parsing
-
-
+  reset-tokenizer ;

--- a/basis/peg/peg.factor
+++ b/basis/peg/peg.factor
@ -616,7 +616,7 @@ PRIVATE>

 ERROR: parse-failed input word ;

-: PEG:
+SYNTAX: PEG:
  (:)
  [let | def [ ] word [ ] |
    [
@ -630,7 +630,7 @@ ERROR: parse-failed input word ;
        ]
      ] with-compilation-unit
    ] over push-all
-  ] ; parsing
+  ] ;

 USING: vocabs vocabs.loader ;

--- a/basis/persistent/hashtables/hashtables.factor
+++ b/basis/persistent/hashtables/hashtables.factor
@ -48,7 +48,7 @@ M: persistent-hash hashcode* nip assoc-size ;

 M: persistent-hash clone ;

-: PH{ \ } [ >persistent-hash ] parse-literal ; parsing
+SYNTAX: PH{ \ } [ >persistent-hash ] parse-literal ;

 M: persistent-hash pprint-delims drop \ PH{ \ } ;
 M: persistent-hash >pprint-sequence >alist ;
--- a/basis/persistent/vectors/vectors.factor
+++ b/basis/persistent/vectors/vectors.factor
@ -179,7 +179,7 @@ M: persistent-vector equal?
 : >persistent-vector ( seq -- pvec )
    T{ persistent-vector } like ;

-: PV{ \ } [ >persistent-vector ] parse-literal ; parsing
+SYNTAX: PV{ \ } [ >persistent-vector ] parse-literal ;

 M: persistent-vector pprint-delims drop \ PV{ \ } ;
 M: persistent-vector >pprint-sequence ;
--- a/basis/prettyprint/prettyprint-docs.factor
+++ b/basis/prettyprint/prettyprint-docs.factor
@ -96,12 +96,12 @@ ARTICLE: "prettyprint-literal" "Literal prettyprinting protocol"
 { $code
    "TUPLE: rect w h ;"
    ""
-    ": RECT["
+    "SYNTAX: RECT["
    "    scan-word"
    "    scan-word \\ * assert="
    "    scan-word"
    "    scan-word \\ ] assert="
-    "    <rect> parsed ; parsing"
+    "    <rect> parsed ;"
 }
 "An example literal might be:"
 { $code "RECT[ 100 * 200 ]" }
--- a/basis/quoting/quoting-docs.factor
+++ b/basis/quoting/quoting-docs.factor
@ -1,32 +0,0 @@
-! Copyright (C) 2009 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: help.markup help.syntax strings ;
-IN: quoting
-
-HELP: quote?
-{ $values
-     { "ch" "a character" }
-     { "?" "a boolean" }
-}
-{ $description "Returns true if the character is a single or double quote." } ;
-
-HELP: quoted?
-{ $values
-     { "str" string }
-     { "?" "a boolean" }
-}
-{ $description "Returns true if a string is surrounded by matching single or double quotes as the first and last characters." } ;
-
-HELP: unquote
-{ $values
-     { "str" string }
-     { "newstr" string }
-}
-{ $description "Removes a pair of matching single or double quotes from a string." } ;
-
-ARTICLE: "quoting" "Quotation marks"
-"The " { $vocab-link "quoting" } " vocabulary is for removing quotes from a string." $nl
-"Removing quotes:"
-{ $subsection unquote } ;
-
-ABOUT: "quoting"
--- a/basis/quoting/quoting-tests.factor
+++ b/basis/quoting/quoting-tests.factor
@ -1,10 +0,0 @@
-! Copyright (C) 2009 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: tools.test quoting ;
-IN: quoting.tests
-
-
-[ "abc" ] [ "'abc'" unquote ] unit-test
-[ "abc" ] [ "\"abc\"" unquote ] unit-test
-[ "'abc" ] [ "'abc" unquote ] unit-test
-[ "abc'" ] [ "abc'" unquote ] unit-test
--- a/basis/quoting/quoting.factor
+++ b/basis/quoting/quoting.factor
@ -1,6 +1,6 @@
 ! Copyright (C) 2009 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: combinators.short-circuit kernel math sequences strings ;
+USING: sequences math kernel strings combinators.short-circuit ;
 IN: quoting

 : quote? ( ch -- ? ) "'\"" member? ;
@ -13,4 +13,4 @@ IN: quoting
    } 1&& ;

 : unquote ( str -- newstr )
-    dup quoted? [ but-last-slice rest-slice >string ] when ;
+    dup quoted? [ but-last-slice rest-slice >string ] when ;
--- a/basis/regexp/authors.txt
+++ b/basis/regexp/authors.txt
@ -1 +1,2 @@
 Doug Coleman
+Daniel Ehrenberg
--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )

 M: f class-member? 2drop f ;

+: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
+    bi* = ; inline
+
 M: script-class class-member?
-    [ script-of ] [ script>> ] bi* = ;
+    [ script-of ] [ script>> ] same? ;

 M: category-class class-member?
-    [ category# ] [ category>> ] bi* = ;
+    [ category ] [ category>> ] same? ;

 M: category-range-class class-member?
-    [ category first ] [ category>> ] bi* = ;
+    [ category first ] [ category>> ] same? ;

 TUPLE: not-class class ;

 PREDICATE: not-integer < not-class class>> integer? ;

 UNION: simple-class
-    primitive-class range-class category-class category-range-class dot ;
+    primitive-class range-class dot ;
 PREDICATE: not-simple < not-class class>> simple-class? ;

 M: not-class class-member?
@ -227,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
    dup or-class flatten partition-classes
    dup not-integers>> length {
        { 0 [ nip make-or-class ] }
-        { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
+        { 1 [
+            not-integers>> first
+            [ class>> '[ _ swap class-member? ] any? ] keep or
+        ] }
        [ 3drop t ]
    } case ;

@ -248,6 +254,12 @@ M: or-class <not-class>
 M: t <not-class> drop f ;
 M: f <not-class> drop t ;

+: <minus-class> ( a b -- a-b )
+    <not-class> 2array <and-class> ;
+
+: <sym-diff-class> ( a b -- a~b )
+    2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
+
 M: primitive-class class-member?
    class>> class-member? ;

--- a/basis/regexp/parser/parser-tests.factor
+++ b/basis/regexp/parser/parser-tests.factor
@ -11,7 +11,7 @@ IN: regexp.parser.tests
    "a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
    "(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
    "[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
-    "[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
+    "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
    "(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
    "[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
    "\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@ -2,7 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
 combinators regexp.classes strings splitting peg locals accessors
-regexp.ast unicode.case ;
+regexp.ast unicode.case unicode.script.private unicode.categories
+memoize interval-maps sets unicode.data combinators.short-circuit ;
 IN: regexp.parser

 : allowed-char? ( ch -- ? )
@ -18,15 +19,41 @@ ERROR: bad-number ;

 ERROR: bad-class name ;

+: simple ( str -- simple )
+    ! Alternatively, first collation key level?
+    >case-fold [ " \t_" member? not ] filter ;
+
+: simple-table ( seq -- table )
+    [ [ simple ] keep ] H{ } map>assoc ;
+
+MEMO: simple-script-table ( -- table )
+    script-table interval-values prune simple-table ;
+
+MEMO: simple-category-table ( -- table )
+    categories simple-table ;
+
 : parse-unicode-class ( name -- class )
-    ! Implement this!
-    drop f ;
+    {
+        { [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
+            >upper first
+            <category-range-class>
+        ] }
+        { [ dup >title categories member? ] [
+            simple-category-table at <category-class>
+        ] }
+        { [ "script=" ?head ] [
+            dup simple-script-table at
+            [ <script-class> ]
+            [ "script=" prepend bad-class ] ?if
+        ] }
+        [ bad-class ]
+    } cond ;

 : unicode-class ( name -- class )
    dup parse-unicode-class [ ] [ bad-class ] ?if ;

 : name>class ( name -- class )
-    >string >case-fold {
+    >string simple {
        { "lower" letter-class }
        { "upper" LETTER-class }
        { "alpha" Letter-class }
@ -121,19 +148,29 @@ Character = EscapeSequence
          | "^" => [[ ^ <tagged-epsilon> ]]
          | . ?[ allowed-char? ]?

-AnyRangeCharacter = EscapeSequence | .
+AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)

 RangeCharacter = !("]") AnyRangeCharacter

-Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
      | RangeCharacter

-StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
           | AnyRangeCharacter

 Ranges = StartRange:s Range*:r => [[ r s prefix ]]

-CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
+BasicCharClass =  "^"?:n Ranges:e => [[ e n char-class ]]
+
+CharClass = BasicCharClass:b "&&" CharClass:c
+                => [[ b c 2array <and-class> ]]
+          | BasicCharClass:b "||" CharClass:c
+                => [[ b c 2array <or-class> ]]
+          | BasicCharClass:b "~~" CharClass:c
+                => [[ b c <sym-diff-class> ]]
+          | BasicCharClass:b "--" CharClass:c
+                => [[ b c <minus-class> ]]
+          | BasicCharClass

 Options = [idmsux]*

--- a/basis/regexp/regexp-docs.factor
+++ b/basis/regexp/regexp-docs.factor
@ -45,11 +45,11 @@ ARTICLE: { "regexp" "construction" } "Constructing regular expressions"
 ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
 "Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
 { $heading "Characters" }
-"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } "for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
+"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
 { $heading "Concatenation, alternation and grouping" }
-"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for gropuing. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
+"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
 { $heading "Character classes" }
-"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a carat, as in " { $snippet "[^a]" } " which matches all characters which are not a."
+"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
 { $heading "Predefined character classes" }
 "Several character classes are predefined, both for convenience and because they are too large to represent directly. In Factor regular expressions, all character classes are Unicode-aware."
 { $table
@ -72,8 +72,12 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
    { { $snippet "\\p{blank}" } "Non-newline whitespace" }
    { { $snippet "\\p{cntrl}" } "Control character" }
    { { $snippet "\\p{space}" } "Whitespace" }
-    { { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
-"Full unicode properties are not yet supported."
+    { { $snippet "\\p{xdigit}" } "Hexadecimal digit" }
+    { { $snippet "\\p{Nd}" } "Character in Unicode category Nd" } 
+    { { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" } 
+    { { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
+{ $heading "Character class operations" }
+"Character classes can be composed using four binary operations: " { $snippet "|| && ~~ --" } ". These do the operations union, intersection, symmetric difference and difference, respectively. For example, characters which are lower-case but not Latin script could be matched as " { $snippet "[\\p{lower}--\\p{script=latin}]" } ". These operations are right-associative, and " { $snippet "^" } " binds tighter than them. There is no syntax for grouping."
 { $heading "Boundaries" }
 "Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
 { $table
@ -105,9 +109,18 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
 { $heading "Quotation" }
 "To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
 { $heading "Unsupported features" }
-"One missing feature is backreferences. This is because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } ". You can create a new regular expression to match a particular string using " { $vocab-link "regexp.combinators" } " and group capture is available to extract parts of a regular expression match." $nl
-"Another feature is Perl's " { $snippet "\\G" } " syntax, which references the previous match, is not included. This is because that sequence is inherently stateful, and Factor regexps don't hold state." $nl
-"None of the operations which embed code into a regexp are supported, as this would require the inclusion of the Factor parser and compiler in any application which wants to expose regexps to the user. None of the casing operations are included of Perl like \\L, for simplicity." ; ! Also describe syntax, from the beginning
+{ $subheading "Group capture" }
+{ $subheading "Reluctant and posessive quantifiers" }
+{ $subheading "Backreferences" }
+"Backreferences were omitted because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } "."
+$nl
+"To work around the lack of backreferences, consider using group capture and then creating a new regular expression to match the captured string using " { $vocab-link "regexp.combinators" } "."
+{ $subheading "Previous match" }
+"Another feature that is not included is Perl's " { $snippet "\\G" } " syntax, which references the previous match. This is because that sequence is inherently stateful, and Factor regexps don't hold state."
+{ $subheading "Embedding code" }
+"Operations which embed code into a regexp are not supported. This would require the inclusion of the Factor parser and compiler in any deployed application which wants to expose regexps to the user, leading to an undesirable increase in the code size."
+{ $heading "Casing operations" }
+"No special casing operations are included, for example Perl's " { $snippet "\\L" } "." ;

 ARTICLE: { "regexp" "options" } "Regular expression options"
 "When " { $link { "regexp" "construction" } } ", various options can be provided. Options have single-character names. A string of options has one of the following two forms:"
@ -150,7 +163,7 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
 "This implies, by DeMorgan's law, that, if you have two regular languages, their intersection is also regular. That is, for any two regular expressions, there exists a regular expression which matches strings that match both inputs." $nl
 "Traditionally, regular expressions on computer support an additional operation: backreferences. For example, the Perl regexp " { $snippet "/(.*)$1/" } " matches a string repated twice. If a backreference refers to a string with a predetermined maximum length, then the resulting language is still regular." $nl
 "But, if not, the language is not regular. There is strong evidence that there is no efficient way to parse with backreferences in the general case. Perl uses a naive backtracking algorithm which has pathological behavior in some cases, taking exponential time to match even if backreferences aren't used. Additionally, expressions with backreferences don't have the properties with negation and intersection described above." $nl
-"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
+"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex use the same algorithm." ;

 ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
 "Testing if a string matches a regular expression:"
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@ -480,3 +480,57 @@ IN: regexp-tests
 [ f ] [ "a\r" R/ a$./mds matches? ] unit-test
 [ t ] [ "a\n" R/ a$./ms matches? ] unit-test
 [ t ] [ "a\n" R/ a$./mds matches? ] unit-test
+
+! Unicode categories
+[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
+[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
+[ f ] [ " " R/ \p{L}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
+[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
+[ t ] [ " " R/ \P{L}/ matches? ] unit-test
+
+[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
+[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
+[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
+
+[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
+[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
+[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
+
+! These should be case-insensitive
+[ f ] [ " " R/ \p{l}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
+[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
+[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
+
+! Logical operators
+[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@ -204,17 +204,17 @@ PRIVATE>

 PRIVATE>

-: R! CHAR: ! parsing-regexp ; parsing
-: R" CHAR: " parsing-regexp ; parsing
-: R# CHAR: # parsing-regexp ; parsing
-: R' CHAR: ' parsing-regexp ; parsing
-: R( CHAR: ) parsing-regexp ; parsing
-: R/ CHAR: / parsing-regexp ; parsing
-: R@ CHAR: @ parsing-regexp ; parsing
-: R[ CHAR: ] parsing-regexp ; parsing
-: R` CHAR: ` parsing-regexp ; parsing
-: R{ CHAR: } parsing-regexp ; parsing
-: R| CHAR: | parsing-regexp ; parsing
+SYNTAX: R! CHAR: ! parsing-regexp ;
+SYNTAX: R" CHAR: " parsing-regexp ;
+SYNTAX: R# CHAR: # parsing-regexp ;
+SYNTAX: R' CHAR: ' parsing-regexp ;
+SYNTAX: R( CHAR: ) parsing-regexp ;
+SYNTAX: R/ CHAR: / parsing-regexp ;
+SYNTAX: R@ CHAR: @ parsing-regexp ;
+SYNTAX: R[ CHAR: ] parsing-regexp ;
+SYNTAX: R` CHAR: ` parsing-regexp ;
+SYNTAX: R{ CHAR: } parsing-regexp ;
+SYNTAX: R| CHAR: | parsing-regexp ;

 USING: vocabs vocabs.loader ;

--- a/basis/roman/roman.factor
+++ b/basis/roman/roman.factor
@ -74,4 +74,4 @@ PRIVATE>
 : roman/mod ( str1 str2 -- str3 str4 )
    [ /mod ] binary-roman-op [ >roman ] dip ;

-: ROMAN: scan roman> parsed ; parsing
+SYNTAX: ROMAN: scan roman> parsed ;
--- a/basis/see/see.factor
+++ b/basis/see/see.factor
@ -93,7 +93,6 @@ M: object declarations. drop ;

 M: word declarations.
    {
-        POSTPONE: parsing
        POSTPONE: delimiter
        POSTPONE: inline
        POSTPONE: recursive
--- a/basis/shuffle/shuffle.factor
+++ b/basis/shuffle/shuffle.factor
@ -19,8 +19,8 @@ MACRO: shuffle-effect ( effect -- )
        [ [ at \ swap \ nth [ ] 3sequence ] curry map , \ cleave , ] 2bi
    ] [ ] make ;

-: shuffle(
-    ")" parse-effect parsed \ shuffle-effect parsed ; parsing
+SYNTAX: shuffle(
+    ")" parse-effect parsed \ shuffle-effect parsed ;

 : 2swap ( x y z t -- z t x y ) 2 2 mnswap ; inline

--- a/basis/simple-flat-file/simple-flat-file-docs.factor
+++ b/basis/simple-flat-file/simple-flat-file-docs.factor
@ -1,8 +1,24 @@
-USING: help.syntax help.markup strings ;
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup strings biassocs arrays ;
 IN: simple-flat-file

 ABOUT: "simple-flat-file"

 ARTICLE: "simple-flat-file" "Parsing simple flat files"
-"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
-{ $subsection flat-file>biassoc } ;
+"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding and Unicode tasks."
+{ $subsection flat-file>biassoc }
+{ $subsection load-interval-file }
+{ $subsection data } ;
+
+HELP: load-interval-file
+{ $values { "filename" string } { "table" "an interval map" } }
+{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
+
+HELP: data
+{ $values { "filename" string } { "data" array } }
+{ $description "This loads a file that's delineated by semicolons and lines, returning an array of lines, where each line is an array split by the semicolons, with whitespace trimmed off." } ;
+
+HELP: flat-file>biassoc
+{ $values { "filename" string } { "biassoc" biassoc } }
+{ $description "This loads a flat file, in the form that many encoding resource files are in, with two columns of numeric data in hex, and returns a biassoc associating them." } ;
--- a/basis/simple-flat-file/simple-flat-file.factor
+++ b/basis/simple-flat-file/simple-flat-file.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: sequences splitting kernel math.parser io.files io.encodings.utf8
-biassocs ascii ;
+biassocs ascii namespaces arrays make assocs interval-maps sets ;
 IN: simple-flat-file

 : drop-comments ( seq -- newseq )
@ -30,3 +30,25 @@ IN: simple-flat-file

 : data ( filename -- data )
    utf8 file-lines drop-comments [ split-; ] map ;
+
+SYMBOL: interned
+
+: range, ( value key -- )
+    swap interned get
+    [ = ] with find nip 2array , ;
+
+: expand-ranges ( assoc -- interval-map )
+    [
+        [
+            swap CHAR: . over member? [
+                ".." split1 [ hex> ] bi@ 2array
+            ] [ hex> ] if range,
+        ] assoc-each
+    ] { } make <interval-map> ;
+
+: process-interval-file ( ranges -- table )
+    dup values prune interned
+    [ expand-ranges ] with-variable ;
+
+: load-interval-file ( filename -- table )
+    data process-interval-file ;
--- a/basis/specialized-arrays/functor/functor.factor
+++ b/basis/specialized-arrays/functor/functor.factor
@ -70,7 +70,7 @@ M: A >pprint-sequence ;

 M: A pprint* pprint-object ;

-: A{ \ } [ >A ] parse-literal ; parsing
+SYNTAX: A{ \ } [ >A ] parse-literal ;

 INSTANCE: A sequence

--- a/basis/specialized-vectors/functor/functor.factor
+++ b/basis/specialized-vectors/functor/functor.factor
@ -39,7 +39,7 @@ M: V >pprint-sequence ;

 M: V pprint* pprint-object ;

-: V{ \ } [ >V ] parse-literal ; parsing
+SYNTAX: V{ \ } [ >V ] parse-literal ;

 INSTANCE: V growable

--- a/basis/stack-checker/transforms/transforms-tests.factor
+++ b/basis/stack-checker/transforms/transforms-tests.factor
@ -65,4 +65,9 @@ DEFER: curry-folding-test ( quot -- )

 { 3 0 } [ [ 1 2 3 ] curry-folding-test ] must-infer-as
 { 3 0 } [ 1 [ 2 3 ] curry curry-folding-test ] must-infer-as
-{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
+{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
+
+: member?-test ( a -- ? ) { 1 2 3 10 7 58 } member? ;
+
+[ f ] [ 1.0 member?-test ] unit-test
+[ t ] [ \ member?-test def>> first [ member?-test ] all? ] unit-test
--- a/basis/stack-checker/transforms/transforms.factor
+++ b/basis/stack-checker/transforms/transforms.factor
@ -1,12 +1,12 @@
 ! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: fry accessors arrays kernel kernel.private combinators.private
-words sequences generic math namespaces make quotations assocs
-combinators classes.tuple classes.tuple.private effects summary
-hashtables classes generic sets definitions generic.standard
-slots.private continuations locals generalizations
-stack-checker.backend stack-checker.state stack-checker.visitor
-stack-checker.errors stack-checker.values
+words sequences generic math math.order namespaces make quotations assocs
+combinators combinators.short-circuit classes.tuple
+classes.tuple.private effects summary hashtables classes generic sets
+definitions generic.standard slots.private continuations locals
+generalizations stack-checker.backend stack-checker.state
+stack-checker.visitor stack-checker.errors stack-checker.values
 stack-checker.recursive-state ;
 IN: stack-checker.transforms

@ -107,36 +107,28 @@ IN: stack-checker.transforms
 ] 1 define-transform

 ! Membership testing
-CONSTANT: bit-member-n 256
+CONSTANT: bit-member-max 256

 : bit-member? ( seq -- ? )
    #! Can we use a fast byte array test here?
    {
-        { [ dup length 8 < ] [ f ] }
-        { [ dup [ integer? not ] any? ] [ f ] }
-        { [ dup [ 0 < ] any? ] [ f ] }
-        { [ dup [ bit-member-n >= ] any? ] [ f ] }
-        [ t ]
-    } cond nip ;
+        [ length 4 > ]
+        [ [ integer? ] all? ]
+        [ [ 0 bit-member-max between? ] any? ]
+    } 1&& ;

 : bit-member-seq ( seq -- flags )
-    bit-member-n swap [ member? 1 0 ? ] curry B{ } map-as ;
-
-: exact-float? ( f -- ? )
-    dup float? [ dup >integer >float = ] [ drop f ] if ; inline
+    [ supremum 1+ ] keep '[ _ member? 1 0 ? ] B{ } map-as ;

 : bit-member-quot ( seq -- newquot )
-    [
-        bit-member-seq ,
-        [
-            {
-                { [ over fixnum? ] [ ?nth 1 eq? ] }
-                { [ over bignum? ] [ ?nth 1 eq? ] }
-                { [ over exact-float? ] [ ?nth 1 eq? ] }
-                [ 2drop f ]
-            } cond
-        ] %
-    ] [ ] make ;
+    bit-member-seq
+    '[
+        _ {
+            { [ over fixnum? ] [ ?nth 1 eq? ] }
+            { [ over bignum? ] [ ?nth 1 eq? ] }
+            [ 2drop f ]
+        } cond
+    ] ;

 : member-quot ( seq -- newquot )
    dup bit-member? [
--- a/basis/suffix-arrays/suffix-arrays.factor
+++ b/basis/suffix-arrays/suffix-arrays.factor
@ -32,7 +32,7 @@ PRIVATE>
 : >suffix-array ( seq -- array )
    [ suffixes ] map concat natural-sort ;

-: SA{ \ } [ >suffix-array ] parse-literal ; parsing
+SYNTAX: SA{ \ } [ >suffix-array ] parse-literal ;

 : query ( begin suffix-array -- matches )
    2dup find-index dup
--- a/basis/tools/disassembler/disassembler.factor
+++ b/basis/tools/disassembler/disassembler.factor
@ -18,7 +18,7 @@ M: word disassemble word-xt 2array disassemble ;

 M: method-spec disassemble first2 method disassemble ;

-cpu x86? os unix? and
+cpu x86?
 "tools.disassembler.udis"
 "tools.disassembler.gdb" ?
 require
--- a/basis/tools/disassembler/udis/udis.factor
+++ b/basis/tools/disassembler/udis/udis.factor
@ -30,9 +30,9 @@ CONSTANT: UD_VENDOR_AMD   0
 CONSTANT: UD_VENDOR_INTEL 1

 FUNCTION: void ud_init ( ud* u ) ;
-FUNCTION: void ud_set_mode ( ud* u, uint8_t mode ) ;
+FUNCTION: void ud_set_mode ( ud* u, uchar mode ) ;
 FUNCTION: void ud_set_pc ( ud* u, ulonglong pc ) ;
-FUNCTION: void ud_set_input_buffer ( ud* u, uint8_t* offset, size_t size ) ;
+FUNCTION: void ud_set_input_buffer ( ud* u, uchar* offset, size_t size ) ;
 FUNCTION: void ud_set_vendor ( ud* u, uint vendor ) ;
 FUNCTION: void ud_set_syntax ( ud* u, void* syntax ) ;
 FUNCTION: void ud_input_skip ( ud* u, size_t size ) ;
--- a/basis/tr/tr.factor
+++ b/basis/tr/tr.factor
@ -40,10 +40,9 @@ M: bad-tr summary

 PRIVATE>

-: TR:
+SYNTAX: TR:
    scan parse-definition
    unclip-last [ unclip-last ] dip compute-tr
    [ check-tr ]
    [ [ create-tr ] dip define-tr ]
    [ [ "-fast" append create-tr ] dip define-fast-tr ] 2tri ;
-    parsing
--- a/basis/unicode/breaks/breaks.factor
+++ b/basis/unicode/breaks/breaks.factor
@ -4,8 +4,9 @@ USING: combinators.short-circuit unicode.categories kernel math
 combinators splitting sequences math.parser io.files io assocs
 arrays namespaces make math.ranges unicode.normalize
 unicode.normalize.private values io.encodings.ascii
-unicode.syntax unicode.data compiler.units fry
-alien.syntax sets accessors interval-maps memoize locals words ;
+unicode.data compiler.units fry unicode.categories.syntax
+alien.syntax sets accessors interval-maps memoize locals words
+simple-flat-file ;
 IN: unicode.breaks

 <PRIVATE
@ -31,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
        [ drop Control ]
    } case ;

-CATEGORY: (extend) Me Mn ;
-: extend? ( ch -- ? )
-    { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
+CATEGORY: extend
+    Me Mn |
+    "Other_Grapheme_Extend" property? ;

 : loe? ( ch -- ? )
    "Logical_Order_Exception" property? ;
@ -127,7 +128,7 @@ to: grapheme-table

 VALUE: word-break-table

-"vocab:unicode/data/WordBreakProperty.txt" load-key-value
+"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
 to: word-break-table

 C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
--- a/basis/unicode/case/case.factor
+++ b/basis/unicode/case/case.factor
@ -1,8 +1,8 @@
 ! Copyright (C) 2008, 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: unicode.data sequences namespaces
-sbufs make unicode.syntax unicode.normalize math hints
-unicode.categories combinators unicode.syntax assocs combinators.short-circuit
+sbufs make unicode.normalize math hints
+unicode.categories combinators assocs combinators.short-circuit
 strings splitting kernel accessors unicode.breaks fry locals ;
 QUALIFIED: ascii
 IN: unicode.case
--- a/basis/unicode/categories/categories-docs.factor
+++ b/basis/unicode/categories/categories-docs.factor
@ -12,6 +12,9 @@ HELP: Letter
 HELP: alpha
 { $class-description "The class of alphanumeric characters." } ;

+HELP: math
+{ $class-description "The class of Unicode math characters." } ;
+
 HELP: blank
 { $class-description "The class of whitespace characters." } ;

@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
 { $subsection uncased }
 { $subsection uncased? }
 { $subsection character }
-{ $subsection character? } ;
+{ $subsection character? }
+{ $subsection math }
+{ $subsection math? } ;

 ABOUT: "unicode.categories"
--- a/basis/unicode/categories/categories.factor
+++ b/basis/unicode/categories/categories.factor
@ -1,15 +1,16 @@
 ! Copyright (C) 2008 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: unicode.syntax ;
+USING: unicode.categories.syntax sequences unicode.data ;
 IN: unicode.categories

-CATEGORY: blank Zs Zl Zp \r\n ;
-CATEGORY: letter Ll ;
-CATEGORY: LETTER Lu ;
-CATEGORY: Letter Lu Ll Lt Lm Lo ;
+CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
+CATEGORY: letter Ll | "Other_Lowercase" property? ;
+CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
+CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
 CATEGORY: digit Nd Nl No ;
 CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
-CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
+CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
 CATEGORY: control Cc ;
 CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; 
 CATEGORY-NOT: character Cn ;
+CATEGORY: math Sm | "Other_Math" property? ;
--- a/basis/unicode/categories/syntax/authors.txt
+++ b/basis/unicode/categories/syntax/authors.txt
--- a/basis/unicode/categories/syntax/summary.txt
+++ b/basis/unicode/categories/syntax/summary.txt
--- a/basis/unicode/categories/syntax/syntax-docs.factor
+++ b/basis/unicode/categories/syntax/syntax-docs.factor
@ -0,0 +1,19 @@
+! Copyright (C) 2008 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup ;
+IN: unicode.categories.syntax
+
+ABOUT: "unicode.categories.syntax"
+
+ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
+"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
+{ $subsection POSTPONE: CATEGORY: }
+{ $subsection POSTPONE: CATEGORY-NOT: } ;
+
+HELP: CATEGORY:
+{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
+{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
+
+HELP: CATEGORY-NOT:
+{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
+{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;
--- a/basis/unicode/categories/syntax/syntax-tests.factor
+++ b/basis/unicode/categories/syntax/syntax-tests.factor
@ -0,0 +1,3 @@
+! Copyright (C) 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+
--- a/basis/unicode/categories/syntax/syntax.factor
+++ b/basis/unicode/categories/syntax/syntax.factor
@ -0,0 +1,34 @@
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: unicode.data kernel math sequences parser unicode.data.private
+bit-arrays namespaces sequences.private arrays classes.parser
+assocs classes.predicate sets fry splitting accessors ;
+IN: unicode.categories.syntax
+
+! For use in CATEGORY:
+SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
+
+<PRIVATE
+
+: [category] ( categories code -- quot )
+    '[ dup category# _ member? [ drop t ] _ if ] ;
+
+: integer-predicate-class ( word predicate -- )
+    integer swap define-predicate-class ;
+
+: define-category ( word categories code -- )
+    [category] integer-predicate-class ;
+
+: define-not-category ( word categories code -- )
+    [category] [ not ] compose integer-predicate-class ;
+
+: parse-category ( -- word tokens quot )
+    CREATE-CLASS \ ; parse-until { | } split1
+    [ [ name>> categories-map at ] map ]
+    [ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
+
+PRIVATE>
+
+SYNTAX: CATEGORY: parse-category define-category ;
+
+SYNTAX: CATEGORY-NOT: parse-category define-not-category ;
--- a/basis/unicode/categories/syntax/tags.txt
+++ b/basis/unicode/categories/syntax/tags.txt
--- a/basis/unicode/collation/collation.factor
+++ b/basis/unicode/collation/collation.factor
@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
 io.encodings.ascii kernel values splitting accessors math.parser
 ascii io assocs strings math namespaces make sorting combinators
 math.order arrays unicode.normalize unicode.data locals
-unicode.syntax macros sequences.deep words unicode.breaks
+macros sequences.deep words unicode.breaks
 quotations combinators.short-circuit simple-flat-file ;
 IN: unicode.collation

--- a/basis/unicode/data/data-docs.factor
+++ b/basis/unicode/data/data-docs.factor
@ -6,7 +6,7 @@ IN: unicode.data
 ABOUT: "unicode.data"

 ARTICLE: "unicode.data" "Unicode data tables"
-"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files."
+"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files. The following words access these data tables."
 { $subsection canonical-entry }
 { $subsection combine-chars }
 { $subsection combining-class }
@ -14,7 +14,11 @@ ARTICLE: "unicode.data" "Unicode data tables"
 { $subsection name>char }
 { $subsection char>name }
 { $subsection property? }
-{ $subsection load-key-value } ;
+{ $subsection category }
+{ $subsection ch>upper }
+{ $subsection ch>lower } 
+{ $subsection ch>title } 
+{ $subsection special-case } ;

 HELP: canonical-entry
 { $values { "char" "a code point" } { "seq" string } }
@ -48,6 +52,22 @@ HELP: property?
 { $values { "char" "a code point" } { "property" string } { "?" "a boolean" } }
 { $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ;

-HELP: load-key-value
-{ $values { "filename" string } { "table" "an interval map" } }
-{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
+HELP: category
+{ $values { "char" "a code point" } { "category" string } }
+{ $description "Returns the general category of a code point, in the form of a string. This will always be a string within the ASCII range of length two. If the code point is unassigned, then it returns " { $snippet "Cn" } "." } ;
+
+HELP: ch>upper
+{ $values { "ch" "a code point" } { "upper" "a code point" } }
+{ $description "Returns the simple upper-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
+
+HELP: ch>lower
+{ $values { "ch" "a code point" } { "lower" "a code point" } }
+{ $description "Returns the simple lower-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
+
+HELP: ch>title
+{ $values { "ch" "a code point" } { "title" "a code point" } }
+{ $description "Returns the simple title-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
+
+HELP: special-case
+{ $values { "ch" "a code point" } { "casing-tuple" { "a tuple, or " { $link f } } } }
+{ $description "If a code point has special casing behavior, returns a tuple which represents that information." } ;
--- a/basis/unicode/data/data.factor
+++ b/basis/unicode/data/data.factor
@ -58,7 +58,7 @@ CONSTANT: num-chars HEX: 2FA1E

 PRIVATE>

-: category# ( char -- category )
+: category# ( char -- n )
    ! There are a few characters that should be Cn
    ! that this gives Cf or Mn
    ! Cf = 26; Mn = 5; Cn = 29
@ -219,27 +219,3 @@ load-properties to: properties

 [ name>char [ "Invalid character" throw ] unless* ]
 name>char-hook set-global
-
-SYMBOL: interned
-
-: range, ( value key -- )
-    swap interned get
-    [ = ] with find nip 2array , ;
-
-: expand-ranges ( assoc -- interval-map )
-    [
-        [
-            swap CHAR: . over member? [
-                ".." split1 [ hex> ] bi@ 2array
-            ] [ hex> ] if range,
-        ] assoc-each
-    ] { } make <interval-map> ;
-
-: process-key-value ( ranges -- table )
-    dup values prune interned
-    [ expand-ranges ] with-variable ;
-
-PRIVATE>
-
-: load-key-value ( filename -- table )
-    data process-key-value ;
--- a/basis/unicode/normalize/normalize.factor
+++ b/basis/unicode/normalize/normalize.factor
@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: ascii sequences namespaces make unicode.data kernel math arrays
 locals sorting.insertion accessors assocs math.order combinators
-unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
+strings sbufs hints combinators.short-circuit vectors ;
 IN: unicode.normalize

 <PRIVATE
--- a/basis/unicode/script/script.factor
+++ b/basis/unicode/script/script.factor
@ -1,17 +1,13 @@
 ! Copyright (C) 2008 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors values kernel sequences assocs io.files
-io.encodings ascii math.ranges io splitting math.parser
-namespaces make byte-arrays locals math sets io.encodings.ascii
-words words.symbol compiler.units arrays interval-maps
-unicode.data ;
+USING: values interval-maps simple-flat-file ;
 IN: unicode.script

 <PRIVATE

 VALUE: script-table

-"vocab:unicode/script/Scripts.txt" load-key-value
+"vocab:unicode/script/Scripts.txt" load-interval-file
 to: script-table

 PRIVATE>
--- a/basis/unicode/syntax/syntax.factor
+++ b/basis/unicode/syntax/syntax.factor
@ -1,38 +0,0 @@
-! Copyright (C) 2008 Daniel Ehrenberg.
-! See http://factorcode.org/license.txt for BSD license.
-USING: unicode.data kernel math sequences parser lexer
-bit-arrays namespaces make sequences.private arrays quotations
-assocs classes.predicate math.order strings.parser ;
-IN: unicode.syntax
-
-<PRIVATE
-
-: >category-array ( categories -- bitarray )
-    categories [ swap member? ] with map >bit-array ;
-
-: as-string ( strings -- bit-array )
-    concat unescape-string ;
-
-: [category] ( categories -- quot )
-    [
-        [ [ categories member? not ] filter as-string ] keep 
-        [ categories member? ] filter >category-array
-        [ dup category# ] % , [ nth-unsafe [ drop t ] ] %
-        \ member? 2array >quotation ,
-        \ if ,
-    ] [ ] make ;
-
-: define-category ( word categories -- )
-    [category] integer swap define-predicate-class ;
-
-PRIVATE>
-
-: CATEGORY:
-    CREATE ";" parse-tokens define-category ; parsing
-
-: seq-minus ( seq1 seq2 -- diff )
-    [ member? not ] curry filter ;
-
-: CATEGORY-NOT:
-    CREATE ";" parse-tokens
-    categories swap seq-minus define-category ; parsing
--- a/basis/unicode/unicode-docs.factor
+++ b/basis/unicode/unicode-docs.factor
@ -15,7 +15,7 @@ $nl
 { $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
 { $vocab-subsection "Unicode normalization" "unicode.normalize" }
 "The following are mostly for internal use:"
-{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
+{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
 { $vocab-subsection "Unicode data tables" "unicode.data" }
 { $see-also "ascii" "io.encodings" } ;

--- a/basis/urls/urls.factor
+++ b/basis/urls/urls.factor
@ -179,7 +179,7 @@ PRIVATE>
    dup protocol>> '[ _ protocol-port or ] change-port ;

 ! Literal syntax
-: URL" lexer get skip-blank parse-string >url parsed ; parsing
+SYNTAX: URL" lexer get skip-blank parse-string >url parsed ;

 USING: vocabs vocabs.loader ;

--- a/basis/values/values.factor
+++ b/basis/values/values.factor
@ -30,11 +30,11 @@ PREDICATE: value-word < word
        [ second \ obj>> = ]
    } 1&& ;

-: VALUE:
+SYNTAX: VALUE:
    CREATE-WORD
    dup t "no-def-strip" set-word-prop
    T{ value-holder } clone [ obj>> ] curry
-    (( -- value )) define-declared ; parsing
+    (( -- value )) define-declared ;

 M: value-word definer drop \ VALUE: f ;

@ -43,9 +43,9 @@ M: value-word definition drop f ;
 : set-value ( value word -- )
    def>> first (>>obj) ;

-: to:
+SYNTAX: to:
    scan-word literalize parsed
-    \ set-value parsed ; parsing
+    \ set-value parsed ;

 : get-value ( word -- value )
    def>> first obj>> ;
--- a/basis/vlists/vlists.factor
+++ b/basis/vlists/vlists.factor
@ -50,7 +50,7 @@ M: vlist like

 INSTANCE: vlist immutable-sequence

-: VL{ \ } [ >vlist ] parse-literal ; parsing
+SYNTAX: VL{ \ } [ >vlist ] parse-literal ;

 M: vlist pprint-delims drop \ VL{ \ } ;
 M: vlist >pprint-sequence ;
@ -87,7 +87,7 @@ M: valist assoc-like

 INSTANCE: valist assoc

-: VA{ \ } [ >valist ] parse-literal ; parsing
+SYNTAX: VA{ \ } [ >valist ] parse-literal ;

 M: valist pprint-delims drop \ VA{ \ } ;
 M: valist >pprint-sequence >alist ;
--- a/basis/windows/com/syntax/syntax.factor
+++ b/basis/windows/com/syntax/syntax.factor
@ -90,14 +90,13 @@ unless

 PRIVATE>

-: COM-INTERFACE:
+SYNTAX: COM-INTERFACE:
    scan
    scan find-com-interface-definition
    scan string>guid
    parse-com-functions
    <com-interface-definition>
    dup save-com-interface-definition
-    define-words-for-com-interface
-    ; parsing
+    define-words-for-com-interface ;

-: GUID: scan string>guid parsed ; parsing
+SYNTAX: GUID: scan string>guid parsed ;
--- a/basis/xml/char-classes/char-classes.factor
+++ b/basis/xml/char-classes/char-classes.factor
@ -1,19 +1,26 @@
 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences unicode.syntax math math.order combinators
-hints ;
+USING: kernel sequences unicode.categories.syntax math math.order
+combinators hints combinators.short-circuit ;
 IN: xml.char-classes

-CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
-: 1.0name-start? ( char -- ? )
-    dup 1.0name-start*? [ drop t ] 
-    [ HEX: 2BB HEX: 2C1 between? ] if ;
+CATEGORY: 1.0name-start
+    Ll Lu Lo Lt Nl | {
+        [ HEX: 2BB HEX: 2C1 between? ]
+        [ "\u000559\u0006E5\u0006E6_:" member? ]
+    } 1|| ;

-CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
+CATEGORY: 1.0name-char
+    Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
+    "_-.\u000387:" member? ;

-CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
+CATEGORY: 1.1name-start
+    Ll Lu Lo Lm Nl |
+    "_:" member? ;

-CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
+CATEGORY: 1.1name-char
+    Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
+    "_-.\u0000b7:" member? ;

 : name-start? ( 1.0? char -- ? )
    swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
--- a/basis/xml/syntax/syntax.factor
+++ b/basis/xml/syntax/syntax.factor
@ -26,17 +26,17 @@ M: no-tag summary

 PRIVATE>

-: TAGS:
+SYNTAX: TAGS:
    CREATE
    [ H{ } clone "xtable" set-word-prop ]
-    [ define-tags ] bi ; parsing
+    [ define-tags ] bi ;

-: TAG:
-    scan scan-word parse-definition define-tag ; parsing
+SYNTAX: TAG:
+    scan scan-word parse-definition define-tag ;

-: XML-NS:
+SYNTAX: XML-NS:
    CREATE-WORD (( string -- name )) over set-stack-effect
-    scan '[ f swap _ <name> ] define-memoized ; parsing
+    scan '[ f swap _ <name> ] define-memoized ;

 <PRIVATE

@ -168,11 +168,11 @@ MACRO: interpolate-xml ( xml -- quot )

 PRIVATE>

-: <XML
-    "XML>" [ string>doc ] parse-def ; parsing
+SYNTAX: <XML
+    "XML>" [ string>doc ] parse-def ;

-: [XML
-    "XML]" [ string>chunk ] parse-def ; parsing
+SYNTAX: [XML
+    "XML]" [ string>chunk ] parse-def ;

 <PRIVATE

--- a/Show More
+++ b/Show More