Moving unicode.syntax to unicode.categories.syntax; documenting and modifying syntax
parent
f3038f2ae8
commit
62638fb4d3
|
@ -4,7 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
|
|||
combinators splitting sequences math.parser io.files io assocs
|
||||
arrays namespaces make math.ranges unicode.normalize
|
||||
unicode.normalize.private values io.encodings.ascii
|
||||
unicode.syntax unicode.data compiler.units fry
|
||||
unicode.data compiler.units fry unicode.categories.syntax
|
||||
alien.syntax sets accessors interval-maps memoize locals words
|
||||
simple-flat-file ;
|
||||
IN: unicode.breaks
|
||||
|
@ -32,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
|||
[ drop Control ]
|
||||
} case ;
|
||||
|
||||
CATEGORY: (extend) Me Mn ;
|
||||
: extend? ( ch -- ? )
|
||||
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
|
||||
CATEGORY: extend
|
||||
Me Mn |
|
||||
"Other_Grapheme_Extend" property? ;
|
||||
|
||||
: loe? ( ch -- ? )
|
||||
"Logical_Order_Exception" property? ;
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data sequences namespaces
|
||||
sbufs make unicode.syntax unicode.normalize math hints
|
||||
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
|
||||
sbufs make unicode.normalize math hints
|
||||
unicode.categories combinators assocs combinators.short-circuit
|
||||
strings splitting kernel accessors unicode.breaks fry locals ;
|
||||
QUALIFIED: ascii
|
||||
IN: unicode.case
|
||||
|
|
|
@ -12,6 +12,9 @@ HELP: Letter
|
|||
HELP: alpha
|
||||
{ $class-description "The class of alphanumeric characters." } ;
|
||||
|
||||
HELP: math
|
||||
{ $class-description "The class of Unicode math characters." } ;
|
||||
|
||||
HELP: blank
|
||||
{ $class-description "The class of whitespace characters." } ;
|
||||
|
||||
|
@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
|
|||
{ $subsection uncased }
|
||||
{ $subsection uncased? }
|
||||
{ $subsection character }
|
||||
{ $subsection character? } ;
|
||||
{ $subsection character? }
|
||||
{ $subsection math }
|
||||
{ $subsection math? } ;
|
||||
|
||||
ABOUT: "unicode.categories"
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.syntax ;
|
||||
USING: unicode.categories.syntax sequences unicode.data ;
|
||||
IN: unicode.categories
|
||||
|
||||
CATEGORY: blank Zs Zl Zp \r\n ;
|
||||
CATEGORY: letter Ll ;
|
||||
CATEGORY: LETTER Lu ;
|
||||
CATEGORY: Letter Lu Ll Lt Lm Lo ;
|
||||
CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
|
||||
CATEGORY: letter Ll | "Other_Lowercase" property? ;
|
||||
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
|
||||
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
|
||||
CATEGORY: digit Nd Nl No ;
|
||||
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
|
||||
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
|
||||
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
|
||||
CATEGORY: control Cc ;
|
||||
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
|
||||
CATEGORY-NOT: character Cn ;
|
||||
CATEGORY: math Sm | "Other_Math" property? ;
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: unicode.categories.syntax
|
||||
|
||||
ABOUT: "unicode.categories.syntax"
|
||||
|
||||
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
|
||||
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
|
||||
{ $subsection POSTPONE: CATEGORY: }
|
||||
{ $subsection POSTPONE: CATEGORY-NOT: } ;
|
||||
|
||||
HELP: CATEGORY:
|
||||
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
|
||||
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
|
||||
|
||||
HELP: CATEGORY-NOT:
|
||||
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
|
||||
{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;
|
|
@ -0,0 +1,3 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data kernel math sequences parser
|
||||
bit-arrays namespaces sequences.private arrays classes.parser
|
||||
assocs classes.predicate sets fry splitting accessors ;
|
||||
IN: unicode.categories.syntax
|
||||
|
||||
! For use in CATEGORY:
|
||||
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! Convert a sequence of category names into a bit-array that runs parallel
! to the `categories` sequence: each bit is set when the corresponding entry
! of `categories` is a member of the input sequence.
: >category-array ( categories -- bitarray )
    categories swap '[ _ member? ] map >bit-array ;
|
||||
|
||||
! Build the quotation used as a category predicate. The quotation takes a
! character, looks up its category number with category#, and indexes the
! bit-array made from `categories`. If the bit is set the character is
! dropped and t is answered; otherwise the character is passed to `code`.
: [category] ( categories code -- quot )
    swap >category-array swap
    '[ dup category# _ nth-unsafe [ drop t ] _ if ] ;
|
||||
|
||||
! Define `word` as a predicate class with superclass integer. Its predicate
! is the quotation that [category] builds from the category names and the
! fallback code.
: define-category ( word categories code -- )
    [category] [ integer ] dip define-predicate-class ;
|
||||
|
||||
! Read the rest of a CATEGORY: / CATEGORY-NOT: form up to the closing `;`.
! A class word is created first; the parsed objects are then split at the
! first `|`. What follows the `|` is converted to a quotation, or replaced
! by [ drop f ] when there is no `|`. What precedes it is mapped to the
! names of the parsed words.
: parse-category ( -- word tokens quot )
    CREATE-CLASS \ ; parse-until { | } split1
    [ [ ] like ] [ [ drop f ] ] if*
    [ [ name>> ] map ] dip ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
! Parsing word. Reads `name categories... [ | code ] ;` with parse-category
! and defines `name` as a predicate class on integer whose test is the
! quotation built by [category].
: CATEGORY:
    parse-category [category]
    integer swap define-predicate-class ; parsing
|
||||
|
||||
! Parsing word. Reads `name categories... [ | code ] ;` with parse-category
! and defines `name` as the complement of the class that CATEGORY: would
! define from the same form: a character belongs to it only when it is in
! none of the listed categories AND the code after `|` answers f.
!
! The whole predicate is negated rather than just the category set.
! Complementing only the set leaves the `|` code OR-ed in, so a character
! satisfying the code would be wrongly included. Forms without `|` behave
! exactly as before.
: CATEGORY-NOT:
    parse-category [category] [ not ] compose
    integer swap define-predicate-class ; parsing
|
|
@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
|
|||
io.encodings.ascii kernel values splitting accessors math.parser
|
||||
ascii io assocs strings math namespaces make sorting combinators
|
||||
math.order arrays unicode.normalize unicode.data locals
|
||||
unicode.syntax macros sequences.deep words unicode.breaks
|
||||
macros sequences.deep words unicode.breaks
|
||||
quotations combinators.short-circuit simple-flat-file ;
|
||||
IN: unicode.collation
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: ascii sequences namespaces make unicode.data kernel math arrays
|
||||
locals sorting.insertion accessors assocs math.order combinators
|
||||
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
|
||||
strings sbufs hints combinators.short-circuit vectors ;
|
||||
IN: unicode.normalize
|
||||
|
||||
<PRIVATE
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data kernel math sequences parser lexer
|
||||
bit-arrays namespaces make sequences.private arrays quotations
|
||||
assocs classes.predicate math.order strings.parser sets ;
|
||||
IN: unicode.syntax
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: >category-array ( categories -- bitarray )
|
||||
categories [ swap member? ] with map >bit-array ;
|
||||
|
||||
: as-string ( strings -- bit-array )
|
||||
concat unescape-string ;
|
||||
|
||||
: [category] ( categories -- quot )
|
||||
[
|
||||
[ [ categories member? not ] filter as-string ] keep
|
||||
[ categories member? ] filter >category-array
|
||||
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
|
||||
\ member? 2array >quotation ,
|
||||
\ if ,
|
||||
] [ ] make ;
|
||||
|
||||
: define-category ( word categories -- )
|
||||
[category] integer swap define-predicate-class ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: CATEGORY:
|
||||
CREATE ";" parse-tokens define-category ; parsing
|
||||
|
||||
: CATEGORY-NOT:
|
||||
CREATE ";" parse-tokens
|
||||
categories swap diff define-category ; parsing
|
|
@ -15,7 +15,7 @@ $nl
|
|||
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
|
||||
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
|
||||
"The following are mostly for internal use:"
|
||||
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
|
||||
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
|
||||
{ $vocab-subsection "Unicode data tables" "unicode.data" }
|
||||
{ $see-also "ascii" "io.encodings" } ;
|
||||
|
||||
|
|
|
@ -1,19 +1,26 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences unicode.syntax math math.order combinators
|
||||
hints ;
|
||||
USING: kernel sequences unicode.categories.syntax math math.order
combinators combinators.short-circuit hints ;
|
||||
IN: xml.char-classes
|
||||
|
||||
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
|
||||
: 1.0name-start? ( char -- ? )
|
||||
dup 1.0name-start*? [ drop t ]
|
||||
[ HEX: 2BB HEX: 2C1 between? ] if ;
|
||||
CATEGORY: 1.0name-start
|
||||
Ll Lu Lo Lt Nl | {
|
||||
[ HEX: 2BB HEX: 2C1 between? ]
|
||||
[ "\u000559\u0006E5\u0006E6_:" member? ]
|
||||
} 1|| ;
|
||||
|
||||
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
|
||||
CATEGORY: 1.0name-char
|
||||
Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
|
||||
"_-.\u000387:" member? ;
|
||||
|
||||
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
|
||||
! Class of characters accepted by 1.1name-start? (presumably the start of an
! XML 1.1 name -- confirm against the XML spec): any character in general
! category Ll, Lu, Lo, Lm or Nl, or one of the characters in "_:".
! Only real general-category symbols may appear before `|`, because they
! are parsed as words; "Ln" is not a Unicode general category.
CATEGORY: 1.1name-start
    Ll Lu Lo Lm Nl |
    "_:" member? ;
|
||||
|
||||
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
|
||||
! Class of characters accepted by 1.1name-char? (presumably a non-initial
! character of an XML 1.1 name -- confirm against the XML spec): any
! character in general category Ll, Lu, Lo, Lm, Nl, Mc, Mn, Nd, Pc or Cf,
! or one of the characters in "_-.\u0000b7:".
! Only real general-category symbols may appear before `|`, because they
! are parsed as words; "Ln" is not a Unicode general category.
CATEGORY: 1.1name-char
    Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
    "_-.\u0000b7:" member? ;
|
||||
|
||||
: name-start? ( 1.0? char -- ? )
|
||||
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
|
||||
|
|
Loading…
Reference in New Issue