Moving unicode.syntax to unicode.categories.syntax; documenting and modifying syntax

db4
Daniel Ehrenberg 2009-03-21 01:11:45 -05:00
parent f3038f2ae8
commit 62638fb4d3
15 changed files with 96 additions and 60 deletions

View File

@ -4,7 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize
unicode.normalize.private values io.encodings.ascii
unicode.syntax unicode.data compiler.units fry
unicode.data compiler.units fry unicode.categories.syntax
alien.syntax sets accessors interval-maps memoize locals words
simple-flat-file ;
IN: unicode.breaks
@ -32,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
[ drop Control ]
} case ;
CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? )
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
CATEGORY: extend
Me Mn |
"Other_Grapheme_Extend" property? ;
: loe? ( ch -- ? )
"Logical_Order_Exception" property? ;

View File

@ -1,8 +1,8 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data sequences namespaces
sbufs make unicode.syntax unicode.normalize math hints
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
sbufs make unicode.normalize math hints
unicode.categories combinators assocs combinators.short-circuit
strings splitting kernel accessors unicode.breaks fry locals ;
QUALIFIED: ascii
IN: unicode.case

View File

@ -12,6 +12,9 @@ HELP: Letter
HELP: alpha
{ $class-description "The class of alphanumeric characters." } ;
HELP: math
{ $class-description "The class of Unicode math characters." } ;
HELP: blank
{ $class-description "The class of whitespace characters." } ;
@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
{ $subsection uncased }
{ $subsection uncased? }
{ $subsection character }
{ $subsection character? } ;
{ $subsection character? }
{ $subsection math }
{ $subsection math? } ;
ABOUT: "unicode.categories"

View File

@ -1,15 +1,16 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.syntax ;
USING: unicode.categories.syntax sequences unicode.data ;
IN: unicode.categories
CATEGORY: blank Zs Zl Zp \r\n ;
CATEGORY: letter Ll ;
CATEGORY: LETTER Lu ;
CATEGORY: Letter Lu Ll Lt Lm Lo ;
CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
CATEGORY: letter Ll | "Other_Lowercase" property? ;
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
CATEGORY: digit Nd Nl No ;
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
CATEGORY: control Cc ;
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
CATEGORY-NOT: character Cn ;
CATEGORY: math Sm | "Other_Math" property? ;

View File

@ -0,0 +1,19 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: unicode.categories.syntax
ABOUT: "unicode.categories.syntax"
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
{ $subsection POSTPONE: CATEGORY: }
{ $subsection POSTPONE: CATEGORY-NOT: } ;
HELP: CATEGORY:
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
HELP: CATEGORY-NOT:
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;

View File

@ -0,0 +1,3 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.

View File

@ -0,0 +1,36 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser
bit-arrays namespaces sequences.private arrays classes.parser
assocs classes.predicate sets fry splitting accessors ;
IN: unicode.categories.syntax
! For use in CATEGORY:
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
<PRIVATE
: >category-array ( categories -- bitarray )
categories [ swap member? ] with map >bit-array ;
: [category] ( categories code -- quot )
[ >category-array ] dip
'[ dup category# _ nth-unsafe [ drop t ] _ if ] ;
: define-category ( word categories code -- )
[category] integer swap define-predicate-class ;
: parse-category ( -- word tokens quot )
CREATE-CLASS \ ; parse-until { | } split1
[ [ name>> ] map ]
[ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
PRIVATE>
: CATEGORY:
parse-category define-category ; parsing
: CATEGORY-NOT:
parse-category
[ categories swap diff ] dip
define-category ; parsing

View File

@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks
macros sequences.deep words unicode.breaks
quotations combinators.short-circuit simple-flat-file ;
IN: unicode.collation

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: ascii sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors assocs math.order combinators
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
strings sbufs hints combinators.short-circuit vectors ;
IN: unicode.normalize
<PRIVATE

View File

@ -1,35 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser lexer
bit-arrays namespaces make sequences.private arrays quotations
assocs classes.predicate math.order strings.parser sets ;
IN: unicode.syntax
<PRIVATE
: >category-array ( categories -- bitarray )
categories [ swap member? ] with map >bit-array ;
: as-string ( strings -- bit-array )
concat unescape-string ;
: [category] ( categories -- quot )
[
[ [ categories member? not ] filter as-string ] keep
[ categories member? ] filter >category-array
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
\ member? 2array >quotation ,
\ if ,
] [ ] make ;
: define-category ( word categories -- )
[category] integer swap define-predicate-class ;
PRIVATE>
: CATEGORY:
CREATE ";" parse-tokens define-category ; parsing
: CATEGORY-NOT:
CREATE ";" parse-tokens
categories swap diff define-category ; parsing

View File

@ -15,7 +15,7 @@ $nl
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
"The following are mostly for internal use:"
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
{ $vocab-subsection "Unicode data tables" "unicode.data" }
{ $see-also "ascii" "io.encodings" } ;

View File

@ -1,19 +1,26 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences unicode.syntax math math.order combinators
hints ;
USING: kernel sequences unicode.categories.syntax math math.order
combinators hints ;
IN: xml.char-classes
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
: 1.0name-start? ( char -- ? )
dup 1.0name-start*? [ drop t ]
[ HEX: 2BB HEX: 2C1 between? ] if ;
CATEGORY: 1.0name-start
Ll Lu Lo Lt Nl | {
[ HEX: 2BB HEX: 2C1 between? ]
[ "\u000559\u0006E5\u0006E6_:" member? ]
} 1|| ;
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
CATEGORY: 1.0name-char
Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
"_-.\u000387:" member? ;
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
CATEGORY: 1.1name-start
Ll Lu Lo Lm Ln Nl |
"_:" member? ;
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
CATEGORY: 1.1name-char
Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf |
"_-.\u0000b7:" member? ;
: name-start? ( 1.0? char -- ? )
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;