Moving unicode.syntax to unicode.categories.syntax; documenting and modifying syntax
							parent
							
								
									f3038f2ae8
								
							
						
					
					
						commit
						62638fb4d3
					
				| 
						 | 
				
			
			@ -4,7 +4,7 @@ USING: combinators.short-circuit unicode.categories kernel math
 | 
			
		|||
combinators splitting sequences math.parser io.files io assocs
 | 
			
		||||
arrays namespaces make math.ranges unicode.normalize
 | 
			
		||||
unicode.normalize.private values io.encodings.ascii
 | 
			
		||||
unicode.syntax unicode.data compiler.units fry
 | 
			
		||||
unicode.data compiler.units fry unicode.categories.syntax
 | 
			
		||||
alien.syntax sets accessors interval-maps memoize locals words
 | 
			
		||||
simple-flat-file ;
 | 
			
		||||
IN: unicode.breaks
 | 
			
		||||
| 
						 | 
				
			
			@ -32,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
 | 
			
		|||
        [ drop Control ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
CATEGORY: (extend) Me Mn ;
 | 
			
		||||
: extend? ( ch -- ? )
 | 
			
		||||
    { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
 | 
			
		||||
! Grapheme-extending code points: general categories Me and Mn, or any
! code point that carries the Other_Grapheme_Extend property.
CATEGORY: extend Me Mn | "Other_Grapheme_Extend" property? ;
 | 
			
		||||
 | 
			
		||||
! True when ch carries the Logical_Order_Exception property.
: loe? ( ch -- ? ) "Logical_Order_Exception" property? ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: unicode.data sequences namespaces
 | 
			
		||||
sbufs make unicode.syntax unicode.normalize math hints
 | 
			
		||||
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
 | 
			
		||||
sbufs make unicode.normalize math hints
 | 
			
		||||
unicode.categories combinators assocs combinators.short-circuit
 | 
			
		||||
strings splitting kernel accessors unicode.breaks fry locals ;
 | 
			
		||||
QUALIFIED: ascii
 | 
			
		||||
IN: unicode.case
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,6 +12,9 @@ HELP: Letter
 | 
			
		|||
HELP: alpha
 | 
			
		||||
{ $class-description "The class of alphanumeric characters." } ;
 | 
			
		||||
 | 
			
		||||
HELP: math
 | 
			
		||||
{ $class-description "The class of Unicode math characters." } ;
 | 
			
		||||
 | 
			
		||||
HELP: blank
 | 
			
		||||
{ $class-description "The class of whitespace characters." } ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
 | 
			
		|||
{ $subsection uncased }
 | 
			
		||||
{ $subsection uncased? }
 | 
			
		||||
{ $subsection character }
 | 
			
		||||
{ $subsection character? } ;
 | 
			
		||||
{ $subsection character? }
 | 
			
		||||
{ $subsection math }
 | 
			
		||||
{ $subsection math? } ;
 | 
			
		||||
 | 
			
		||||
ABOUT: "unicode.categories"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,15 +1,16 @@
 | 
			
		|||
! Copyright (C) 2008 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: unicode.syntax ;
 | 
			
		||||
USING: unicode.categories.syntax sequences unicode.data ;
 | 
			
		||||
IN: unicode.categories
 | 
			
		||||
 | 
			
		||||
CATEGORY: blank Zs Zl Zp \r\n ;
 | 
			
		||||
CATEGORY: letter Ll ;
 | 
			
		||||
CATEGORY: LETTER Lu ;
 | 
			
		||||
CATEGORY: Letter Lu Ll Lt Lm Lo ;
 | 
			
		||||
! Whitespace: the separator categories Zs, Zl and Zp, plus the ASCII control
! characters CR, LF and TAB. Those three are in category Cc, so they have to
! be listed explicitly; TAB was previously missing.
CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ;
 | 
			
		||||
! Lowercase letters: general category Ll, or any code point with the
! Other_Lowercase property.
CATEGORY: letter Ll | "Other_Lowercase" property? ;
 | 
			
		||||
! Uppercase letters: general category Lu, or any code point with the
! Other_Uppercase property.
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
 | 
			
		||||
! Letters of any case: general categories Lu, Ll, Lt, Lm, Lo and Nl.
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
 | 
			
		||||
! Numeric code points: general categories Nd, Nl and No.
CATEGORY: digit Nd Nl No ;
 | 
			
		||||
! Every code point whose general category is none of Cc, Cf, Cs, Co, Cn.
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
 | 
			
		||||
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
 | 
			
		||||
! Alphanumerics: the letter categories (Lu Ll Lt Lm Lo), the number
! categories (Nd Nl No), or any code point with the Other_Alphabetic property.
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
 | 
			
		||||
! Control characters: general category Cc only.
CATEGORY: control Cc ;
 | 
			
		||||
! Every code point whose general category is none of Lu, Ll, Lt, Lm, Mn, Me.
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ; 
 | 
			
		||||
! Every code point whose general category is not Cn.
CATEGORY-NOT: character Cn ;
 | 
			
		||||
! Math characters: general category Sm, or any code point with the
! Other_Math property.
CATEGORY: math Sm | "Other_Math" property? ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,19 @@
 | 
			
		|||
! Copyright (C) 2008 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: help.syntax help.markup ;
 | 
			
		||||
IN: unicode.categories.syntax
 | 
			
		||||
 | 
			
		||||
ABOUT: "unicode.categories.syntax"
 | 
			
		||||
 | 
			
		||||
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
 | 
			
		||||
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
 | 
			
		||||
{ $subsection POSTPONE: CATEGORY: }
 | 
			
		||||
{ $subsection POSTPONE: CATEGORY-NOT: } ;
 | 
			
		||||
 | 
			
		||||
HELP: CATEGORY:
 | 
			
		||||
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
 | 
			
		||||
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
 | 
			
		||||
 | 
			
		||||
! The description below follows the implementation of CATEGORY-NOT:, which
! complements only the category list; the code after | is still OR-ed in.
HELP: CATEGORY-NOT:
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points. It is like " { $link POSTPONE: CATEGORY: } ", except that the list of general categories is complemented. The code after " { $snippet "|" } " is not complemented: it still adds code points to the class. In this example, " { $snippet "foo" } " is the class of characters which are not in the general category Nl, Pd or Lu, together with any character which has the Diacritic property." } ;
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
! Copyright (C) 2009 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,36 @@
 | 
			
		|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: unicode.data kernel math sequences parser
 | 
			
		||||
bit-arrays namespaces sequences.private arrays classes.parser
 | 
			
		||||
assocs classes.predicate sets fry splitting accessors ;
 | 
			
		||||
IN: unicode.categories.syntax
 | 
			
		||||
 | 
			
		||||
! For use in CATEGORY:
 | 
			
		||||
! One symbol per general-category name, plus `|`. parse-category reads the
! body of a CATEGORY: / CATEGORY-NOT: form with parse-until, so every name
! used there must be a word; `|` is the separator it splits on.
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
! Build a bit-array parallel to the `categories` table: element i is set
! exactly when the i-th category name occurs in the given sequence.
: >category-array ( categories -- bitarray )
    categories swap '[ _ member? ] map >bit-array ;
 | 
			
		||||
 | 
			
		||||
! Build the predicate quotation ( ch -- ? ) for a category class.
! The quotation looks up the code point's category number in the bit-array;
! on a hit it answers t, otherwise it hands the code point to `code`.
: [category] ( categories code -- quot )
    swap >category-array swap
    '[ dup category# _ nth-unsafe [ drop t ] _ if ] ;
 | 
			
		||||
 | 
			
		||||
! Define `word` as a predicate class over integer whose test is the
! quotation built by [category] from the category names and extra code.
: define-category ( word categories code -- )
    [ integer ] 2dip [category] define-predicate-class ;
 | 
			
		||||
 | 
			
		||||
! Read the body of a CATEGORY: / CATEGORY-NOT: form up to `;`.
! Everything before `|` is a list of category symbols, returned as their
! names; everything after it becomes the extra test quotation. When no `|`
! is present the quotation defaults to [ drop f ].
: parse-category ( -- word tokens quot )
    CREATE-CLASS
    \ ; parse-until { | } split1
    [ [ name>> ] map ] dip
    [ [ ] like ] [ [ drop f ] ] if* ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
! Parsing word: CATEGORY: name categories... [ | code... ] ;
! Defines `name` as the class of code points in any listed category, or
! accepted by the optional code after `|`.
: CATEGORY: parse-category define-category ; parsing
 | 
			
		||||
 | 
			
		||||
! Parsing word: like CATEGORY:, except that the listed categories are
! replaced by every category NOT listed before the class is defined.
! The code after `|`, if any, is passed through unchanged.
: CATEGORY-NOT:
    parse-category swap
    categories swap diff
    swap define-category ; parsing
 | 
			
		||||
| 
						 | 
				
			
			@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
 | 
			
		|||
io.encodings.ascii kernel values splitting accessors math.parser
 | 
			
		||||
ascii io assocs strings math namespaces make sorting combinators
 | 
			
		||||
math.order arrays unicode.normalize unicode.data locals
 | 
			
		||||
unicode.syntax macros sequences.deep words unicode.breaks
 | 
			
		||||
macros sequences.deep words unicode.breaks
 | 
			
		||||
quotations combinators.short-circuit simple-flat-file ;
 | 
			
		||||
IN: unicode.collation
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,7 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: ascii sequences namespaces make unicode.data kernel math arrays
 | 
			
		||||
locals sorting.insertion accessors assocs math.order combinators
 | 
			
		||||
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
 | 
			
		||||
strings sbufs hints combinators.short-circuit vectors ;
 | 
			
		||||
IN: unicode.normalize
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,35 +0,0 @@
 | 
			
		|||
! Copyright (C) 2008 Daniel Ehrenberg.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: unicode.data kernel math sequences parser lexer
 | 
			
		||||
bit-arrays namespaces make sequences.private arrays quotations
 | 
			
		||||
assocs classes.predicate math.order strings.parser sets ;
 | 
			
		||||
IN: unicode.syntax
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: >category-array ( categories -- bitarray )
 | 
			
		||||
    categories [ swap member? ] with map >bit-array ;
 | 
			
		||||
 | 
			
		||||
: as-string ( strings -- bit-array )
 | 
			
		||||
    concat unescape-string ;
 | 
			
		||||
 | 
			
		||||
: [category] ( categories -- quot )
 | 
			
		||||
    [
 | 
			
		||||
        [ [ categories member? not ] filter as-string ] keep 
 | 
			
		||||
        [ categories member? ] filter >category-array
 | 
			
		||||
        [ dup category# ] % , [ nth-unsafe [ drop t ] ] %
 | 
			
		||||
        \ member? 2array >quotation ,
 | 
			
		||||
        \ if ,
 | 
			
		||||
    ] [ ] make ;
 | 
			
		||||
 | 
			
		||||
: define-category ( word categories -- )
 | 
			
		||||
    [category] integer swap define-predicate-class ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: CATEGORY:
 | 
			
		||||
    CREATE ";" parse-tokens define-category ; parsing
 | 
			
		||||
 | 
			
		||||
: CATEGORY-NOT:
 | 
			
		||||
    CREATE ";" parse-tokens
 | 
			
		||||
    categories swap diff define-category ; parsing
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +15,7 @@ $nl
 | 
			
		|||
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
 | 
			
		||||
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
 | 
			
		||||
"The following are mostly for internal use:"
 | 
			
		||||
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
 | 
			
		||||
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
 | 
			
		||||
{ $vocab-subsection "Unicode data tables" "unicode.data" }
 | 
			
		||||
{ $see-also "ascii" "io.encodings" } ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,19 +1,26 @@
 | 
			
		|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: kernel sequences unicode.syntax math math.order combinators
 | 
			
		||||
hints ;
 | 
			
		||||
USING: kernel sequences unicode.categories.syntax math math.order
 | 
			
		||||
combinators hints ;
 | 
			
		||||
IN: xml.char-classes
 | 
			
		||||
 | 
			
		||||
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
 | 
			
		||||
: 1.0name-start? ( char -- ? )
 | 
			
		||||
    dup 1.0name-start*? [ drop t ] 
 | 
			
		||||
    [ HEX: 2BB HEX: 2C1 between? ] if ;
 | 
			
		||||
! XML 1.0 name-start characters: general categories Ll Lu Lo Lt Nl, the
! range U+02BB..U+02C1, or one of the individually listed characters.
! The two extra tests are combined with `bi or` from kernel, because this
! vocabulary's USING: list does not include combinators.short-circuit (1||).
CATEGORY: 1.0name-start
    Ll Lu Lo Lt Nl |
    [ HEX: 2BB HEX: 2C1 between? ]
    [ "\u000559\u0006E5\u0006E6_:" member? ] bi or ;
 | 
			
		||||
 | 
			
		||||
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
 | 
			
		||||
! XML 1.0 name characters: the listed general categories, or one of the
! punctuation characters in the string.
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd | "_-.\u000387:" member? ;
 | 
			
		||||
 | 
			
		||||
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
 | 
			
		||||
! XML 1.1 name-start characters: general categories Ll Lu Lo Lm Nl, or
! one of "_" and ":". The former "Ln" entry is dropped: it is not a general
! category and has no symbol in unicode.categories.syntax, so it cannot be
! parsed as a category name.
CATEGORY: 1.1name-start
    Ll Lu Lo Lm Nl |
    "_:" member? ;
 | 
			
		||||
 | 
			
		||||
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
 | 
			
		||||
! XML 1.1 name characters: general categories Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf,
! or one of the punctuation characters in the string. The former "Ln" entry
! is dropped: it is not a general category and has no symbol in
! unicode.categories.syntax, so it cannot be parsed as a category name.
CATEGORY: 1.1name-char
    Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
    "_-.\u0000b7:" member? ;
 | 
			
		||||
 | 
			
		||||
! Test char against the XML 1.0 name-start class when the 1.0? flag is
! true, and against the XML 1.1 class otherwise.
: name-start? ( 1.0? char -- ? )
    over [ 1.0name-start? ] [ 1.1name-start? ] if nip ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue