factor/extra/unicode/syntax/syntax.factor

49 lines
1.4 KiB
Factor
Raw Normal View History

2008-01-09 18:13:26 -05:00
USING: unicode.data kernel math sequences parser bit-arrays namespaces
2008-01-29 14:33:14 -05:00
sequences.private arrays quotations classes.predicate assocs ;
2008-01-09 14:44:07 -05:00
IN: unicode.syntax
! Character classes (categories)
: category# ( char -- category )
! There are a few characters that should be Cn
! that this gives Cf or Mn
! Cf = 26; Mn = 5; Cn = 29
! Use a compressed array instead?
dup category-map ?nth [ ] [
dup HEX: E0001 HEX: E007F between?
[ drop 26 ] [
HEX: E0100 HEX: E01EF between? 5 29 ?
] if
] ?if ;
: category ( char -- category )
category# categories nth ;
: >category-array ( categories -- bitarray )
categories [ swap member? ] with map >bit-array ;
2008-01-09 14:44:07 -05:00
: as-string ( strings -- bit-array )
concat "\"" tuck 3append eval ;
2008-01-09 14:44:07 -05:00
: [category] ( categories -- quot )
[
[ [ categories member? not ] subset as-string ] keep
[ categories member? ] subset >category-array
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
\ member? 2array >quotation ,
\ if ,
] [ ] make ;
: define-category ( word categories -- )
[category] fixnum -rot define-predicate-class ;
: CATEGORY:
CREATE ";" parse-tokens define-category ; parsing
: seq-minus ( seq1 seq2 -- diff )
[ member? not ] curry subset ;
: CATEGORY-NOT:
CREATE ";" parse-tokens
categories swap seq-minus define-category ; parsing