Unicode breaks fix and deletion of repetition in syntax
parent
33193a92f8
commit
cf29921083
|
@ -3,5 +3,5 @@ USING: tools.test unicode.breaks sequences math kernel ;
|
||||||
[ "\u1112\u1161\u11abA\u0300a\r\r\n" ]
|
[ "\u1112\u1161\u11abA\u0300a\r\r\n" ]
|
||||||
[ "\r\n\raA\u0300\u1112\u1161\u11ab" string-reverse ] unit-test
|
[ "\r\n\raA\u0300\u1112\u1161\u11ab" string-reverse ] unit-test
|
||||||
[ "dcba" ] [ "abcd" string-reverse ] unit-test
|
[ "dcba" ] [ "abcd" string-reverse ] unit-test
|
||||||
[ 3 ] [ "\u1112\u1161\u11abA\u0300a" [ length 1- ] keep
|
[ 3 ] [ "\u1112\u1161\u11abA\u0300a"
|
||||||
[ prev-grapheme ] keep prev-grapheme ] unit-test
|
dup last-grapheme head last-grapheme ] unit-test
|
||||||
|
|
|
@ -85,45 +85,38 @@ DEFER: grapheme-table
|
||||||
: chars ( i str n -- str[i] str[i+n] )
|
: chars ( i str n -- str[i] str[i+n] )
|
||||||
swap >r dupd + r> [ ?nth ] curry 2apply ;
|
swap >r dupd + r> [ ?nth ] curry 2apply ;
|
||||||
|
|
||||||
: next-grapheme-step ( i str -- i+1 str prev-class )
|
: find-index ( seq quot -- i ) find drop ; inline
|
||||||
2dup nth grapheme-class >r >r 1+ r> r> ;
|
: find-last-index ( seq quot -- i ) find-last drop ; inline
|
||||||
|
|
||||||
: (next-grapheme) ( i str prev-class -- next-i )
|
: first-grapheme ( str -- i )
|
||||||
3dup drop bounds-check? [
|
unclip-slice grapheme-class over
|
||||||
>r next-grapheme-step r> over grapheme-break?
|
[ grapheme-class tuck grapheme-break? ] find-index
|
||||||
[ 2drop 1- ] [ (next-grapheme) ] if
|
nip swap length or 1+ ;
|
||||||
] [ 2drop ] if ;
|
|
||||||
|
|
||||||
: next-grapheme ( i str -- next-i )
|
: (>graphemes) ( str -- )
|
||||||
next-grapheme-step (next-grapheme) ;
|
dup empty? [ drop ] [
|
||||||
|
dup first-grapheme cut-slice
|
||||||
|
swap , (>graphemes)
|
||||||
|
] if ;
|
||||||
|
|
||||||
: (>graphemes) ( i str -- )
|
|
||||||
2dup bounds-check? [
|
|
||||||
dupd [ next-grapheme ] keep
|
|
||||||
[ subseq , ] 2keep (>graphemes)
|
|
||||||
] [ 2drop ] if ;
|
|
||||||
: >graphemes ( str -- graphemes )
|
: >graphemes ( str -- graphemes )
|
||||||
[ 0 swap (>graphemes) ] { } make* ;
|
[ (>graphemes) ] { } make ;
|
||||||
|
|
||||||
: string-reverse ( str -- rts )
|
: string-reverse ( str -- rts )
|
||||||
>graphemes reverse concat ;
|
>graphemes reverse concat ;
|
||||||
|
|
||||||
: prev-grapheme-step ( i str -- i-1 str prev-class )
|
: unclip-last-slice ( seq -- beginning last )
|
||||||
2dup nth grapheme-class >r >r 1- r> r> ;
|
dup 1 head-slice* swap peek ;
|
||||||
|
|
||||||
: (prev-grapheme) ( i str next-class -- prev-i )
|
: last-grapheme ( str -- i )
|
||||||
pick zero? [
|
unclip-last-slice grapheme-class swap
|
||||||
>r prev-grapheme-step r> dupd grapheme-break?
|
[ grapheme-class dup rot grapheme-break? ] find-last-index
|
||||||
[ 2drop 1- ] [ (prev-grapheme) ] if
|
nip -1 or 1+ ;
|
||||||
] [ 2drop ] if ;
|
|
||||||
|
|
||||||
: prev-grapheme ( i str -- prev-i )
|
<<
|
||||||
prev-grapheme-step (prev-grapheme) ;
|
|
||||||
|
|
||||||
[
|
|
||||||
other-extend-lines process-other-extend \ other-extend define-value
|
other-extend-lines process-other-extend \ other-extend define-value
|
||||||
|
|
||||||
init-grapheme-table table
|
init-grapheme-table table
|
||||||
[ make-grapheme-table finish-table ] with-variable
|
[ make-grapheme-table finish-table ] with-variable
|
||||||
\ grapheme-table define-value
|
\ grapheme-table define-value
|
||||||
] with-compilation-unit
|
>>
|
||||||
|
|
|
@ -47,14 +47,6 @@ IN: unicode.syntax
|
||||||
CREATE ";" parse-tokens
|
CREATE ";" parse-tokens
|
||||||
categories swap seq-minus define-category ; parsing
|
categories swap seq-minus define-category ; parsing
|
||||||
|
|
||||||
TUPLE: code-point lower title upper ;
|
|
||||||
|
|
||||||
C: <code-point> code-point
|
|
||||||
|
|
||||||
: set-code-point ( seq -- )
|
|
||||||
4 head [ multihex ] map first4
|
|
||||||
<code-point> swap first set ;
|
|
||||||
|
|
||||||
: UNICHAR:
|
: UNICHAR:
|
||||||
! This should be part of CHAR:
|
! This should be part of CHAR:
|
||||||
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing
|
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing
|
||||||
|
|
Loading…
Reference in New Issue