From cf299210838b5cdb485f3c213589ce0f47fe157b Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Sun, 27 Jan 2008 23:54:38 -0600 Subject: [PATCH] Unicode breaks fix and deletion of repetition in syntax --- extra/unicode/breaks/breaks-tests.factor | 4 +- extra/unicode/breaks/breaks.factor | 47 ++++++++++-------------- extra/unicode/syntax/syntax.factor | 8 ---- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/extra/unicode/breaks/breaks-tests.factor b/extra/unicode/breaks/breaks-tests.factor index c183c3a86e..26f419ff0e 100644 --- a/extra/unicode/breaks/breaks-tests.factor +++ b/extra/unicode/breaks/breaks-tests.factor @@ -3,5 +3,5 @@ USING: tools.test unicode.breaks sequences math kernel ; [ "\u1112\u1161\u11abA\u0300a\r\r\n" ] [ "\r\n\raA\u0300\u1112\u1161\u11ab" string-reverse ] unit-test [ "dcba" ] [ "abcd" string-reverse ] unit-test -[ 3 ] [ "\u1112\u1161\u11abA\u0300a" [ length 1- ] keep - [ prev-grapheme ] keep prev-grapheme ] unit-test +[ 3 ] [ "\u1112\u1161\u11abA\u0300a" + dup last-grapheme head last-grapheme ] unit-test diff --git a/extra/unicode/breaks/breaks.factor b/extra/unicode/breaks/breaks.factor index fb893ed51b..9c9242edc3 100644 --- a/extra/unicode/breaks/breaks.factor +++ b/extra/unicode/breaks/breaks.factor @@ -85,45 +85,38 @@ DEFER: grapheme-table : chars ( i str n -- str[i] str[i+n] ) swap >r dupd + r> [ ?nth ] curry 2apply ; -: next-grapheme-step ( i str -- i+1 str prev-class ) - 2dup nth grapheme-class >r >r 1+ r> r> ; +: find-index ( seq quot -- i ) find drop ; inline +: find-last-index ( seq quot -- i ) find-last drop ; inline -: (next-grapheme) ( i str prev-class -- next-i ) - 3dup drop bounds-check? [ - >r next-grapheme-step r> over grapheme-break? - [ 2drop 1- ] [ (next-grapheme) ] if - ] [ 2drop ] if ; +: first-grapheme ( str -- i ) + unclip-slice grapheme-class over + [ grapheme-class tuck grapheme-break? ] find-index + nip swap length or 1+ ; -: next-grapheme ( i str -- next-i ) - next-grapheme-step (next-grapheme) ; +: (>graphemes) ( str -- ) + dup empty? [ drop ] [ + dup first-grapheme cut-slice + swap , (>graphemes) + ] if ; -: (>graphemes) ( i str -- ) - 2dup bounds-check? [ - dupd [ next-grapheme ] keep - [ subseq , ] 2keep (>graphemes) - ] [ 2drop ] if ; : >graphemes ( str -- graphemes ) - [ 0 swap (>graphemes) ] { } make* ; + [ (>graphemes) ] { } make ; : string-reverse ( str -- rts ) >graphemes reverse concat ; -: prev-grapheme-step ( i str -- i-1 str prev-class ) - 2dup nth grapheme-class >r >r 1- r> r> ; +: unclip-last-slice ( seq -- beginning last ) + dup 1 head-slice* swap peek ; -: (prev-grapheme) ( i str next-class -- prev-i ) - pick zero? [ - >r prev-grapheme-step r> dupd grapheme-break? - [ 2drop 1- ] [ (prev-grapheme) ] if - ] [ 2drop ] if ; +: last-grapheme ( str -- i ) + unclip-last-slice grapheme-class swap + [ grapheme-class dup rot grapheme-break? ] find-last-index + nip -1 or 1+ ; -: prev-grapheme ( i str -- prev-i ) - prev-grapheme-step (prev-grapheme) ; - -[ +<< other-extend-lines process-other-extend \ other-extend define-value init-grapheme-table table [ make-grapheme-table finish-table ] with-variable \ grapheme-table define-value -] with-compilation-unit +>> diff --git a/extra/unicode/syntax/syntax.factor b/extra/unicode/syntax/syntax.factor index 91d46d179f..5119663872 100644 --- a/extra/unicode/syntax/syntax.factor +++ b/extra/unicode/syntax/syntax.factor @@ -47,14 +47,6 @@ IN: unicode.syntax CREATE ";" parse-tokens categories swap seq-minus define-category ; parsing -TUPLE: code-point lower title upper ; - -C: code-point - -: set-code-point ( seq -- ) - 4 head [ multihex ] map first4 - swap first set ; - : UNICHAR: ! This should be part of CHAR: scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing