Privatizing unicode.case:ch>{lower,upper,title}
parent
0aec786359
commit
ad53cb8635
|
@ -37,6 +37,26 @@ HELP: quotable?
|
|||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Tests for characters which may appear in a Factor string literal without escaping." } ;
|
||||
|
||||
HELP: ascii?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Tests for whether a number is an ASCII character." } ;
|
||||
|
||||
HELP: ch>lower
|
||||
{ $values { "ch" "a character" } { "lower" "a character" } }
|
||||
{ $description "Converts an ASCII character to lower case." } ;
|
||||
|
||||
HELP: ch>upper
|
||||
{ $values { "ch" "a character" } { "upper" "a character" } }
|
||||
{ $description "Converts an ASCII character to upper case." } ;
|
||||
|
||||
HELP: >lower
|
||||
{ $values { "str" "a string" } { "lower" "a string" } }
|
||||
{ $description "Converts an ASCII string to lower case." } ;
|
||||
|
||||
HELP: >upper
|
||||
{ $values { "str" "a string" } { "upper" "a string" } }
|
||||
{ $description "Converts an ASCII string to upper case." } ;
|
||||
|
||||
ARTICLE: "ascii" "ASCII character classes"
|
||||
"The " { $vocab-link "ascii" } " vocabulary implements traditional ASCII character classes:"
|
||||
{ $subsection blank? }
|
||||
|
@ -46,6 +66,12 @@ ARTICLE: "ascii" "ASCII character classes"
|
|||
{ $subsection printable? }
|
||||
{ $subsection control? }
|
||||
{ $subsection quotable? }
|
||||
"Modern applications should use Unicode 5.0 instead (" { $vocab-link "unicode.categories" } ")." ;
|
||||
{ $subsection ascii? }
|
||||
"ASCII case conversion is also implemented:"
|
||||
{ $subsection ch>lower }
|
||||
{ $subsection ch>upper }
|
||||
{ $subsection >lower }
|
||||
{ $subsection >upper }
|
||||
"Modern applications should use Unicode 5.1 instead (" { $vocab-link "unicode.categories" } ")." ;
|
||||
|
||||
ABOUT: "ascii"
|
||||
|
|
|
@ -12,3 +12,8 @@ IN: ascii.tests
|
|||
0 "There are Four Upper Case characters"
|
||||
[ LETTER? [ 1+ ] when ] each
|
||||
] unit-test
|
||||
|
||||
[ t f ] [ CHAR: \s ascii? 400 ascii? ] unit-test
|
||||
|
||||
[ "HELLO HOW ARE YOU?" ] [ "hellO hOw arE YOU?" >upper ] unit-test
|
||||
[ "i'm good thx bai" ] [ "I'm Good THX bai" >lower ] unit-test
|
||||
|
|
|
@ -4,6 +4,8 @@ USING: kernel math math.order sequences
|
|||
combinators.short-circuit ;
|
||||
IN: ascii
|
||||
|
||||
: ascii? ( ch -- ? ) 0 127 between? ; inline
|
||||
|
||||
: blank? ( ch -- ? ) " \t\n\r" member? ; inline
|
||||
|
||||
: letter? ( ch -- ? ) CHAR: a CHAR: z between? ; inline
|
||||
|
@ -25,3 +27,15 @@ IN: ascii
|
|||
|
||||
: alpha? ( ch -- ? )
|
||||
[ [ Letter? ] [ digit? ] ] 1|| ;
|
||||
|
||||
: ch>lower ( ch -- lower )
|
||||
dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ;
|
||||
|
||||
: >lower ( str -- lower )
|
||||
[ ch>lower ] map ;
|
||||
|
||||
: ch>upper ( ch -- upper )
|
||||
dup CHAR: a CHAR: z between? [ HEX: 20 - ] when ;
|
||||
|
||||
: >upper ( str -- upper )
|
||||
[ ch>upper ] map ;
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
USING: accessors arrays assocs grouping kernel regexp.backend
|
||||
locals math namespaces regexp.parser sequences fry quotations
|
||||
math.order math.ranges vectors unicode.categories regexp.utils
|
||||
regexp.transition-tables words sets regexp.classes unicode.case ;
|
||||
regexp.transition-tables words sets regexp.classes unicode.case.private ;
|
||||
! This uses unicode.case.private for ch>upper and ch>lower
|
||||
! but case-insensitive matching should be done by case-folding everything
|
||||
! before processing starts
|
||||
IN: regexp.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
|
@ -160,6 +163,8 @@ M: LETTER-class nfa-node ( node -- )
|
|||
|
||||
M: character-class-range nfa-node ( node -- )
|
||||
case-insensitive option? [
|
||||
! This should be implemented for Unicode by case-folding
|
||||
! the input and all strings in the regexp.
|
||||
dup [ from>> ] [ to>> ] bi
|
||||
2dup [ Letter? ] bi@ and [
|
||||
rot drop
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
USING: accessors arrays assocs combinators io io.streams.string
|
||||
kernel math math.parser namespaces sets
|
||||
quotations sequences splitting vectors math.order
|
||||
unicode.categories strings regexp.backend regexp.utils
|
||||
unicode.case words locals regexp.classes ;
|
||||
strings regexp.backend regexp.utils
|
||||
unicode.case unicode.categories words locals regexp.classes ;
|
||||
IN: regexp.parser
|
||||
|
||||
FROM: math.ranges => [a,b] ;
|
||||
|
@ -261,7 +261,7 @@ ERROR: bad-escaped-literals seq ;
|
|||
parse-til-E
|
||||
drop1
|
||||
[ epsilon ] [
|
||||
[ quot call <constant> ] V{ } map-as
|
||||
quot call [ <constant> ] V{ } map-as
|
||||
first|concatenation
|
||||
] if-empty ; inline
|
||||
|
||||
|
@ -269,10 +269,10 @@ ERROR: bad-escaped-literals seq ;
|
|||
[ ] (parse-escaped-literals) ;
|
||||
|
||||
: lower-case-literals ( -- obj )
|
||||
[ ch>lower ] (parse-escaped-literals) ;
|
||||
[ >lower ] (parse-escaped-literals) ;
|
||||
|
||||
: upper-case-literals ( -- obj )
|
||||
[ ch>upper ] (parse-escaped-literals) ;
|
||||
[ >upper ] (parse-escaped-literals) ;
|
||||
|
||||
: parse-escaped ( -- obj )
|
||||
read1
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences grouping assocs kernel ascii unicode.case tr ;
|
||||
USING: sequences grouping assocs kernel ascii ascii tr ;
|
||||
IN: soundex
|
||||
|
||||
TR: soundex-tr
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
IN: tr.tests
|
||||
USING: tr tools.test unicode.case ;
|
||||
USING: tr tools.test ascii ;
|
||||
|
||||
TR: tr-test ch>upper "ABC" "XYZ" ;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: byte-arrays strings sequences sequences.private
|
||||
USING: byte-arrays strings sequences sequences.private ascii
|
||||
fry kernel words parser lexer assocs math math.order summary ;
|
||||
IN: tr
|
||||
|
||||
|
@ -11,8 +11,6 @@ M: bad-tr summary
|
|||
|
||||
<PRIVATE
|
||||
|
||||
: ascii? ( ch -- ? ) 0 127 between? ; inline
|
||||
|
||||
: tr-nth ( n mapping -- ch ) nth-unsafe 127 bitand ; inline
|
||||
|
||||
: check-tr ( from to -- )
|
||||
|
|
|
@ -9,10 +9,6 @@ ARTICLE: "unicode.case" "Case mapping"
|
|||
{ $subsection >lower }
|
||||
{ $subsection >title }
|
||||
{ $subsection >case-fold }
|
||||
"There are analogous routines which operate on individual code points, but these should " { $emphasis "not be used" } " in general as they have slightly different behavior. In some cases, for example, they do not perform the case operation, as a single code point must expand to more than one."
|
||||
{ $subsection ch>upper }
|
||||
{ $subsection ch>lower }
|
||||
{ $subsection ch>title }
|
||||
"To test if a string is in a given case:"
|
||||
{ $subsection upper? }
|
||||
{ $subsection lower? }
|
||||
|
@ -53,18 +49,3 @@ HELP: title?
|
|||
HELP: case-fold?
|
||||
{ $values { "string" string } { "?" "a boolean" } }
|
||||
{ $description "Tests if a string is in case-folded form." } ;
|
||||
|
||||
HELP: ch>lower
|
||||
{ $values { "ch" "a code point" } { "lower" "a code point" } }
|
||||
{ $description "Converts a code point to lower case." }
|
||||
{ $warning "Don't use this unless you know what you're doing! " { $code ">lower" } " is not the same as " { $code "[ ch>lower ] map" } "." } ;
|
||||
|
||||
HELP: ch>upper
|
||||
{ $values { "ch" "a code point" } { "upper" "a code point" } }
|
||||
{ $description "Converts a code point to upper case." }
|
||||
{ $warning "Don't use this unless you know what you're doing! " { $code ">upper" } " is not the same as " { $code "[ ch>upper ] map" } "." } ;
|
||||
|
||||
HELP: ch>title
|
||||
{ $values { "ch" "a code point" } { "title" "a code point" } }
|
||||
{ $description "Converts a code point to title case." }
|
||||
{ $warning "Don't use this unless you know what you're doing! " { $code ">title" } " is not the same as " { $code "[ ch>title ] map" } "." } ;
|
||||
|
|
|
@ -7,11 +7,11 @@ IN: unicode.case
|
|||
|
||||
<PRIVATE
|
||||
: at-default ( key assoc -- value/key ) [ at ] [ drop ] 2bi or ;
|
||||
PRIVATE>
|
||||
|
||||
: ch>lower ( ch -- lower ) simple-lower at-default ;
|
||||
: ch>upper ( ch -- upper ) simple-upper at-default ;
|
||||
: ch>title ( ch -- title ) simple-title at-default ;
|
||||
PRIVATE>
|
||||
|
||||
SYMBOL: locale ! Just casing locale, or overall?
|
||||
|
||||
|
|
|
@ -128,12 +128,9 @@ VALUE: properties
|
|||
cat categories index char table ?set-nth
|
||||
] assoc-each table fill-ranges ] ;
|
||||
|
||||
: ascii-lower ( string -- lower )
|
||||
[ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ;
|
||||
|
||||
: process-names ( data -- names-hash )
|
||||
1 swap (process-data) [
|
||||
ascii-lower { { CHAR: \s CHAR: - } } substitute swap
|
||||
>lower { { CHAR: \s CHAR: - } } substitute swap
|
||||
] H{ } assoc-map-as ;
|
||||
|
||||
: multihex ( hexstring -- string )
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences namespaces make unicode.data kernel math arrays
|
||||
locals sorting.insertion accessors assocs math.order combinators ;
|
||||
locals sorting.insertion accessors assocs math.order combinators
|
||||
unicode.syntax ;
|
||||
IN: unicode.normalize
|
||||
|
||||
<PRIVATE
|
||||
|
|
|
@ -5,7 +5,7 @@ USING: kernel namespaces make xmode.rules xmode.tokens
|
|||
xmode.marker.state xmode.marker.context xmode.utilities
|
||||
xmode.catalog sequences math assocs combinators strings
|
||||
parser-combinators.regexp splitting parser-combinators ascii
|
||||
unicode.case combinators.short-circuit accessors ;
|
||||
ascii combinators.short-circuit accessors ;
|
||||
|
||||
! Based on org.gjt.sp.jedit.syntax.TokenMarker
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: io io.files io.files.temp io.streams.duplex kernel
|
||||
sequences sequences.private strings vectors words memoize
|
||||
splitting grouping hints tr continuations io.encodings.ascii
|
||||
unicode.case ;
|
||||
ascii ;
|
||||
IN: benchmark.reverse-complement
|
||||
|
||||
TR: trans-map ch>upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ;
|
||||
|
|
|
@ -2,7 +2,7 @@ USING: arrays combinators kernel lists math math.parser
|
|||
namespaces parser lexer parser-combinators
|
||||
parser-combinators.simple promises quotations sequences strings
|
||||
math.order assocs prettyprint.backend prettyprint.custom memoize
|
||||
unicode.case unicode.categories combinators.short-circuit
|
||||
ascii unicode.categories combinators.short-circuit
|
||||
accessors make io ;
|
||||
IN: parser-combinators.regexp
|
||||
|
||||
|
|
Loading…
Reference in New Issue