unicode.data: faster string operations by using constants instead of globals.

db4
John Benediktsson 2012-07-15 13:57:05 -07:00
parent e607cfb6ce
commit 2900fb9361
2 changed files with 36 additions and 39 deletions

View File

@ -93,5 +93,5 @@ PRIVATE>
all-vocabs-recursive filter-vocabs name-completions ;
: chars-matching ( str -- seq )
name-map get keys dup zip completions ;
name-map keys dup zip completions ;

View File

@ -11,36 +11,36 @@ IN: unicode.data
<PRIVATE
SYMBOL: simple-lower
SYMBOL: simple-upper
SYMBOL: simple-title
SYMBOL: canonical-map
SYMBOL: combine-map
SYMBOL: class-map
SYMBOL: compatibility-map
SYMBOL: category-map
SYMBOL: special-casing
SYMBOL: properties
CONSTANT: simple-lower H{ }
CONSTANT: simple-upper H{ }
CONSTANT: simple-title H{ }
CONSTANT: canonical-map H{ }
CONSTANT: combine-map H{ }
CONSTANT: class-map H{ }
CONSTANT: compatibility-map H{ }
SYMBOL: category-map ! B{ }
CONSTANT: special-casing H{ }
CONSTANT: properties H{ }
: >2ch ( a b -- c ) [ 21 shift ] dip + ;
: 2ch> ( c -- a b ) [ -21 shift ] [ 21 on-bits mask ] bi ;
PRIVATE>
SYMBOL: name-map
CONSTANT: name-map H{ }
: canonical-entry ( char -- seq ) canonical-map get-global at ; inline
: combine-chars ( a b -- char/f ) >2ch combine-map get-global at ; inline
: compatibility-entry ( char -- seq ) compatibility-map get-global at ; inline
: combining-class ( char -- n ) class-map get-global at ; inline
: canonical-entry ( char -- seq ) canonical-map at ; inline
: combine-chars ( a b -- char/f ) >2ch combine-map at ; inline
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
: combining-class ( char -- n ) class-map at ; inline
: non-starter? ( char -- ? ) combining-class { 0 f } member? not ; inline
: name>char ( name -- char ) name-map get-global at ; inline
: char>name ( char -- name ) name-map get-global value-at ; inline
: property? ( char property -- ? ) properties get-global at interval-key? ; inline
: ch>lower ( ch -- lower ) simple-lower get-global ?at drop ; inline
: ch>upper ( ch -- upper ) simple-upper get-global ?at drop ; inline
: ch>title ( ch -- title ) simple-title get-global ?at drop ; inline
: special-case ( ch -- casing-tuple ) special-casing get-global at ; inline
: name>char ( name -- char ) name-map at ; inline
: char>name ( char -- name ) name-map value-at ; inline
: property? ( char property -- ? ) properties at interval-key? ; inline
: ch>lower ( ch -- lower ) simple-lower ?at drop ; inline
: ch>upper ( ch -- upper ) simple-upper ?at drop ; inline
: ch>title ( ch -- title ) simple-title ?at drop ; inline
: special-case ( ch -- casing-tuple ) special-casing at ; inline
! For non-existent characters, use Cn
CONSTANT: categories
@ -143,7 +143,7 @@ PRIVATE>
2dup bounds-check? [ set-nth ] [ 3drop ] if ;
:: fill-ranges ( table -- table )
name-map get-global sort-values keys
name-map sort-values keys
[ { [ "first>" tail? ] [ "last>" tail? ] } 1|| ] filter
2 group [
[ name>char ] bi@ [ [a,b] ] [ table ?nth ] bi
@ -200,26 +200,23 @@ C: <code-point> code-point
[ [ set-code-point ] each ] H{ } make-assoc ;
load-data {
[ process-names name-map set-global ]
[ 13 swap process-data simple-lower set-global ]
[ 12 swap process-data simple-upper set-global ]
[ 14 swap process-data simple-upper get-global assoc-union simple-title set-global ]
[ process-combining class-map set-global ]
[ process-canonical canonical-map set-global combine-map set-global ]
[ process-compatibility compatibility-map set-global ]
[ process-names name-map swap assoc-union! drop ]
[ 13 swap process-data simple-lower swap assoc-union! drop ]
[ 12 swap process-data simple-upper swap assoc-union! drop ]
[ 14 swap process-data simple-upper assoc-union simple-title swap assoc-union! drop ]
[ process-combining class-map swap assoc-union! drop ]
[ process-canonical canonical-map swap assoc-union! drop combine-map swap assoc-union! drop ]
[ process-compatibility compatibility-map swap assoc-union! drop ]
[ process-category category-map set-global ]
} cleave
: postprocess-class ( -- )
combine-map get-global keys [ 2ch> nip ] map
[ combining-class not ] filter
[ 0 swap class-map get-global set-at ] each ;
combine-map keys [ 2ch> nip ] map
[ combining-class not ] filter
[ 0 swap class-map set-at ] each
postprocess-class
load-special-casing special-casing swap assoc-union! drop
load-special-casing special-casing set-global
load-properties properties set-global
load-properties properties swap assoc-union! drop
[ name>char [ "Invalid character" throw ] unless* ]
name>char-hook set-global