Moving more parsing code to simple-flat-file
parent
7dba2a6a40
commit
4aa430cfd7
|
|
@ -1,10 +1,11 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
|
USING: sequences splitting kernel math.parser io.files io.encodings.ascii
|
||||||
|
biassocs ascii ;
|
||||||
IN: simple-flat-file
|
IN: simple-flat-file
|
||||||
|
|
||||||
: drop-comments ( seq -- newseq )
|
: drop-comments ( seq -- newseq )
|
||||||
[ "#" split1 drop ] map harvest ;
|
[ "#@" split first ] map harvest ;
|
||||||
|
|
||||||
: split-column ( line -- columns )
|
: split-column ( line -- columns )
|
||||||
" \t" split harvest 2 short head 2 f pad-tail ;
|
" \t" split harvest 2 short head 2 f pad-tail ;
|
||||||
|
|
@ -24,3 +25,8 @@ IN: simple-flat-file
|
||||||
: flat-file>biassoc ( filename -- biassoc )
|
: flat-file>biassoc ( filename -- biassoc )
|
||||||
ascii file-lines process-codetable-lines >biassoc ;
|
ascii file-lines process-codetable-lines >biassoc ;
|
||||||
|
|
||||||
|
: split-; ( line -- array )
|
||||||
|
";" split [ [ blank? ] trim ] map ;
|
||||||
|
|
||||||
|
: data ( filename -- data )
|
||||||
|
ascii file-lines drop-comments [ split-; ] map ;
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
|
||||||
ascii io assocs strings math namespaces make sorting combinators
|
ascii io assocs strings math namespaces make sorting combinators
|
||||||
math.order arrays unicode.normalize unicode.data locals
|
math.order arrays unicode.normalize unicode.data locals
|
||||||
unicode.syntax macros sequences.deep words unicode.breaks
|
unicode.syntax macros sequences.deep words unicode.breaks
|
||||||
quotations combinators.short-circuit ;
|
quotations combinators.short-circuit simple-flat-file ;
|
||||||
IN: unicode.collation
|
IN: unicode.collation
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
|
||||||
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
|
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
|
||||||
] map ;
|
] map ;
|
||||||
|
|
||||||
: parse-line ( line -- code-point weight )
|
: parse-keys ( string -- chars )
|
||||||
";" split1 [ [ blank? ] trim ] bi@
|
" " split [ hex> ] "" map-as ;
|
||||||
[ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
|
|
||||||
|
|
||||||
: parse-ducet ( file -- ducet )
|
: parse-ducet ( file -- ducet )
|
||||||
ascii file-lines filter-comments
|
data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;
|
||||||
[ parse-line ] H{ } map>assoc ;
|
|
||||||
|
|
||||||
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet
|
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
|
||||||
math.parser hash2 math.order byte-arrays words namespaces words
|
math.parser hash2 math.order byte-arrays words namespaces words
|
||||||
compiler.units parser io.encodings.ascii values interval-maps
|
compiler.units parser io.encodings.ascii values interval-maps
|
||||||
ascii sets combinators locals math.ranges sorting make
|
ascii sets combinators locals math.ranges sorting make
|
||||||
strings.parser io.encodings.utf8 memoize ;
|
strings.parser io.encodings.utf8 memoize simple-flat-file ;
|
||||||
IN: unicode.data
|
IN: unicode.data
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
@ -18,12 +18,13 @@ VALUE: combine-map
|
||||||
VALUE: class-map
|
VALUE: class-map
|
||||||
VALUE: compatibility-map
|
VALUE: compatibility-map
|
||||||
VALUE: category-map
|
VALUE: category-map
|
||||||
VALUE: name-map
|
|
||||||
VALUE: special-casing
|
VALUE: special-casing
|
||||||
VALUE: properties
|
VALUE: properties
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
VALUE: name-map
|
||||||
|
|
||||||
: canonical-entry ( char -- seq ) canonical-map at ; inline
|
: canonical-entry ( char -- seq ) canonical-map at ; inline
|
||||||
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline
|
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline
|
||||||
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
|
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
|
||||||
|
|
@ -76,20 +77,10 @@ PRIVATE>
|
||||||
|
|
||||||
! Loading data from UnicodeData.txt
|
! Loading data from UnicodeData.txt
|
||||||
|
|
||||||
: split-; ( line -- array )
|
|
||||||
";" split [ [ blank? ] trim ] map ;
|
|
||||||
|
|
||||||
: data ( filename -- data )
|
|
||||||
ascii file-lines [ split-; ] map ;
|
|
||||||
|
|
||||||
: load-data ( -- data )
|
: load-data ( -- data )
|
||||||
"vocab:unicode/data/UnicodeData.txt" data ;
|
"vocab:unicode/data/UnicodeData.txt" data ;
|
||||||
|
|
||||||
: filter-comments ( lines -- lines )
|
|
||||||
[ "#@" split first ] map harvest ;
|
|
||||||
|
|
||||||
: (process-data) ( index data -- newdata )
|
: (process-data) ( index data -- newdata )
|
||||||
filter-comments
|
|
||||||
[ [ nth ] keep first swap ] with { } map>assoc
|
[ [ nth ] keep first swap ] with { } map>assoc
|
||||||
[ [ hex> ] dip ] assoc-map ;
|
[ [ hex> ] dip ] assoc-map ;
|
||||||
|
|
||||||
|
|
@ -182,15 +173,13 @@ C: <code-point> code-point
|
||||||
<code-point> swap first set ;
|
<code-point> swap first set ;
|
||||||
|
|
||||||
! Extra properties
|
! Extra properties
|
||||||
: properties-lines ( -- lines )
|
|
||||||
"vocab:unicode/data/PropList.txt"
|
|
||||||
ascii file-lines ;
|
|
||||||
|
|
||||||
: parse-properties ( -- {{[a,b],prop}} )
|
: parse-properties ( -- {{[a,b],prop}} )
|
||||||
properties-lines filter-comments [
|
"vocab:unicode/data/PropList.txt" data [
|
||||||
split-; first2
|
[
|
||||||
[ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
|
".." split1 [ dup ] unless*
|
||||||
] { } map>assoc ;
|
[ hex> ] bi@ 2array
|
||||||
|
] dip
|
||||||
|
] assoc-map ;
|
||||||
|
|
||||||
: properties>intervals ( properties -- assoc[str,interval] )
|
: properties>intervals ( properties -- assoc[str,interval] )
|
||||||
dup values prune [ f ] H{ } map>assoc
|
dup values prune [ f ] H{ } map>assoc
|
||||||
|
|
@ -233,10 +222,6 @@ name>char-hook set-global
|
||||||
|
|
||||||
SYMBOL: interned
|
SYMBOL: interned
|
||||||
|
|
||||||
: parse-key-value ( filename -- assoc )
|
|
||||||
! assoc is code point/range => name
|
|
||||||
ascii file-lines filter-comments [ split-; ] map ;
|
|
||||||
|
|
||||||
: range, ( value key -- )
|
: range, ( value key -- )
|
||||||
swap interned get
|
swap interned get
|
||||||
[ = ] with find nip 2array , ;
|
[ = ] with find nip 2array , ;
|
||||||
|
|
@ -257,4 +242,4 @@ SYMBOL: interned
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: load-key-value ( filename -- table )
|
: load-key-value ( filename -- table )
|
||||||
parse-key-value process-key-value ;
|
data process-key-value ;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue