Moving more parsing code to simple-flat-file

db4
Daniel Ehrenberg 2009-03-18 22:49:06 -05:00
parent 7dba2a6a40
commit 4aa430cfd7
3 changed files with 22 additions and 33 deletions

View File

@ -1,10 +1,11 @@
! Copyright (C) 2009 Daniel Ehrenberg ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ; USING: sequences splitting kernel math.parser io.files io.encodings.ascii
biassocs ascii ;
IN: simple-flat-file IN: simple-flat-file
! drop-comments: strips the trailing comment from every line of seq, then
! `harvest` discards the lines that are left empty.
! Split-view diff rows: the left half is the pre-commit text, the right half
! is the post-commit text.
: drop-comments ( seq -- newseq ) : drop-comments ( seq -- newseq )
! Old body: keep whatever precedes the first "#".
! New body: split on either '#' or '@' and keep the first piece, so '@' now
! also starts a comment.
[ "#" split1 drop ] map harvest ; [ "#@" split first ] map harvest ;
! split-column: breaks one line into exactly two columns.
! Unchanged context rows (the same text appears in both diff columns).
: split-column ( line -- columns ) : split-column ( line -- columns )
! Split on space or tab, drop empty pieces, keep at most the first two,
! then pad with f so the result always has length 2.
" \t" split harvest 2 short head 2 f pad-tail ; " \t" split harvest 2 short head 2 f pad-tail ;
@ -24,3 +25,8 @@ IN: simple-flat-file
! flat-file>biassoc: loads a code-table file into a biassoc.
! Unchanged context rows (the same text appears in both diff columns).
: flat-file>biassoc ( filename -- biassoc ) : flat-file>biassoc ( filename -- biassoc )
! Read the file as ASCII lines, pass them through process-codetable-lines
! (defined outside this hunk), and convert the result with >biassoc.
ascii file-lines process-codetable-lines >biassoc ; ascii file-lines process-codetable-lines >biassoc ;
! split-; : splits a line into its ";"-separated fields.
! Added by this commit (the rows appear in one diff column only); the same
! definition is removed from unicode.data further down in this diff.
: split-; ( line -- array )
! Every field has leading and trailing blank characters trimmed.
";" split [ [ blank? ] trim ] map ;
! data: reads a ";"-delimited flat file into a sequence of field arrays.
! Added by this commit (the rows appear in one diff column only).
: data ( filename -- data )
! Read ASCII lines, strip comments and empty lines with drop-comments,
! then split each remaining line with split-; .
ascii file-lines drop-comments [ split-; ] map ;

View File

@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks unicode.syntax macros sequences.deep words unicode.breaks
quotations combinators.short-circuit ; quotations combinators.short-circuit simple-flat-file ;
IN: unicode.collation IN: unicode.collation
<PRIVATE <PRIVATE
@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri* [ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
] map ; ] map ;
! Diff rows: the removed word parse-line (left column) is replaced by the
! new word parse-keys (right column).
! NOTE(review): "code-poing" in the removed stack effect looks like a typo
! for "code-point"; it only exists on the pre-commit side.
: parse-line ( line -- code-poing weight ) : parse-keys ( string -- chars )
! Left (removed): split the line at the first ";" and trim blanks from both
! halves.
! Right (added, the whole body of parse-keys): split on spaces, parse each
! piece with hex>, and collect the results into a string ("" map-as).
";" split1 [ [ blank? ] trim ] bi@ " " split [ hex> ] "" map-as ;
! Removed tail of parse-line: the first half becomes a string of hex-parsed
! values, the second half goes through parse-weight.
[ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
! parse-ducet: builds a hashtable from the given collation key file.
! The header row is unchanged (both diff columns carry the same text).
: parse-ducet ( file -- ducet ) : parse-ducet ( file -- ducet )
! Left (removed): read ASCII lines and strip comments with filter-comments.
! Right (added): `data` (from simple-flat-file, added to USING: in this
! commit) yields the split rows; each row is mapped key -> parse-keys,
! value -> parse-weight and collected into a hashtable.
! NOTE(review): assoc-map-as over the rows presumably relies on every row
! having exactly two fields -- confirm against allkeys.txt.
ascii file-lines filter-comments data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;
! Removed tail: each line went through parse-line and into a hashtable.
[ parse-line ] H{ } map>assoc ;
! Unchanged context row: at load time, parse allkeys.txt and store the
! result with `to: ducet`.
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet "vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet

View File

@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
math.parser hash2 math.order byte-arrays words namespaces words math.parser hash2 math.order byte-arrays words namespaces words
compiler.units parser io.encodings.ascii values interval-maps compiler.units parser io.encodings.ascii values interval-maps
ascii sets combinators locals math.ranges sorting make ascii sets combinators locals math.ranges sorting make
strings.parser io.encodings.utf8 memoize ; strings.parser io.encodings.utf8 memoize simple-flat-file ;
IN: unicode.data IN: unicode.data
<PRIVATE <PRIVATE
@ -18,12 +18,13 @@ VALUE: combine-map
! VALUE: declarations of unicode.data; the run starts above this hunk.
! Rows carrying the same text twice are unchanged context.
VALUE: class-map VALUE: class-map
VALUE: compatibility-map VALUE: compatibility-map
VALUE: category-map VALUE: category-map
! Single-column row: this declaration, placed before PRIVATE>, appears to be
! the one the commit removes.
VALUE: name-map
VALUE: special-casing VALUE: special-casing
VALUE: properties VALUE: properties
PRIVATE> PRIVATE>
! Single-column row: name-map appears to be re-declared here, after
! PRIVATE>, which moves it out of the private section.
VALUE: name-map
! canonical-entry: looks char up in canonical-map with `at` (unchanged
! context row).
: canonical-entry ( char -- seq ) canonical-map at ; inline : canonical-entry ( char -- seq ) canonical-map at ; inline
! combine-chars: looks the pair (a, b) up in combine-map with hash2; the
! stack effect declares the result as char/f (unchanged context row).
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline : combine-chars ( a b -- char/f ) combine-map hash2 ; inline
! compatibility-entry: looks char up in compatibility-map with `at`
! (unchanged context row).
: compatibility-entry ( char -- seq ) compatibility-map at ; inline : compatibility-entry ( char -- seq ) compatibility-map at ; inline
@ -76,20 +77,10 @@ PRIVATE>
! Loading data from UnicodeData.txt ! Loading data from UnicodeData.txt
! Removed by this commit (single-column rows): split-; moves verbatim to
! simple-flat-file, see the first file of this diff.
: split-; ( line -- array )
! Split on ";" and trim blank characters from both ends of every field.
";" split [ [ blank? ] trim ] map ;
! Removed by this commit (single-column rows). This local `data` did NOT
! strip comments; the replacement added to simple-flat-file runs
! drop-comments before splitting.
: data ( filename -- data )
! Read ASCII lines and split every line with split-; .
ascii file-lines [ split-; ] map ;
! load-data: parses UnicodeData.txt into rows of fields.
! The text is unchanged, but the local `data` is removed in this hunk and
! simple-flat-file is added to USING:, so `data` now refers to the
! simple-flat-file word, which also drops comments and empty lines.
: load-data ( -- data ) : load-data ( -- data )
"vocab:unicode/data/UnicodeData.txt" data ; "vocab:unicode/data/UnicodeData.txt" data ;
! Removed by this commit (single-column rows). Its body is the same code the
! commit installs as the new body of drop-comments in simple-flat-file.
: filter-comments ( lines -- lines )
! Cut each line at the first '#' or '@', then discard empty lines.
[ "#@" split first ] map harvest ;
! (process-data): from rows of fields, builds an association of
! hex-parsed first field => field number `index`.
: (process-data) ( index data -- newdata ) : (process-data) ( index data -- newdata )
! Single-column row: the filter-comments call is removed by this commit;
! the rows coming out of the new `data` word have already been through
! drop-comments.
filter-comments
! For every row, pair its first element (key) with its index-th element
! (value) and collect the pairs into an array-backed assoc.
[ [ nth ] keep first swap ] with { } map>assoc [ [ nth ] keep first swap ] with { } map>assoc
! Convert each key from a hex string to a number; values are left untouched.
[ [ hex> ] dip ] assoc-map ; [ [ hex> ] dip ] assoc-map ;
@ -182,15 +173,13 @@ C: <code-point> code-point
<code-point> swap first set ; <code-point> swap first set ;
! Extra properties ! Extra properties
! Removed by this commit (single-column rows): read PropList.txt as ASCII
! lines. The new parse-properties passes the same path to `data` instead.
: properties-lines ( -- lines )
"vocab:unicode/data/PropList.txt"
ascii file-lines ;
! parse-properties: turns PropList.txt into pairs of { low high } range and
! property name. In each fused row the left half is pre-commit text and the
! right half is post-commit text.
: parse-properties ( -- {{[a,b],prop}} ) : parse-properties ( -- {{[a,b],prop}} )
! Old: read the lines and strip comments by hand.
! New: `data` supplies comment-free rows that are already split on ";".
properties-lines filter-comments [ "vocab:unicode/data/PropList.txt" data [
! Old: split the line and take its first two fields. New: opens the quotation
! that is applied to the key via `dip`.
split-; first2 [
! Key handling (same logic on both sides): split at "..", and when there is
! no ".." reuse the single value as both ends ([ dup ] unless*).
[ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip ".." split1 [ dup ] unless*
! Old: collect the pairs with map>assoc. New: parse both ends as hex and
! pair them.
] { } map>assoc ; [ hex> ] bi@ 2array
] dip
! NOTE(review): assoc-map over the rows presumably relies on every row having
! exactly two fields -- confirm against PropList.txt.
] assoc-map ;
: properties>intervals ( properties -- assoc[str,interval] ) : properties>intervals ( properties -- assoc[str,interval] )
dup values prune [ f ] H{ } map>assoc dup values prune [ f ] H{ } map>assoc
@ -233,10 +222,6 @@ name>char-hook set-global
! Declares the symbol `interned`; range, below reads it with `interned get`
! (unchanged context row).
SYMBOL: interned SYMBOL: interned
! Removed by this commit (single-column rows). The pipeline -- ASCII lines,
! comment stripping, split-; on every line -- matches what the new `data`
! word in simple-flat-file does, and load-key-value now calls `data`.
: parse-key-value ( filename -- assoc )
! assoc is code point/range => name
ascii file-lines filter-comments [ split-; ] map ;
! range, : appends a { key value } pair to the sequence being built by the
! enclosing `make` (the trailing `,`). Unchanged context rows.
: range, ( value key -- ) : range, ( value key -- )
! Bring value to the top and fetch the sequence stored in `interned`.
swap interned get swap interned get
! Find the element of that sequence equal to value and pair it with key.
! NOTE(review): presumably this makes equal names share one instance -- confirm.
[ = ] with find nip 2array , ; [ = ] with find nip 2array , ;
@ -257,4 +242,4 @@ SYMBOL: interned
PRIVATE> PRIVATE>
! load-key-value: parses a key/value data file and post-processes it with
! process-key-value (defined outside this hunk).
: load-key-value ( filename -- table ) : load-key-value ( filename -- table )
! Old: the local parse-key-value. New: `data` from simple-flat-file.
parse-key-value process-key-value ; data process-key-value ;