Moving more parsing code to simple-flat-file

db4
Daniel Ehrenberg 2009-03-18 22:49:06 -05:00
parent 7dba2a6a40
commit 4aa430cfd7
3 changed files with 22 additions and 33 deletions

View File

@ -1,10 +1,11 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
USING: sequences splitting kernel math.parser io.files io.encodings.ascii
biassocs ascii ;
IN: simple-flat-file
! Strips the comment portion from every line, then discards lines that are
! left empty (harvest).
! NOTE(review): this listing is a diff whose +/- markers were lost, so the two
! body lines below appear to be the old and the new version of the same body:
! the first keeps the text before the first "#", the second keeps the text
! before the first "#" or "@". Confirm against the repository.
: drop-comments ( seq -- newseq )
[ "#" split1 drop ] map harvest ;
[ "#@" split first ] map harvest ;
! Splits a line on spaces and tabs, discards empty pieces, and returns exactly
! two columns: pieces beyond the second are cut off, missing ones are filled
! with f.
: split-column ( line -- columns )
" \t" split harvest 2 short head 2 f pad-tail ;
@ -24,3 +25,8 @@ IN: simple-flat-file
! Reads the file's lines as ASCII, runs them through process-codetable-lines
! (defined outside this hunk) and converts the result to a biassoc.
: flat-file>biassoc ( filename -- biassoc )
ascii file-lines process-codetable-lines >biassoc ;
! Splits a line into its ";"-separated fields and trims blank characters from
! both ends of every field.
: split-; ( line -- array )
";" split [ [ blank? ] trim ] map ;
! Reads the file's lines as ASCII, removes comments and empty lines with
! drop-comments, and splits every remaining line into trimmed ";"-separated
! fields. The result is a sequence with one array of fields per line.
: data ( filename -- data )
ascii file-lines drop-comments [ split-; ] map ;

View File

@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks
quotations combinators.short-circuit ;
quotations combinators.short-circuit simple-flat-file ;
IN: unicode.collation
<PRIVATE
@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
] map ;
! Splits a line at the first ";" and trims blanks from both halves. The left
! half is read as space-separated hex numbers collected into a string; the
! right half is handed to parse-weight.
! NOTE(review): "code-poing" in the stack effect looks like a typo for
! "code-point". This definition also appears to be the one this diff removes
! in favour of parse-keys -- confirm against the repository.
: parse-line ( line -- code-poing weight )
";" split1 [ [ blank? ] trim ] bi@
[ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
! Splits the string on single spaces, parses every piece as a hexadecimal
! number and collects the resulting numbers into a string.
: parse-keys ( string -- chars )
" " split [ hex> ] "" map-as ;
! Builds a hashtable from a collation keys file; keys come from the first
! field of each line and values from parse-weight applied to the second.
! NOTE(review): the +/- markers are missing from this diff listing. The first
! two body lines look like the removed implementation (file-lines,
! filter-comments, parse-line) and the third like its replacement, which
! loads the fields with data and maps parse-keys / parse-weight over each
! key/value pair -- confirm against the repository.
! NOTE(review): the replacement treats every row returned by data as a
! two-element pair; presumably each line of the file has exactly two
! ";"-separated fields -- verify.
: parse-ducet ( file -- ducet )
ascii file-lines filter-comments
[ parse-line ] H{ } map>assoc ;
data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;
! Parse allkeys.txt and store the result in ducet.
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet

View File

@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
math.parser hash2 math.order byte-arrays words namespaces words
compiler.units parser io.encodings.ascii values interval-maps
ascii sets combinators locals math.ranges sorting make
strings.parser io.encodings.utf8 memoize ;
strings.parser io.encodings.utf8 memoize simple-flat-file ;
IN: unicode.data
<PRIVATE
@ -18,12 +18,13 @@ VALUE: combine-map
VALUE: class-map
VALUE: compatibility-map
VALUE: category-map
VALUE: name-map
VALUE: special-casing
VALUE: properties
PRIVATE>
VALUE: name-map
! Looks char up in canonical-map.
: canonical-entry ( char -- seq ) canonical-map at ; inline
! Looks the pair a, b up in combine-map with hash2; per the stack effect the
! result is a character or f.
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline
! Looks char up in compatibility-map.
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
@ -76,20 +77,10 @@ PRIVATE>
! Loading data from UnicodeData.txt
! Splits a line into its ";"-separated fields and trims blank characters from
! both ends of every field.
! NOTE(review): this copy in unicode.data appears to be removed by the diff in
! favour of the identical word in simple-flat-file -- confirm.
: split-; ( line -- array )
";" split [ [ blank? ] trim ] map ;
! Reads the file's lines as ASCII and splits every line into trimmed
! ";"-separated fields. Unlike the simple-flat-file word of the same name
! shown earlier in this diff, this version does not strip comments.
! NOTE(review): this copy appears to be removed by the diff -- confirm.
: data ( filename -- data )
ascii file-lines [ split-; ] map ;
! Loads UnicodeData.txt from the unicode/data vocabulary directory through
! data, giving one array of fields per line.
: load-data ( -- data )
"vocab:unicode/data/UnicodeData.txt" data ;
! Keeps only the part of each line before the first "#" or "@" and discards
! lines that are left empty.
! NOTE(review): appears to be removed by this diff; the same body shows up in
! drop-comments in simple-flat-file -- confirm.
: filter-comments ( lines -- lines )
[ "#@" split first ] map harvest ;
! For every row of data, pairs the row's first field with the field at
! position index, collecting the pairs into an array-backed assoc; the keys
! (first fields) are then parsed as hexadecimal numbers.
! NOTE(review): the filter-comments line appears to be a removed line of the
! diff (comment stripping moved into data) -- confirm against the repository.
: (process-data) ( index data -- newdata )
filter-comments
[ [ nth ] keep first swap ] with { } map>assoc
[ [ hex> ] dip ] assoc-map ;
@ -182,15 +173,13 @@ C: <code-point> code-point
<code-point> swap first set ;
! Extra properties
! Returns the lines of PropList.txt, read as ASCII.
! NOTE(review): appears to be removed by this diff (parse-properties loads the
! file through data instead) -- confirm.
: properties-lines ( -- lines )
"vocab:unicode/data/PropList.txt"
ascii file-lines ;
! Parses PropList.txt into an assoc from code point ranges to property names.
! Each key is either "a..b" or a single "a"; it becomes the two-element array
! { a b } of hex-parsed numbers, with b = a when no ".." is present
! ([ dup ] unless*). The property name (value) is left untouched.
! NOTE(review): the +/- markers are missing from this diff listing. The first
! body (properties-lines filter-comments ... { } map>assoc) looks like the
! removed implementation and the second ("vocab:..." data ... assoc-map) like
! its replacement -- confirm against the repository.
: parse-properties ( -- {{[a,b],prop}} )
properties-lines filter-comments [
split-; first2
[ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
] { } map>assoc ;
"vocab:unicode/data/PropList.txt" data [
[
".." split1 [ dup ] unless*
[ hex> ] bi@ 2array
] dip
] assoc-map ;
: properties>intervals ( properties -- assoc[str,interval] )
dup values prune [ f ] H{ } map>assoc
@ -233,10 +222,6 @@ name>char-hook set-global
SYMBOL: interned
! Reads the file's lines as ASCII, strips comments and empty lines, and splits
! each remaining line into trimmed ";"-separated fields.
! NOTE(review): appears to be removed by this diff; load-key-value calls data
! instead -- confirm.
: parse-key-value ( filename -- assoc )
! assoc is code point/range => name
ascii file-lines filter-comments [ split-; ] map ;
! Looks for an element equal to value in the sequence stored in the interned
! variable, pairs key with the element found (f if there is none) and adds
! the pair to the sequence currently being built by make.
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
@ -257,4 +242,4 @@ SYMBOL: interned
PRIVATE>
! Loads a key/value data file and turns it into a table with
! process-key-value (defined outside this hunk).
! NOTE(review): the +/- markers are missing from this diff listing; the
! parse-key-value line looks like the removed body and the data line like its
! replacement -- confirm against the repository.
: load-key-value ( filename -- table )
parse-key-value process-key-value ;
data process-key-value ;