Moving more parsing code to simple-flat-file
parent
7dba2a6a40
commit
4aa430cfd7
|
@ -1,10 +1,11 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
|
||||
USING: sequences splitting kernel math.parser io.files io.encodings.ascii
|
||||
biassocs ascii ;
|
||||
IN: simple-flat-file
|
||||
|
||||
! Strips comments from every line of seq and discards lines that end up empty
! (harvest removes the empty results).
! NOTE(review): this diff view shows two bodies for the word, presumably the
! pre-change one followed by the post-change one.
: drop-comments ( seq -- newseq )
|
||||
! Keeps only the text before the first "#" of each line.
[ "#" split1 drop ] map harvest ;
|
||||
! Splits each line on either "#" or "@" and keeps the first piece.
[ "#@" split first ] map harvest ;
|
||||
|
||||
! Splits a line on spaces and tabs, drops the empty pieces, and returns exactly
! two columns: anything past the second piece is cut off and missing columns
! are filled with f.
: split-column ( line -- columns )
|
||||
" \t" split harvest 2 short head 2 f pad-tail ;
|
||||
|
@ -24,3 +25,8 @@ IN: simple-flat-file
|
|||
! Reads filename as ASCII lines, runs them through process-codetable-lines
! (defined outside this hunk) and converts the result to a biassoc.
: flat-file>biassoc ( filename -- biassoc )
|
||||
ascii file-lines process-codetable-lines >biassoc ;
|
||||
|
||||
! Splits a line into its ";"-separated fields and trims blank characters from
! both ends of every field.
: split-; ( line -- array )
|
||||
";" split [ [ blank? ] trim ] map ;
|
||||
|
||||
! Loads a ";"-separated flat file: reads filename as ASCII lines, removes
! comments and empty lines with drop-comments, then splits every remaining
! line into trimmed fields with split-; .
: data ( filename -- data )
|
||||
ascii file-lines drop-comments [ split-; ] map ;
|
||||
|
|
|
@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
|
|||
ascii io assocs strings math namespaces make sorting combinators
|
||||
math.order arrays unicode.normalize unicode.data locals
|
||||
unicode.syntax macros sequences.deep words unicode.breaks
|
||||
quotations combinators.short-circuit ;
|
||||
quotations combinators.short-circuit simple-flat-file ;
|
||||
IN: unicode.collation
|
||||
|
||||
<PRIVATE
|
||||
|
@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
|
|||
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
|
||||
] map ;
|
||||
|
||||
! Parses one line of the form "keys ; weights".
! NOTE(review): "code-poing" in the stack effect looks like a typo for
! "code-point".
: parse-line ( line -- code-poing weight )
|
||||
! Split at the first ";" and trim blanks from both halves.
";" split1 [ [ blank? ] trim ] bi@
|
||||
! Left half: space-separated hex numbers collected into a string.
! Right half: handed to parse-weight.
[ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
|
||||
! Converts a string of space-separated hex numbers into a string whose
! elements are the parsed numbers.
: parse-keys ( string -- chars )
|
||||
" " split [ hex> ] "" map-as ;
|
||||
|
||||
! Builds a hashtable from a collation key file, mapping parsed keys to parsed
! weights.
! NOTE(review): this diff view shows two bodies, presumably the pre-change one
! (file-lines / filter-comments / parse-line) followed by the post-change one
! built on the shared `data` loader.
: parse-ducet ( file -- ducet )
|
||||
ascii file-lines filter-comments
|
||||
[ parse-line ] H{ } map>assoc ;
|
||||
! Each row from `data` is treated as a key/value pair: the first field goes
! through parse-keys and the second through parse-weight.
data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;
|
||||
|
||||
! Parses allkeys.txt when this line is evaluated and stores the result in
! ducet.
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
|
|||
math.parser hash2 math.order byte-arrays words namespaces words
|
||||
compiler.units parser io.encodings.ascii values interval-maps
|
||||
ascii sets combinators locals math.ranges sorting make
|
||||
strings.parser io.encodings.utf8 memoize ;
|
||||
strings.parser io.encodings.utf8 memoize simple-flat-file ;
|
||||
IN: unicode.data
|
||||
|
||||
<PRIVATE
|
||||
|
@ -18,12 +18,13 @@ VALUE: combine-map
|
|||
VALUE: class-map
|
||||
VALUE: compatibility-map
|
||||
VALUE: category-map
|
||||
VALUE: name-map
|
||||
VALUE: special-casing
|
||||
VALUE: properties
|
||||
|
||||
PRIVATE>
|
||||
|
||||
VALUE: name-map
|
||||
|
||||
! Looks char up in canonical-map.
: canonical-entry ( char -- seq ) canonical-map at ; inline
|
||||
! Looks the pair a, b up in combine-map with hash2; the stack effect documents
! the result as a character or f.
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline
|
||||
! Looks char up in compatibility-map.
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
|
||||
|
@ -76,20 +77,10 @@ PRIVATE>
|
|||
|
||||
! Loading data from UnicodeData.txt
|
||||
|
||||
! Splits a line into its ";"-separated fields and trims blank characters from
! both ends of every field.
! NOTE(review): an identical word appears in simple-flat-file in this commit;
! this copy is presumably the one being removed.
: split-; ( line -- array )
|
||||
";" split [ [ blank? ] trim ] map ;
|
||||
|
||||
! Reads filename as ASCII lines and splits every line into trimmed
! ";"-separated fields. Unlike the simple-flat-file version shown in this
! commit, this body does not strip comments first.
: data ( filename -- data )
|
||||
ascii file-lines [ split-; ] map ;
|
||||
|
||||
! Loads UnicodeData.txt from the unicode/data vocabulary directory through
! the `data` word.
: load-data ( -- data )
|
||||
"vocab:unicode/data/UnicodeData.txt" data ;
|
||||
|
||||
! Splits each line on either "#" or "@", keeps the first piece, and discards
! lines that end up empty.
! NOTE(review): presumably removed by this commit in favour of drop-comments
! in simple-flat-file, which has the same body.
: filter-comments ( lines -- lines )
|
||||
[ "#@" split first ] map harvest ;
|
||||
|
||||
! Builds an association from the rows in data: each key is the row's first
! field parsed as hex, each value is the field found at position index.
: (process-data) ( index data -- newdata )
|
||||
! NOTE(review): presumably the line dropped by this commit, since the shared
! `data` loader now strips comments itself.
filter-comments
|
||||
! For every row, pair the row's first field (key) with its index-th field.
[ [ nth ] keep first swap ] with { } map>assoc
|
||||
! Parse the keys as hexadecimal numbers; values are left untouched.
[ [ hex> ] dip ] assoc-map ;
|
||||
|
||||
|
@ -182,15 +173,13 @@ C: <code-point> code-point
|
|||
<code-point> swap first set ;
|
||||
|
||||
! Extra properties
|
||||
! Reads PropList.txt from the unicode/data vocabulary directory as ASCII lines.
! NOTE(review): presumably removed by this commit; the later parse-properties
! body loads the same file through `data` instead.
: properties-lines ( -- lines )
|
||||
"vocab:unicode/data/PropList.txt"
|
||||
ascii file-lines ;
|
||||
|
||||
! Parses PropList.txt into pairs of { first last } => property name.
! A key written as "A..B" becomes { A B }; a key with no ".." becomes { A A }.
! Both bounds are parsed as hexadecimal.
! NOTE(review): this diff view shows two bodies, presumably the pre-change one
! followed by the post-change one built on the shared `data` loader.
: parse-properties ( -- {{[a,b],prop}} )
|
||||
properties-lines filter-comments [
|
||||
split-; first2
|
||||
[ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
|
||||
] { } map>assoc ;
|
||||
"vocab:unicode/data/PropList.txt" data [
|
||||
[
|
||||
! split1 leaves f when there is no "..", so unless* duplicates the lone bound.
".." split1 [ dup ] unless*
|
||||
[ hex> ] bi@ 2array
|
||||
] dip
|
||||
] assoc-map ;
|
||||
|
||||
: properties>intervals ( properties -- assoc[str,interval] )
|
||||
dup values prune [ f ] H{ } map>assoc
|
||||
|
@ -233,10 +222,6 @@ name>char-hook set-global
|
|||
|
||||
SYMBOL: interned
|
||||
|
||||
! Reads filename as ASCII lines, strips comments with filter-comments and
! splits every remaining line into trimmed ";"-separated fields.
! NOTE(review): presumably removed by this commit; load-key-value calls `data`
! in its later body.
: parse-key-value ( filename -- assoc )
|
||||
! assoc is code point/range => name
|
||||
ascii file-lines filter-comments [ split-; ] map ;
|
||||
|
||||
! Appends a { key value' } pair to the sequence under construction (via `,`),
! where value' is the element of the `interned` variable's sequence that is
! equal to value.
: range, ( value key -- )
|
||||
swap interned get
|
||||
! find yields index and element; nip keeps only the element.
[ = ] with find nip 2array , ;
|
||||
|
@ -257,4 +242,4 @@ SYMBOL: interned
|
|||
PRIVATE>
|
||||
|
||||
! Loads a key/value flat file and passes the parsed rows to process-key-value
! (defined outside this hunk).
! NOTE(review): this diff view shows two bodies, presumably the pre-change one
! using parse-key-value and the post-change one using the shared `data` loader.
: load-key-value ( filename -- table )
|
||||
parse-key-value process-key-value ;
|
||||
data process-key-value ;
|
||||
|
|
Loading…
Reference in New Issue