Moving more parsing code to simple-flat-file

2009-03-18 22:49:06 -05:00 · 2009-03-18 22:49:06 -05:00 · 4aa430cfd7
parent 7dba2a6a40
commit 4aa430cfd7
3 changed files with 22 additions and 33 deletions
--- a/basis/simple-flat-file/simple-flat-file.factor
+++ b/basis/simple-flat-file/simple-flat-file.factor
@ -1,10 +1,11 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
+USING: sequences splitting kernel math.parser io.files io.encodings.ascii
 biassocs ascii ;
 IN: simple-flat-file
 ! Cut each line at its first '#' or '@' and keep only what precedes it,
 ! then discard the lines that end up empty (harvest).
 ! The removed ('-') body cut at '#' only.
 : drop-comments ( seq -- newseq )
-    [ "#" split1 drop ] map harvest ;
+    [ "#@" split first ] map harvest ;
 ! Split a line on spaces and tabs, drop the empty pieces, keep at most the
 ! first two columns, and pad with f so the result always has two elements.
 : split-column ( line -- columns )
    " \t" split harvest 2 short head 2 f pad-tail ;
@ -24,3 +25,8 @@ IN: simple-flat-file
 ! Read the file as ASCII lines, run them through process-codetable-lines
 ! (defined outside this hunk), and convert the result to a biassoc.
 : flat-file>biassoc ( filename -- biassoc )
    ascii file-lines process-codetable-lines >biassoc ;
 ! Split a line on ';' and trim blank characters from both ends of each field.
 : split-; ( line -- array )
    ";" split [ [ blank? ] trim ] map ;
 ! Read the file as ASCII lines, strip comments with drop-comments, and
 ! split each remaining line into its ';'-separated, trimmed fields.
 : data ( filename -- data )
    ascii file-lines drop-comments [ split-; ] map ;
--- a/basis/unicode/collation/collation.factor
+++ b/basis/unicode/collation/collation.factor
@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
 ascii io assocs strings math namespaces make sorting combinators
 math.order arrays unicode.normalize unicode.data locals
 unicode.syntax macros sequences.deep words unicode.breaks
-quotations combinators.short-circuit ;
+quotations combinators.short-circuit simple-flat-file ;
 IN: unicode.collation
 <PRIVATE
@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
        [ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
    ] map ;
! parse-keys ('+' lines): split the string on spaces, parse each piece as a
! hex number, and collect the resulting code points into a string.
! parse-line ('-' lines): split the line at the first ';', trim both halves,
! then apply that same hex conversion to the left half and parse-weight to
! the right half.
! NOTE(review): the last line below carries no +/- marker but reads as the
! tail of the removed parse-line -- confirm against the original patch.
-: parse-line ( line -- code-poing weight )
+: parse-keys ( string -- chars )
-    ";" split1 [ [ blank? ] trim ] bi@
+    " " split [ hex> ] "" map-as ;
    [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
 ! Build a hashtable from a collation key file. The added ('+') line loads
 ! the rows with `data` and converts each key with parse-keys and each value
 ! with parse-weight. The removed ('-') line read the ASCII lines itself and
 ! stripped comments with filter-comments.
 ! NOTE(review): the unmarked `[ parse-line ] H{ } map>assoc ;` line looks
 ! like the rest of the removed body -- confirm against the original patch.
 : parse-ducet ( file -- ducet )
-    ascii file-lines filter-comments
+    data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;
    [ parse-line ] H{ } map>assoc ;
 ! Parse vocab:unicode/collation/allkeys.txt and store the result in `ducet`.
 "vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet
--- a/basis/unicode/data/data.factor
+++ b/basis/unicode/data/data.factor
@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
 math.parser hash2 math.order byte-arrays words namespaces words
 compiler.units parser io.encodings.ascii values interval-maps
 ascii sets combinators locals math.ranges sorting make
-strings.parser io.encodings.utf8 memoize ;
+strings.parser io.encodings.utf8 memoize simple-flat-file ;
 IN: unicode.data
 <PRIVATE
@ -18,12 +18,13 @@ VALUE: combine-map
 VALUE: class-map
 VALUE: compatibility-map
 VALUE: category-map
 VALUE: name-map
 VALUE: special-casing
 VALUE: properties
 PRIVATE>
 VALUE: name-map
 ! Inline lookups into the Unicode data tables: `at` on canonical-map and
 ! compatibility-map, and the two-key `hash2` lookup on combine-map.
 : canonical-entry ( char -- seq ) canonical-map at ; inline
 : combine-chars ( a b -- char/f ) combine-map hash2 ; inline
 : compatibility-entry ( char -- seq ) compatibility-map at ; inline
@ -76,20 +77,10 @@ PRIVATE>
 ! Loading data from UnicodeData.txt
 ! Split a line on ';' and trim blank characters from both ends of each field.
 : split-; ( line -- array )
    ";" split [ [ blank? ] trim ] map ;
 ! Read the file as ASCII lines and split every line into its ';'-separated,
 ! trimmed fields. No comment stripping happens in this version.
 : data ( filename -- data )
    ascii file-lines [ split-; ] map ;
 ! Load the rows of vocab:unicode/data/UnicodeData.txt via `data`.
 : load-data ( -- data )
    "vocab:unicode/data/UnicodeData.txt" data ;
 ! Cut each line at its first '#' or '@', keep what precedes it, and drop
 ! the lines that end up empty (harvest).
 : filter-comments ( lines -- lines )
    [ "#@" split first ] map harvest ;
 ! After filter-comments, build an alist that maps each row's first field,
 ! parsed as hex, to that row's field at position `index`.
 : (process-data) ( index data -- newdata )
    filter-comments
    [ [ nth ] keep first swap ] with { } map>assoc
    [ [ hex> ] dip ] assoc-map ;
@ -182,15 +173,13 @@ C: <code-point> code-point
    <code-point> swap first set ;
 ! Extra properties
 ! Read vocab:unicode/data/PropList.txt as ASCII lines.
 : properties-lines ( -- lines )
    "vocab:unicode/data/PropList.txt"
    ascii file-lines ;
 ! Convert the key of every PropList.txt row into a two-number array: an
 ! `a..b` range becomes { a b } and a single code point `a` becomes { a a }
 ! (both parsed as hex). The row's second field is kept unchanged.
 ! The '-' lines are the old body (properties-lines, filter-comments,
 ! split-; and map>assoc); the '+' lines are the new body, which gets its
 ! rows from `data` and rewrites the keys with assoc-map.
 : parse-properties ( -- {{[a,b],prop}} )
-    properties-lines filter-comments [
+    "vocab:unicode/data/PropList.txt" data [
-        split-; first2
+        [
-        [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
+            ".." split1 [ dup ] unless*
-    ] { } map>assoc ;
+            [ hex> ] bi@ 2array
        ] dip
    ] assoc-map ;
 : properties>intervals ( properties -- assoc[str,interval] )
    dup values prune [ f ] H{ } map>assoc
@ -233,10 +222,6 @@ name>char-hook set-global
 SYMBOL: interned
 ! Read the file as ASCII lines, strip comments with filter-comments, and
 ! split each remaining line into its ';'-separated, trimmed fields.
 : parse-key-value ( filename -- assoc )
    ! assoc is code point/range => name
    ascii file-lines filter-comments [ split-; ] map ;
 ! Find the element of the `interned` sequence that equals `value` and emit
 ! { key element } into the enclosing `make` (element is f when none matches).
 ! Presumably this lets equal values share one interned object -- confirm.
 : range, ( value key -- )
    swap interned get
    [ = ] with find nip 2array , ;
@ -257,4 +242,4 @@ SYMBOL: interned
 PRIVATE>
 ! Load the rows of a data file and pass them to process-key-value (defined
 ! outside this hunk). The added ('+') line gets the rows from `data`; the
 ! removed ('-') line got them from parse-key-value.
 : load-key-value ( filename -- table )
-    parse-key-value process-key-value ;
+    data process-key-value ;