From 67a2b9dc9aec5081f175787b16df85354161277f Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Mon, 18 Mar 2013 13:35:22 -0700 Subject: [PATCH] io.encodings: speed up ascii and utf8 stream-read-until. --- basis/hints/hints.factor | 6 ++--- basis/io/encodings/ascii/ascii.factor | 2 ++ basis/io/ports/ports.factor | 2 +- core/io/encodings/encodings.factor | 38 ++++++++++++++++----------- core/io/encodings/utf8/utf8.factor | 2 ++ 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/basis/hints/hints.factor b/basis/hints/hints.factor index 6aeedb2da0..6451e4808e 100644 --- a/basis/hints/hints.factor +++ b/basis/hints/hints.factor @@ -117,9 +117,9 @@ set-specializer \ split, { string string } set-specializer -{ member? member-eq? } [ - { array } set-specializer -] each +\ member? { { array } { string } } set-specializer + +\ member-eq? { array } set-specializer \ assoc-stack { vector } set-specializer diff --git a/basis/io/encodings/ascii/ascii.factor b/basis/io/encodings/ascii/ascii.factor index 2b5640489f..0f7f1f0760 100644 --- a/basis/io/encodings/ascii/ascii.factor +++ b/basis/io/encodings/ascii/ascii.factor @@ -25,3 +25,5 @@ M: ascii decode-char stream-read1 dup [ dup 127 <= [ >fixnum ] [ drop replacement-char ] if ] when ; inline + +M: ascii decode-until (decode-until) ; diff --git a/basis/io/ports/ports.factor b/basis/io/ports/ports.factor index cc6ce42ff1..d46672df72 100644 --- a/basis/io/ports/ports.factor +++ b/basis/io/ports/ports.factor @@ -230,7 +230,7 @@ M: object underlying-handle underlying-port handle>> ; ! 
Fast-path optimization -HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ; +HINTS: (decode-until) { string input-port object } ; HINTS: M\ input-port stream-read-partial-unsafe { fixnum byte-array input-port } diff --git a/core/io/encodings/encodings.factor b/core/io/encodings/encodings.factor index fdb320ff30..bd36a65fba 100644 --- a/core/io/encodings/encodings.factor +++ b/core/io/encodings/encodings.factor @@ -15,6 +15,27 @@ M: object guess-encoded-length drop ; inline GENERIC: decode-char ( stream encoding -- char/f ) +GENERIC: decode-until ( seps stream encoding -- string/f sep/f ) + +<PRIVATE + +! If the stop? branch is taken convert the sbuf to a string +! If sep is present, returns ``string sep'' (string can be "") +! If sep is f, returns ``string f'' or ``f f'' +: read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f ) + dup call + [ nip [ "" like ] dip [ f like f ] unless* ] + [ pick push read-until-loop ] if ; inline recursive + +PRIVATE> + +: (decode-until) ( seps stream encoding -- string/f sep/f ) + [ decode-char dup ] 2curry swap [ dupd member? ] curry + [ [ drop f t ] if ] curry compose + [ 100 <sbuf> ] dip read-until-loop ; inline + +M: object decode-until (decode-until) ; + GENERIC: encode-char ( char stream encoding -- ) GENERIC: encode-string ( string stream encoding -- ) @@ -111,23 +132,10 @@ M: decoder stream-contents* { CHAR: \n [ line-ends\n ] } } case ; inline -! If the stop? branch is taken convert the sbuf to a string -! If sep is present, returns ``string sep'' (string can be "") -! If sep is f, returns ``string f'' or ``f f'' -: read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f ) - dup call - [ nip [ "" like ] dip [ f like f ] unless* ] - [ pick push read-until-loop ] if ; inline recursive - -: decoder-read-until ( seps stream encoding -- string/f sep/f ) - [ decode-char dup ] 2curry swap [ dupd member? 
] curry - [ [ drop f t ] if ] curry compose - [ 100 <sbuf> ] dip read-until-loop ; inline - -M: decoder stream-read-until >decoder< decoder-read-until ; +M: decoder stream-read-until >decoder< decode-until ; M: decoder stream-readln - "\r\n" over >decoder< decoder-read-until handle-readln ; + "\r\n" over >decoder< decode-until handle-readln ; M: decoder dispose stream>> dispose ; diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor index c01ee89e9e..61f71e3d3b 100644 --- a/core/io/encodings/utf8/utf8.factor +++ b/core/io/encodings/utf8/utf8.factor @@ -54,6 +54,8 @@ SINGLETON: utf8 M: utf8 decode-char drop decode-utf8 ; inline +M: utf8 decode-until (decode-until) ; + ! Encoding UTF-8 : encoded ( stream char -- )