git: more correct handling of multiline payloads

Field keys read until a space. The field payload reads until a newline. If the next line begins with a space, that line is also part of the field value.

Fields end with two newlines (\n\n) in a row.
modern-harvey3
Doug Coleman 2019-12-01 09:45:31 -06:00
parent 73f2838c87
commit bea7852d60
1 changed files with 48 additions and 37 deletions

View File

@ -2,13 +2,13 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs assocs.extras calendar USING: accessors arrays assocs assocs.extras calendar
calendar.format checksums checksums.sha combinators calendar.format checksums checksums.sha combinators
combinators.smart compression.zlib constructors fry grouping io combinators.short-circuit combinators.smart compression.zlib
io.binary io.directories io.directories.search constructors fry grouping io io.binary io.directories
io.encodings.binary io.encodings.string io.encodings.utf8 io.directories.search io.encodings.binary io.encodings.string
io.files io.files.info io.pathnames io.streams.byte-array io.encodings.utf8 io.files io.files.info io.pathnames
io.streams.peek kernel math math.bitwise math.parser io.streams.byte-array io.streams.peek kernel make math
math.statistics memoize namespaces random sequences math.bitwise math.parser math.statistics memoize modern.slices
sequences.extras splitting strings ; namespaces random sequences sequences.extras splitting strings ;
IN: git IN: git
ERROR: byte-expected offset ; ERROR: byte-expected offset ;
@ -120,10 +120,10 @@ ERROR: unhandled-git-index-trailing-bytes bytes ;
[ name>> file-info modified>> timestamp>unix-time >integer ] bi = not [ name>> file-info modified>> timestamp>unix-time >integer ] bi = not
] filter ; ] filter ;
TUPLE: commit hash tree parents author committer message ; TUPLE: commit hash tree parents author committer gpgsig message ;
CONSTRUCTOR: <commit> commit ( tree parents author committer -- obj ) ; CONSTRUCTOR: <commit> commit ( tree parents author committer -- obj ) ;
TUPLE: tree hash tree parents author committer message ; TUPLE: tree hash tree parents author committer gpgsig message ;
CONSTRUCTOR: <tree> tree ( -- obj ) ; CONSTRUCTOR: <tree> tree ( -- obj ) ;
: last2 ( seq -- penultimate ultimate ) 2 tail* first2 ; : last2 ( seq -- penultimate ultimate ) 2 tail* first2 ;
@ -151,43 +151,57 @@ ERROR: unknown-commit-line line name ;
ERROR: eof-too-early ; ERROR: eof-too-early ;
ERROR: unknown-field field ; ERROR: unknown-field field ;
: parse-commit-field ( obj parameter -- obj ) : read-field* ( bytes n -- bytes n' )
[ "\r\n" read-until [ eof-too-early ] unless ] dip { 1 peek-from
{ "tree" [ >>tree ] } "\s" sequence= [
{ "parent" [ >>parents ] } 1 +
{ "author" [ >>author ] } "\n" slice-til-string drop ,
{ "committer" [ >>committer ] } read-field*
] when ;
: read-field ( bytes n -- bytes n' seq )
[
read-field*
] { } make concat ;
: parse-commit-field ( commit bytes n parameter -- commit bytes n )
[ read-field ] dip >string {
{ "tree" [ reach tree<< ] }
{ "parent" [ reach parents<< ] }
{ "author" [ reach author<< ] }
{ "committer" [ reach committer<< ] }
{ "gpgsig" [ reach gpgsig<< ] }
[ unknown-field ] [ unknown-field ]
} case ; } case ;
ERROR: unexpected-text text ; ERROR: unexpected-text text ;
: parse-commit-lines ( obj -- obj ) : parse-commit-lines ( obj bytes n -- obj )
" \n" read-until { "\s\n" slice-til-either {
{ char: \s [ parse-commit-field parse-commit-lines ] } { char: \s [ parse-commit-field parse-commit-lines ] }
{ char: \n [ drop contents >>message ] } { char: \n [ drop 1 + tail >>message ] }
[ unexpected-text ] [ unexpected-text ]
} case ; } case ;
: parse-commit ( bytes -- commit ) : parse-commit ( bytes -- commit )
" " split1 [ "commit" assert= ] [ string>number read ] bi* " " split1 [ "commit" assert= ] [ string>number read ] bi*
utf8 [ [ commit new ] dip 0 parse-commit-lines ;
commit new parse-commit-lines
] with-byte-reader ;
: parse-tree-field ( obj parameter -- obj ) : parse-tree-field ( tree bytes n parameter -- tree bytes n )
[ "\r\n" read-until* ] dip { [ read-field ] dip {
{ "tree" [ >>tree ] } { "tree" [ reach tree<< ] }
{ "parent" [ >>parents ] } { "parent" [ reach parents<< ] }
{ "author" [ >>author ] } { "author" [ reach author<< ] }
{ "committer" [ >>committer ] } { "committer" [ reach committer<< ] }
{ "gpgsig" [ reach gpgsig<< ] }
[ unknown-field ] [ unknown-field ]
} case ; } case ;
: parse-tree-lines ( obj -- obj )
"\s\n" read-until { : bytes>tree ( obj bytes n -- obj )
{ char: \s [ parse-tree-field parse-tree-lines ] } "\s\n" slice-til-either {
{ char: \n [ drop contents >>message ] } { char: \s [ >string parse-tree-field bytes>tree ] }
{ char: \n [ drop 1 + tail >>message ] }
[ unexpected-text ] [ unexpected-text ]
} case ; } case ;
@ -223,12 +237,8 @@ ERROR: unknown-git-object obj ;
H{ } clone parse-object-line>assoc assoc>git-object H{ } clone parse-object-line>assoc assoc>git-object
] with-byte-reader ; ] with-byte-reader ;
: parse-tree ( bytes -- commit ) : parse-tree ( bytes -- commit )
[ tree new ] dip [ tree new ] dip 0 bytes>tree ;
utf8 [
parse-tree-lines
] with-byte-reader ;
: parse-object ( bytes -- git-obj ) : parse-object ( bytes -- git-obj )
utf8 [ utf8 [
@ -377,6 +387,7 @@ ERROR: unsupported-packed-raw-type type ;
: read-packed-raw ( -- string ) : read-packed-raw ( -- string )
read-type-length first2 swap { read-type-length first2 swap {
{ 1 [ 256 + read uncompress ] } { 1 [ 256 + read uncompress ] }
! { 6 [ read ] } ! delta-packed with DEFLATE
[ unsupported-packed-raw-type ] [ unsupported-packed-raw-type ]
} case ; } case ;
@ -396,7 +407,7 @@ SYMBOL: initial-offset
read-type-length first2 swap { read-type-length first2 swap {
{ 1 [ 256 + read uncompress parse-object ] } { 1 [ 256 + read uncompress parse-object ] }
{ 6 [ read-offset-delta first2 do-deltas parse-object-bytes>assoc ] } { 6 [ read-offset-delta first2 do-deltas parse-object-bytes>assoc ] }
! { 7 [ B read-sha1-delta ] } ! { 7 [ read-sha1-delta ] }
[ number>string "unknown packed type: " prepend throw ] [ number>string "unknown packed type: " prepend throw ]
} case } case
] with-variable ; ] with-variable ;