From 559f140a93f7f3b98b23449d3900bd6fa458f8f5 Mon Sep 17 00:00:00 2001 From: Jon Harper Date: Sat, 5 Apr 2014 15:51:30 +0200 Subject: [PATCH] YAML: handle recursive data and anchors' identity --- extra/yaml/yaml-tests.factor | 57 +++++++++++- extra/yaml/yaml.factor | 172 ++++++++++++++++++++++++++--------- 2 files changed, 184 insertions(+), 45 deletions(-) diff --git a/extra/yaml/yaml-tests.factor b/extra/yaml/yaml-tests.factor index ab0546c1f1..8cb60bd304 100644 --- a/extra/yaml/yaml-tests.factor +++ b/extra/yaml/yaml-tests.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2014 Jon Harper. ! See http://factorcode.org/license.txt for BSD license. -USING: assocs linked-assocs literals tools.test yaml ; +USING: assocs kernel linked-assocs literals locals sequences +tools.test yaml yaml.private grouping ; IN: yaml.tests ! TODO real conformance tests here @@ -94,10 +95,64 @@ CONSTANT: test-anchors-obj { ${ test-anchors-obj } [ $ test-anchors yaml> ] unit-test ${ test-anchors-obj } [ $ test-anchors-obj >yaml yaml> ] unit-test +! and test indentity +{ t } [ $ test-anchors yaml> 2 group [ all-eq? ] all? ] unit-test +{ t } [ $ test-anchors yaml> >yaml yaml> 2 group [ all-eq? ] all? ] unit-test + +! Anchors and fancy types +CONSTANT: fancy-anchors """- &1 [ "foo" ] +- &2 !!set + ? *1 +- *2 +""" +CONSTANT: fancy-anchors-obj { + { "foo" } HS{ { "foo" } } HS{ { "foo" } } +} +${ fancy-anchors-obj } [ $ fancy-anchors yaml> ] unit-test +${ fancy-anchors-obj } [ $ fancy-anchors-obj >yaml yaml> ] unit-test ! Missing anchors [ "*foo" yaml> ] [ "No previous anchor" = ] must-fail-with +! Simple Recursive output +: simple-recursive-list ( -- obj ) + { f } clone [ 0 over set-nth ] keep ; +CONSTANT: simple-recursive-list-anchored T{ yaml-anchor f "0" { + T{ yaml-alias f "0" } +} } +CONSTANT: simple-recursive-list-yaml """&0 +- *0""" + +${ simple-recursive-list-anchored } [ simple-recursive-list replace-identities ] unit-test +${ simple-recursive-list-anchored } [ $ simple-recursive-list-yaml yaml> replace-identities ] unit-test +${ simple-recursive-list-anchored } [ simple-recursive-list >yaml yaml> replace-identities ] unit-test + +! many recursive outputs +: many-recursive-objects ( -- obj ) + 4 [ simple-recursive-list ] replicate ; +CONSTANT: many-recursive-objects-anchored { + T{ yaml-anchor f "0" { T{ yaml-alias f "0" } } } + T{ yaml-anchor f "1" { T{ yaml-alias f "1" } } } + T{ yaml-anchor f "2" { T{ yaml-alias f "2" } } } + T{ yaml-anchor f "3" { T{ yaml-alias f "3" } } } +} + +${ many-recursive-objects-anchored } [ many-recursive-objects replace-identities ] unit-test + +! Advanced recursive outputs +:: transitive-recursive-objects ( -- obj ) + { f } :> list + HS{ list } :> set + H{ { set list } } :> hash + hash 0 list set-nth + list ; +CONSTANT: transitive-recursive-objects-anchored T{ yaml-anchor f "0" { + H{ { HS{ T{ yaml-alias f "0" } } T{ yaml-alias f "0" } } } +} } + +${ transitive-recursive-objects-anchored } [ transitive-recursive-objects replace-identities ] unit-test + + ! Lifted from pyyaml ! http://pyyaml.org/browser/pyyaml/trunk/tests/data diff --git a/extra/yaml/yaml.factor b/extra/yaml/yaml.factor index ce4ec2f2df..92fc1c1555 100644 --- a/extra/yaml/yaml.factor +++ b/extra/yaml/yaml.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2013 Jon Harper. ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien.data arrays assocs byte-arrays -classes.struct combinators destructors hashtables -io.encodings.string io.encodings.utf8 kernel libc linked-assocs -locals make namespaces sequences sets strings yaml.conversion -yaml.ffi ; +classes.struct combinators combinators.extras +combinators.short-circuit destructors fry hashtables +hashtables.identity io.encodings.string io.encodings.utf8 kernel +libc linked-assocs locals make math math.parser namespaces +sequences sets strings yaml.conversion yaml.ffi ; FROM: sets => set ; IN: yaml @@ -12,13 +13,20 @@ IN: yaml : yaml-assert-ok ( ? -- ) [ "yaml error" throw ] unless ; +TUPLE: yaml-alias anchor ; +C: yaml-alias SYMBOL: anchors : ?register-anchor ( obj event -- obj ) dupd anchor>> [ anchors get set-at ] [ drop ] if* ; -: deref-anchor ( event -- obj ) - data>> alias>> anchor>> anchors get at* +: assert-anchor-exists ( anchor -- ) + anchors get at* nip [ "No previous anchor" throw ] unless ; +: deref-anchor ( event -- obj ) + data>> alias>> anchor>> + [ assert-anchor-exists ] + [ ] bi ; + : event>scalar ( event -- obj ) data>> scalar>> [ construct-scalar ] @@ -59,11 +67,11 @@ TUPLE: factor_yaml_event_t type data start_mark end_mark ; DEFER: parse-sequence DEFER: parse-mapping : (parse-sequence) ( parser event prev-event -- obj ) - data>> sequence_start>> - [ [ parse-sequence ] [ construct-sequence ] bi* ] [ 2nip ?register-anchor ] 3bi ; + data>> sequence_start>> [ [ 2drop f ] dip ?register-anchor drop ] + [ [ parse-sequence ] [ construct-sequence ] bi* ] [ 2nip ?register-anchor ] 3tri ; : (parse-mapping) ( parser event prev-event -- obj ) - data>> mapping_start>> - [ [ parse-mapping ] [ construct-mapping ] bi* ] [ 2nip ?register-anchor ] 3bi ; + data>> mapping_start>> [ [ 2drop f ] dip ?register-anchor drop ] + [ [ parse-mapping ] [ construct-mapping ] bi* ] [ 2nip ?register-anchor ] 3tri ; : next-complex-value ( parser event prev-event -- obj ) dup type>> { { YAML_SEQUENCE_START_EVENT [ (parse-sequence) ] } @@ -118,9 +126,25 @@ DEFER: parse-mapping [ "wrong event" throw ] unless ] with-destructors ; +GENERIC: (deref-aliases) ( anchors obj -- obj' ) +M: object (deref-aliases) nip ; +M: byte-array (deref-aliases) nip ; +M: string (deref-aliases) nip ; +M: yaml-alias (deref-aliases) anchor>> swap at ; + +M: sequence (deref-aliases) + [ (deref-aliases) ] with map! ; +M: set (deref-aliases) + [ members (deref-aliases) ] [ clear-set ] [ swap union! ] tri ; +: assoc-map! ( assoc quot -- ) + [ assoc-map ] [ drop clear-assoc ] [ drop swap assoc-union! ] 2tri ; inline +M: assoc (deref-aliases) + swap '[ [ _ swap (deref-aliases) ] bi@ ] assoc-map! ; + :: parse-yaml-doc ( parser event -- obj ) H{ } clone anchors [ parser event next-value + anchors get swap (deref-aliases) ] with-variable ; :: ?parse-yaml-doc ( parser event -- obj/f ? ) @@ -167,6 +191,59 @@ PRIVATE> ( -- yaml-anchors ) + IH{ } clone IH{ } clone 0 yaml-anchors boa ; +GENERIC: (replace-aliases) ( yaml-anchors obj -- obj' ) +: incr-anchor ( yaml-anchors -- current-anchor ) + [ next-anchor>> ] [ + [ [ number>string ] [ 1 + ] bi ] + [ next-anchor<< ] bi* + ] bi ; +:: ?replace-aliases ( yaml-anchors obj -- obj' ) + yaml-anchors objects>> :> objects + obj objects at* [ + [ yaml-anchors incr-anchor dup obj objects set-at ] unless* + + ] [ + drop f obj objects set-at + yaml-anchors obj (replace-aliases) :> obj' + obj obj' yaml-anchors new-objects>> set-at + obj' + ] if ; + +M: object (replace-aliases) nip ; +M: byte-array (replace-aliases) nip ; +M: string (replace-aliases) nip ; + +M: sequence (replace-aliases) + [ ?replace-aliases ] with map ; +M: set (replace-aliases) [ members (replace-aliases) ] keep set-like ; +M: assoc (replace-aliases) + swap '[ [ _ swap ?replace-aliases ] bi@ ] assoc-map ; + +TUPLE: yaml-anchor anchor obj ; +C: yaml-anchor + +GENERIC: (replace-anchors) ( yaml-anchors obj -- obj' ) +: (get-anchor) ( yaml-anchors obj -- anchor/f ) swap objects>> at ; +: get-anchor ( yaml-anchors obj -- anchor/f ) + { [ (get-anchor) ] [ over new-objects>> at (get-anchor) ] } 2|| ; +: ?replace-anchors ( yaml-anchors obj -- obj' ) + [ (replace-anchors) ] [ get-anchor ] 2bi [ swap ] when* ; +M: object (replace-anchors) nip ; +M: byte-array (replace-anchors) nip ; +M: string (replace-anchors) nip ; + +M: sequence (replace-anchors) + [ ?replace-anchors ] with map ; +M: set (replace-anchors) [ members ?replace-anchors ] keep set-like ; +M: assoc (replace-anchors) + swap '[ [ _ swap ?replace-anchors ] bi@ ] assoc-map ; + +: replace-identities ( obj -- obj' ) + [ ] dip dupd ?replace-aliases ?replace-anchors ; + ! TODO We can also pass some data when registering the write handler, ! use this to have several buffers if it can be interrupted. ! For now, only do operations on strings that are in memory @@ -178,19 +255,26 @@ SYMBOL: yaml-write-buffer push-all drop 1 ] yaml_write_handler_t ; -GENERIC: emit-value ( emitter event obj -- ) +GENERIC: emit-value ( emitter event anchor obj -- ) +: emit-object ( emitter event obj -- ) [ f ] dip emit-value ; -:: emit-scalar ( emitter event obj -- ) - event f +:: emit-scalar ( emitter event anchor obj -- ) + event anchor obj [ yaml-tag ] [ represent-scalar ] bi -1 f f YAML_ANY_SCALAR_STYLE yaml_scalar_event_initialize yaml-assert-ok emitter event yaml_emitter_emit yaml-assert-ok ; -M: object emit-value ( emitter event obj -- ) emit-scalar ; +M: object emit-value ( emitter event anchor obj -- ) emit-scalar ; -:: emit-sequence-start ( emitter event tag -- ) - event f tag f YAML_ANY_SEQUENCE_STYLE +M: yaml-anchor emit-value ( emitter event unused obj -- ) + nip [ anchor>> ] [ obj>> ] bi emit-value ; +M:: yaml-alias emit-value ( emitter event unused obj -- ) + event obj anchor>> yaml_alias_event_initialize yaml-assert-ok + emitter event yaml_emitter_emit yaml-assert-ok ; + +:: emit-sequence-start ( emitter event anchor tag -- ) + event anchor tag f YAML_ANY_SEQUENCE_STYLE yaml_sequence_start_event_initialize yaml-assert-ok emitter event yaml_emitter_emit yaml-assert-ok ; @@ -198,29 +282,29 @@ M: object emit-value ( emitter event obj -- ) emit-scalar ; dup yaml_sequence_end_event_initialize yaml-assert-ok yaml_emitter_emit yaml-assert-ok ; -: emit-sequence ( emitter event seq -- ) - [ emit-value ] with with each ; -: emit-assoc ( emitter event assoc -- ) - >alist concat emit-sequence ; -: emit-linked-assoc ( emitter event linked-assoc -- ) - >alist [ first2 swap associate ] map emit-sequence ; -: emit-set ( emitter event set -- ) - [ members ] [ cardinality f ] bi zip concat emit-sequence ; +: emit-sequence-body ( emitter event seq -- ) + [ emit-object ] with with each ; +: emit-assoc-body ( emitter event assoc -- ) + >alist concat emit-sequence-body ; +: emit-linked-assoc-body ( emitter event linked-assoc -- ) + >alist [ first2 swap associate ] map emit-sequence-body ; +: emit-set-body ( emitter event set -- ) + [ members ] [ cardinality f ] bi zip concat emit-sequence-body ; -M: f emit-value ( emitter event seq -- ) emit-scalar ; -M: string emit-value ( emitter event seq -- ) emit-scalar ; -M: byte-array emit-value ( emitter event seq -- ) emit-scalar ; -M: sequence emit-value ( emitter event seq -- ) +M: f emit-value ( emitter event anchor f -- ) emit-scalar ; +M: string emit-value ( emitter event anchor string -- ) emit-scalar ; +M: byte-array emit-value ( emitter event anchor byte-array -- ) emit-scalar ; +M: sequence emit-value ( emitter event anchor seq -- ) [ drop YAML_SEQ_TAG emit-sequence-start ] - [ emit-sequence ] - [ drop emit-sequence-end ] 3tri ; -M: linked-assoc emit-value ( emitter event assoc -- ) + [ nip emit-sequence-body ] + [ 2drop emit-sequence-end ] 4tri ; +M: linked-assoc emit-value ( emitter event anchor assoc -- ) [ drop YAML_OMAP_TAG emit-sequence-start ] - [ emit-linked-assoc ] - [ drop emit-sequence-end ] 3tri ; + [ nip emit-linked-assoc-body ] + [ 2drop emit-sequence-end ] 4tri ; -:: emit-assoc-start ( emitter event tag -- ) - event f tag f YAML_ANY_MAPPING_STYLE +:: emit-assoc-start ( emitter event anchor tag -- ) + event anchor tag f YAML_ANY_MAPPING_STYLE yaml_mapping_start_event_initialize yaml-assert-ok emitter event yaml_emitter_emit yaml-assert-ok ; @@ -228,14 +312,14 @@ M: linked-assoc emit-value ( emitter event assoc -- ) dup yaml_mapping_end_event_initialize yaml-assert-ok yaml_emitter_emit yaml-assert-ok ; -M: assoc emit-value ( emitter event seq -- ) +M: assoc emit-value ( emitter event anchor assoc -- ) [ drop YAML_MAP_TAG emit-assoc-start ] - [ emit-assoc ] - [ drop emit-assoc-end ] 3tri ; -M: set emit-value ( emitter event set -- ) + [ nip emit-assoc-body ] + [ 2drop emit-assoc-end ] 4tri ; +M: set emit-value ( emitter event anchor set -- ) [ drop YAML_SET_TAG emit-assoc-start ] - [ emit-set ] - [ drop emit-assoc-end ] 3tri ; + [ nip emit-set-body ] + [ 2drop emit-assoc-end ] 4tri ; ! registers destructors (use with with-destructors) :: init-emitter ( -- emitter event ) @@ -259,7 +343,7 @@ M: set emit-value ( emitter event set -- ) event f f f f yaml_document_start_event_initialize yaml-assert-ok emitter event yaml_emitter_emit yaml-assert-ok - emitter event obj emit-value + emitter event obj emit-object event f yaml_document_end_event_initialize yaml-assert-ok emitter event yaml_emitter_emit yaml-assert-ok ; @@ -277,11 +361,11 @@ PRIVATE> : >yaml ( obj -- str ) [ [ init-emitter ] dip - [ emit-doc ] [ drop flush-emitter ] 3bi + [ replace-identities emit-doc ] [ drop flush-emitter ] 3bi ] with-destructors ; : >yaml-docs ( seq -- str ) [ [ init-emitter ] dip - [ [ emit-doc ] with with each ] [ drop flush-emitter ] 3bi + [ [ replace-identities emit-doc ] with with each ] [ drop flush-emitter ] 3bi ] with-destructors ;