44 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			HTML
		
	
	
		
		
			
		
	
	
			44 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			HTML
		
	
	
| 
								 | 
							
								<HTML>
							 | 
						||
| 
								 | 
							
								<TITLE>Canonical XML</TITLE>
							 | 
						||
| 
								 | 
							
								<BODY>
							 | 
						||
| 
								 | 
							
								<H1>Canonical XML</H1>
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								This document defines a subset of XML called canonical XML.
							 | 
						||
| 
								 | 
							
								The intended use of canonical XML is in testing XML processors,
							 | 
						||
| 
								 | 
							
								as a representation of the result of parsing an XML document.
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								Every well-formed XML document has a unique structurally equivalent
							 | 
						||
| 
								 | 
							
								canonical XML document.  Two structurally equivalent XML
							 | 
						||
| 
								 | 
							
								documents have a byte-for-byte identical canonical XML document.
							 | 
						||
| 
								 | 
							
								Canonicalizing an XML document requires only information that an XML
							 | 
						||
| 
								 | 
							
								processor is required to make available to an application.
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								A canonical XML document conforms to the following grammar:
							 | 
						||
| 
								 | 
							
								<PRE>
							 | 
						||
| 
								 | 
							
								CanonXML    ::= Pi* element Pi*
							 | 
						||
| 
								 | 
							
								element     ::= Stag (Datachar | Pi | element)* Etag
							 | 
						||
| 
								 | 
							
								Stag        ::= '&lt;'  Name Atts '>'
							 | 
						||
| 
								 | 
							
								Etag        ::= '&lt;/' Name '>'
							 | 
						||
| 
								 | 
							
								Pi          ::= '&lt;?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>'
							 | 
						||
| 
								 | 
							
								Atts        ::= (' ' Name '=' '"' Datachar* '"')*
							 | 
						||
| 
								 | 
							
								Datachar    ::= '&amp;amp;' | '&amp;lt;' | '&amp;gt;' | '&amp;quot;'
							 | 
						||
| 
								 | 
							
								                 | '&amp;#9;'| '&amp;#10;'| '&amp;#13;'
							 | 
						||
| 
								 | 
							
								                 | (Char - ('&amp;' | '&lt;' | '>' | '"' | #x9 | #xA | #xD))
							 | 
						||
| 
								 | 
							
								Name        ::= (see XML spec)
							 | 
						||
| 
								 | 
							
								Char        ::= (see XML spec)
							 | 
						||
| 
								 | 
							
								S           ::= (see XML spec)
							 | 
						||
| 
								 | 
							
								</PRE>
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								Attributes are in lexicographical order (in Unicode bit order).
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								A canonical XML document is encoded in UTF-8.
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								Ignorable white space is considered significant and is treated equivalently
							 | 
						||
| 
								 | 
							
								to data.
							 | 
						||
| 
								 | 
							
								<P>
							 | 
						||
| 
								 | 
							
								<ADDRESS>
							 | 
						||
| 
								 | 
							
								<A HREF="mailto:jjc@jclark.com">James Clark</A>
							 | 
						||
| 
								 | 
							
								</ADDRESS>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								</BODY>
							 | 
						||
| 
								 | 
							
								</HTML>
							 |