diff options
Diffstat (limited to 'xsd/examples/cxx/tree/streaming/parser.cxx')
-rw-r--r-- | xsd/examples/cxx/tree/streaming/parser.cxx | 218 |
1 files changed, 145 insertions, 73 deletions
diff --git a/xsd/examples/cxx/tree/streaming/parser.cxx b/xsd/examples/cxx/tree/streaming/parser.cxx index b0d9df7..41ad7af 100644 --- a/xsd/examples/cxx/tree/streaming/parser.cxx +++ b/xsd/examples/cxx/tree/streaming/parser.cxx @@ -1,6 +1,4 @@ -// file : examples/cxx/tree/streaming/parser.cxx -// author : Boris Kolpackov <boris@codesynthesis.com> -// copyright : not copyrighted - public domain +#include <cassert> #include <xercesc/util/XMLUni.hpp> #include <xercesc/util/XMLString.hpp> @@ -11,10 +9,10 @@ #include <xercesc/sax2/XMLReaderFactory.hpp> #include <xercesc/dom/DOM.hpp> +#include <xercesc/dom/impl/DOMTextImpl.hpp> -#if _XERCES_VERSION >= 30000 -# include <xercesc/dom/impl/DOMTextImpl.hpp> -#endif +#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar +#include <xercesc/framework/XMLGrammarPoolImpl.hpp> #include <xsd/cxx/auto-array.hxx> @@ -25,6 +23,7 @@ #include <xsd/cxx/tree/error-handler.hxx> #include "parser.hxx" +#include "grammar-input-stream.hxx" using namespace std; using namespace xercesc; @@ -32,16 +31,22 @@ using namespace xercesc; namespace xml = xsd::cxx::xml; namespace tree = xsd::cxx::tree; +typedef parser::document_ptr document_ptr; + class parser_impl: public DefaultHandler { public: - parser_impl (); + parser_impl (const XMLByte* grammar, size_t grammar_size); - xml::dom::auto_ptr<DOMDocument> + void start (istream& is, const string& id, bool validate); - xml::dom::auto_ptr<DOMDocument> - next (); + document_ptr + peek (); + + document_ptr + next (document_ptr doc = document_ptr (), + document_ptr outer_doc = document_ptr ()); // SAX event handlers. // @@ -59,17 +64,13 @@ private: virtual void characters (const XMLCh* const s, -#if _XERCES_VERSION >= 30000 - const XMLSize_t length -#else - const unsigned int length -#endif - ); + const XMLSize_t length); private: // SAX parser. // bool clean_; + auto_ptr<XMLGrammarPool> grammar_pool_; auto_ptr<SAX2XMLReader> parser_; XMLPScanToken token_; tree::error_handler<char> error_handler_; @@ -77,23 +78,40 @@ private: auto_ptr<xml::sax::std_input_source> isrc_; size_t depth_; + size_t whitespace_depth_; // Depth at which to ignore whitespaces. + + bool peek_; + size_t next_depth_; // Depth at which next() should work. // DOM document being built. // DOMImplementation& dom_impl_; - xml::dom::auto_ptr<DOMDocument> doc_; + document_ptr doc_; DOMElement* cur_; }; const XMLCh ls[] = {chLatin_L, chLatin_S, chNull}; parser_impl:: -parser_impl () +parser_impl (const XMLByte* grammar, size_t grammar_size) : clean_ (true), - parser_ (XMLReaderFactory::createXMLReader ()), error_proxy_ (error_handler_), dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls)) { + MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); + + if (grammar != 0) + { + assert (grammar_size != 0); + grammar_pool_.reset (new XMLGrammarPoolImpl (mm)); + + grammar_input_stream is (grammar, grammar_size); + grammar_pool_->deserializeGrammars(&is); + grammar_pool_->lockPool (); + } + + parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ())); + parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); @@ -101,7 +119,7 @@ parser_impl () // Xerces-C++ 3.1.0 is the first version with working multi import // support. It also allows us to disable buffering in the parser - // so that the date is parsed and returned as soon as it is + // so that the data is parsed and returned as soon as it is // available. // #if _XERCES_VERSION >= 30100 @@ -115,12 +133,13 @@ parser_impl () parser_->setContentHandler (this); } -xml::dom::auto_ptr<DOMDocument> parser_impl:: +void parser_impl:: start (istream& is, const string& id, bool val) { // Reset our state. // depth_ = 0; + peek_ = false; doc_.reset (); error_handler_.reset (); @@ -134,59 +153,116 @@ start (istream& is, const string& id, bool val) parser_->setFeature (XMLUni::fgSAX2CoreValidation, val); parser_->setFeature (XMLUni::fgXercesSchema, val); - // Start parsing. The first document that we return is a "carcase" - // of the complete document. That is, the root element with all the - // attributes but without any content. - // - bool r (parser_->parseFirst (*isrc_, token_)); + if (val && grammar_pool_.get () != 0) + { + // Use the loaded grammar during parsing. + // + parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true); + + // Disable loading schemas via other means (e.g., schemaLocation). + // + parser_->setFeature (XMLUni::fgXercesLoadSchema, false); + } + + parser_->parseFirst (*isrc_, token_); error_handler_.throw_if_failed<tree::parsing<char> > (); +} + +document_ptr parser_impl:: +peek () +{ + bool r (true); + + size_t d (depth_); + whitespace_depth_ = d; + + peek_ = true; - while (r && depth_ == 0) + // Parse (skip whitespace content) until the depth increases or we get + // a document. The latter test covers <element/> cases where both start + // and end events will trigger and therefore leave the depth unchanged. + // + while (r && depth_ == d && doc_.get () == 0) { r = parser_->parseNext (token_); error_handler_.throw_if_failed<tree::parsing<char> > (); } if (!r) - return xml::dom::auto_ptr<DOMDocument> (0); + return document_ptr (0); return doc_; } -xml::dom::auto_ptr<DOMDocument> parser_impl:: -next () +document_ptr parser_impl:: +next (document_ptr doc, document_ptr outer_doc) { - // We should be at depth 1. If not, then we are done parsing. + assert (peek_ == (doc.get () != 0)); + + // Install doc/outer_doc as the document we are parsing. // - if (depth_ != 1) - return xml::dom::auto_ptr<DOMDocument> (0); + if (doc.get () != 0) + { + if (outer_doc.get () != 0) + { + // Copy doc to outer_doc. + // + doc_ = outer_doc; + cur_ = static_cast<DOMElement*> ( + doc_->importNode (doc->getDocumentElement (), true)); + doc_->getDocumentElement ()->appendChild (cur_); + } + else + { + doc_ = doc; + cur_ = doc_->getDocumentElement (); + } + + // This handles the <element/> case where we get both start and + // end events in peek(). In this case the element is fully parsed + // and next() has nothing to do. + // + if (depth_ != next_depth_) + { + peek_ = false; + return doc_; + } + } bool r (true); + // If we peeked, then we have already seen the start tag and our + // return depth is one above the current depth. + // + size_t d (peek_ ? depth_ - 1 : depth_); + whitespace_depth_ = d; + + peek_ = false; + // Keep calling parseNext() until we either move to a greater depth or // get a document. This way we skip the text (presumably whitespaces) - // that may be preceding the next chunk. + // that may be preceding this chunk. // - while (r && depth_ == 1 && doc_.get () == 0) + while (r && depth_ == d && doc_.get () == 0) { parser_->parseNext (token_); error_handler_.throw_if_failed<tree::parsing<char> > (); } if (!r) - return xml::dom::auto_ptr<DOMDocument> (0); + return document_ptr (0); - // If we are not at depth 1, keep calling parseNext() until we get - // there. + // If we are not at our start depth, keep calling parseNext() until we + // get there again. // - while (r && depth_ != 1) + while (r && depth_ != d) { r = parser_->parseNext (token_); error_handler_.throw_if_failed<tree::parsing<char> > (); } if (!r) - return xml::dom::auto_ptr<DOMDocument> (0); + return document_ptr (0); return doc_; } @@ -214,18 +290,25 @@ startElement (const XMLCh* const uri, // Set attributes. // -#if _XERCES_VERSION >= 30000 for (XMLSize_t i (0), end (attr.getLength()); i < end; ++i) -#else - for (unsigned int i (0), end (attr.getLength()); i < end; ++i) -#endif { - cur_->setAttributeNS (attr.getURI (i), - attr.getQName (i), - attr.getValue (i)); + const XMLCh* qn (attr.getQName (i)); + const XMLCh* ns (attr.getURI (i)); + + // When SAX2 reports the xmlns attribute, it does not include + // the proper attribute namespace. So we have to detect and + // handle this case. + // + if (XMLString::equals (qn, XMLUni::fgXMLNSString)) + ns = XMLUni::fgXMLNSURIName; + + cur_->setAttributeNS (ns, qn, attr.getValue (i)); } depth_++; + + if (peek_) + next_depth_ = depth_; } void parser_impl:: @@ -239,38 +322,21 @@ endElement (const XMLCh* const /*uri*/, cur_ = static_cast<DOMElement*> (cur_->getParentNode ()); } -#if _XERCES_VERSION >= 30000 void parser_impl:: characters (const XMLCh* const s, const XMLSize_t length) { const XMLCh empty[] = {chNull}; - // Ignore text content (presumably whitespaces) in the root element. + // Ignore text content (presumably whitespaces) while looking for + // the next element. // - if (depth_ > 1) + if (depth_ > whitespace_depth_) { DOMText* t = doc_->createTextNode (empty); static_cast<DOMTextImpl*> (t)->appendData (s, length); cur_->appendChild (t); } } -#else -void parser_impl:: -characters (const XMLCh* const s, const unsigned int length) -{ - // Ignore text content (presumably whitespaces) in the root element. - // - if (depth_ > 1) - { - // For Xerces-C++ 2-series we have to make copy. - // - xsd::cxx::auto_array<XMLCh> tmp (new XMLCh[length + 1]); - XMLString::copyNString (tmp.get (), s, length); - cur_->appendChild (doc_->createTextNode (tmp.get ())); - } -} -#endif - // // parser @@ -282,19 +348,25 @@ parser:: } parser:: -parser () - : impl_ (new parser_impl) +parser (const XMLByte* grammar, size_t grammar_size) + : impl_ (new parser_impl (grammar, grammar_size)) { } -xml::dom::auto_ptr<DOMDocument> parser:: +void parser:: start (istream& is, const string& id, bool val) { return impl_->start (is, id, val); } -xml::dom::auto_ptr<DOMDocument> parser:: -next () +document_ptr parser:: +peek () +{ + return impl_->peek (); +} + +document_ptr parser:: +next (document_ptr doc, document_ptr outer_doc) { - return impl_->next (); + return impl_->next (doc, outer_doc); } |