summaryrefslogtreecommitdiff
path: root/xsd/examples/cxx/tree/streaming/parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'xsd/examples/cxx/tree/streaming/parser.cxx')
-rw-r--r--xsd/examples/cxx/tree/streaming/parser.cxx218
1 files changed, 145 insertions, 73 deletions
diff --git a/xsd/examples/cxx/tree/streaming/parser.cxx b/xsd/examples/cxx/tree/streaming/parser.cxx
index b0d9df7..41ad7af 100644
--- a/xsd/examples/cxx/tree/streaming/parser.cxx
+++ b/xsd/examples/cxx/tree/streaming/parser.cxx
@@ -1,6 +1,4 @@
-// file : examples/cxx/tree/streaming/parser.cxx
-// author : Boris Kolpackov <boris@codesynthesis.com>
-// copyright : not copyrighted - public domain
+#include <cassert>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLString.hpp>
@@ -11,10 +9,10 @@
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/dom/DOM.hpp>
+#include <xercesc/dom/impl/DOMTextImpl.hpp>
-#if _XERCES_VERSION >= 30000
-# include <xercesc/dom/impl/DOMTextImpl.hpp>
-#endif
+#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar
+#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
#include <xsd/cxx/auto-array.hxx>
@@ -25,6 +23,7 @@
#include <xsd/cxx/tree/error-handler.hxx>
#include "parser.hxx"
+#include "grammar-input-stream.hxx"
using namespace std;
using namespace xercesc;
@@ -32,16 +31,22 @@ using namespace xercesc;
namespace xml = xsd::cxx::xml;
namespace tree = xsd::cxx::tree;
+typedef parser::document_ptr document_ptr;
+
class parser_impl: public DefaultHandler
{
public:
- parser_impl ();
+ parser_impl (const XMLByte* grammar, size_t grammar_size);
- xml::dom::auto_ptr<DOMDocument>
+ void
start (istream& is, const string& id, bool validate);
- xml::dom::auto_ptr<DOMDocument>
- next ();
+ document_ptr
+ peek ();
+
+ document_ptr
+ next (document_ptr doc = document_ptr (),
+ document_ptr outer_doc = document_ptr ());
// SAX event handlers.
//
@@ -59,17 +64,13 @@ private:
virtual void
characters (const XMLCh* const s,
-#if _XERCES_VERSION >= 30000
- const XMLSize_t length
-#else
- const unsigned int length
-#endif
- );
+ const XMLSize_t length);
private:
// SAX parser.
//
bool clean_;
+ auto_ptr<XMLGrammarPool> grammar_pool_;
auto_ptr<SAX2XMLReader> parser_;
XMLPScanToken token_;
tree::error_handler<char> error_handler_;
@@ -77,23 +78,40 @@ private:
auto_ptr<xml::sax::std_input_source> isrc_;
size_t depth_;
+ size_t whitespace_depth_; // Depth at which to ignore whitespaces.
+
+ bool peek_;
+ size_t next_depth_; // Depth at which next() should work.
// DOM document being built.
//
DOMImplementation& dom_impl_;
- xml::dom::auto_ptr<DOMDocument> doc_;
+ document_ptr doc_;
DOMElement* cur_;
};
const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
parser_impl::
-parser_impl ()
+parser_impl (const XMLByte* grammar, size_t grammar_size)
: clean_ (true),
- parser_ (XMLReaderFactory::createXMLReader ()),
error_proxy_ (error_handler_),
dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
{
+ MemoryManager* mm (XMLPlatformUtils::fgMemoryManager);
+
+ if (grammar != 0)
+ {
+ assert (grammar_size != 0);
+ grammar_pool_.reset (new XMLGrammarPoolImpl (mm));
+
+ grammar_input_stream is (grammar, grammar_size);
+ grammar_pool_->deserializeGrammars(&is);
+ grammar_pool_->lockPool ();
+ }
+
+ parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ()));
+
parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
@@ -101,7 +119,7 @@ parser_impl ()
// Xerces-C++ 3.1.0 is the first version with working multi import
// support. It also allows us to disable buffering in the parser
- // so that the date is parsed and returned as soon as it is
+ // so that the data is parsed and returned as soon as it is
// available.
//
#if _XERCES_VERSION >= 30100
@@ -115,12 +133,13 @@ parser_impl ()
parser_->setContentHandler (this);
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
+void parser_impl::
start (istream& is, const string& id, bool val)
{
// Reset our state.
//
depth_ = 0;
+ peek_ = false;
doc_.reset ();
error_handler_.reset ();
@@ -134,59 +153,116 @@ start (istream& is, const string& id, bool val)
parser_->setFeature (XMLUni::fgSAX2CoreValidation, val);
parser_->setFeature (XMLUni::fgXercesSchema, val);
- // Start parsing. The first document that we return is a "carcase"
- // of the complete document. That is, the root element with all the
- // attributes but without any content.
- //
- bool r (parser_->parseFirst (*isrc_, token_));
+ if (val && grammar_pool_.get () != 0)
+ {
+ // Use the loaded grammar during parsing.
+ //
+ parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true);
+
+ // Disable loading schemas via other means (e.g., schemaLocation).
+ //
+ parser_->setFeature (XMLUni::fgXercesLoadSchema, false);
+ }
+
+ parser_->parseFirst (*isrc_, token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
+}
+
+document_ptr parser_impl::
+peek ()
+{
+ bool r (true);
+
+ size_t d (depth_);
+ whitespace_depth_ = d;
+
+ peek_ = true;
- while (r && depth_ == 0)
+ // Parse (skip whitespace content) until the depth increases or we get
+ // a document. The latter test covers <element/> cases where both start
+ // and end events will trigger and therefore leave the depth unchanged.
+ //
+ while (r && depth_ == d && doc_.get () == 0)
{
r = parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
return doc_;
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
-next ()
+document_ptr parser_impl::
+next (document_ptr doc, document_ptr outer_doc)
{
- // We should be at depth 1. If not, then we are done parsing.
+ assert (peek_ == (doc.get () != 0));
+
+ // Install doc/outer_doc as the document we are parsing.
//
- if (depth_ != 1)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ if (doc.get () != 0)
+ {
+ if (outer_doc.get () != 0)
+ {
+ // Copy doc to outer_doc.
+ //
+ doc_ = outer_doc;
+ cur_ = static_cast<DOMElement*> (
+ doc_->importNode (doc->getDocumentElement (), true));
+ doc_->getDocumentElement ()->appendChild (cur_);
+ }
+ else
+ {
+ doc_ = doc;
+ cur_ = doc_->getDocumentElement ();
+ }
+
+ // This handles the <element/> case where we get both start and
+ // end events in peek(). In this case the element is fully parsed
+ // and next() has nothing to do.
+ //
+ if (depth_ != next_depth_)
+ {
+ peek_ = false;
+ return doc_;
+ }
+ }
bool r (true);
+ // If we peeked, then we have already seen the start tag and our
+ // return depth is one above the current depth.
+ //
+ size_t d (peek_ ? depth_ - 1 : depth_);
+ whitespace_depth_ = d;
+
+ peek_ = false;
+
// Keep calling parseNext() until we either move to a greater depth or
// get a document. This way we skip the text (presumably whitespaces)
- // that may be preceding the next chunk.
+ // that may be preceding this chunk.
//
- while (r && depth_ == 1 && doc_.get () == 0)
+ while (r && depth_ == d && doc_.get () == 0)
{
parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
- // If we are not at depth 1, keep calling parseNext() until we get
- // there.
+ // If we are not at our start depth, keep calling parseNext() until we
+ // get there again.
//
- while (r && depth_ != 1)
+ while (r && depth_ != d)
{
r = parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
return doc_;
}
@@ -214,18 +290,25 @@ startElement (const XMLCh* const uri,
// Set attributes.
//
-#if _XERCES_VERSION >= 30000
for (XMLSize_t i (0), end (attr.getLength()); i < end; ++i)
-#else
- for (unsigned int i (0), end (attr.getLength()); i < end; ++i)
-#endif
{
- cur_->setAttributeNS (attr.getURI (i),
- attr.getQName (i),
- attr.getValue (i));
+ const XMLCh* qn (attr.getQName (i));
+ const XMLCh* ns (attr.getURI (i));
+
+ // When SAX2 reports the xmlns attribute, it does not include
+ // the proper attribute namespace. So we have to detect and
+ // handle this case.
+ //
+ if (XMLString::equals (qn, XMLUni::fgXMLNSString))
+ ns = XMLUni::fgXMLNSURIName;
+
+ cur_->setAttributeNS (ns, qn, attr.getValue (i));
}
depth_++;
+
+ if (peek_)
+ next_depth_ = depth_;
}
void parser_impl::
@@ -239,38 +322,21 @@ endElement (const XMLCh* const /*uri*/,
cur_ = static_cast<DOMElement*> (cur_->getParentNode ());
}
-#if _XERCES_VERSION >= 30000
void parser_impl::
characters (const XMLCh* const s, const XMLSize_t length)
{
const XMLCh empty[] = {chNull};
- // Ignore text content (presumably whitespaces) in the root element.
+ // Ignore text content (presumably whitespaces) while looking for
+ // the next element.
//
- if (depth_ > 1)
+ if (depth_ > whitespace_depth_)
{
DOMText* t = doc_->createTextNode (empty);
static_cast<DOMTextImpl*> (t)->appendData (s, length);
cur_->appendChild (t);
}
}
-#else
-void parser_impl::
-characters (const XMLCh* const s, const unsigned int length)
-{
- // Ignore text content (presumably whitespaces) in the root element.
- //
- if (depth_ > 1)
- {
- // For Xerces-C++ 2-series we have to make copy.
- //
- xsd::cxx::auto_array<XMLCh> tmp (new XMLCh[length + 1]);
- XMLString::copyNString (tmp.get (), s, length);
- cur_->appendChild (doc_->createTextNode (tmp.get ()));
- }
-}
-#endif
-
//
// parser
@@ -282,19 +348,25 @@ parser::
}
parser::
-parser ()
- : impl_ (new parser_impl)
+parser (const XMLByte* grammar, size_t grammar_size)
+ : impl_ (new parser_impl (grammar, grammar_size))
{
}
-xml::dom::auto_ptr<DOMDocument> parser::
+void parser::
start (istream& is, const string& id, bool val)
{
return impl_->start (is, id, val);
}
-xml::dom::auto_ptr<DOMDocument> parser::
-next ()
+document_ptr parser::
+peek ()
+{
+ return impl_->peek ();
+}
+
+document_ptr parser::
+next (document_ptr doc, document_ptr outer_doc)
{
- return impl_->next ();
+ return impl_->next (doc, outer_doc);
}