1 files changed, 145 insertions, 73 deletions
diff --git a/xsd/examples/cxx/tree/streaming/parser.cxx b/xsd/examples/cxx/tree/streaming/parser.cxx
index b0d9df7..41ad7af 100644
--- a/xsd/examples/cxx/tree/streaming/parser.cxx
+++ b/xsd/examples/cxx/tree/streaming/parser.cxx
@@ -1,6 +1,4 @@
-// file      : examples/cxx/tree/streaming/parser.cxx
-// author    : Boris Kolpackov <boris@codesynthesis.com>
-// copyright : not copyrighted - public domain
+#include <cassert>
 
 #include <xercesc/util/XMLUni.hpp>
 #include <xercesc/util/XMLString.hpp>
@@ -11,10 +9,10 @@
 #include <xercesc/sax2/XMLReaderFactory.hpp>
 
 #include <xercesc/dom/DOM.hpp>
+#include <xercesc/dom/impl/DOMTextImpl.hpp>
 
-#if _XERCES_VERSION >= 30000
-#  include <xercesc/dom/impl/DOMTextImpl.hpp>
-#endif
+#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar
+#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
 
 #include <xsd/cxx/auto-array.hxx>
 
@@ -25,6 +23,7 @@
 #include <xsd/cxx/tree/error-handler.hxx>
 
 #include "parser.hxx"
+#include "grammar-input-stream.hxx"
 
 using namespace std;
 using namespace xercesc;
@@ -32,16 +31,22 @@ using namespace xercesc;
 namespace xml = xsd::cxx::xml;
 namespace tree = xsd::cxx::tree;
 
+typedef parser::document_ptr document_ptr;
+
 class parser_impl: public DefaultHandler
 {
 public:
-  parser_impl ();
+  parser_impl (const XMLByte* grammar, size_t grammar_size);
 
-  xml::dom::auto_ptr<DOMDocument>
+  void
   start (istream& is, const string& id, bool validate);
 
-  xml::dom::auto_ptr<DOMDocument>
-  next ();
+  document_ptr
+  peek ();
+
+  document_ptr
+  next (document_ptr doc = document_ptr (),
+        document_ptr outer_doc = document_ptr ());
 
   // SAX event handlers.
   //
@@ -59,17 +64,13 @@ private:
 
   virtual void
   characters (const XMLCh* const s,
-#if _XERCES_VERSION >= 30000
-              const XMLSize_t length
-#else
-              const unsigned int length
-#endif
-  );
+              const XMLSize_t length);
 
 private:
   // SAX parser.
   //
   bool clean_;
+  auto_ptr<XMLGrammarPool> grammar_pool_;
   auto_ptr<SAX2XMLReader> parser_;
   XMLPScanToken token_;
   tree::error_handler<char> error_handler_;
@@ -77,23 +78,40 @@ private:
   auto_ptr<xml::sax::std_input_source> isrc_;
 
   size_t depth_;
+  size_t whitespace_depth_; // Depth at which to ignore whitespaces.
+
+  bool peek_;
+  size_t next_depth_; // Depth at which next() should work.
 
   // DOM document being built.
   //
   DOMImplementation& dom_impl_;
-  xml::dom::auto_ptr<DOMDocument> doc_;
+  document_ptr doc_;
   DOMElement* cur_;
 };
 
 const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
 
 parser_impl::
-parser_impl ()
+parser_impl (const XMLByte* grammar, size_t grammar_size)
     : clean_ (true),
-      parser_ (XMLReaderFactory::createXMLReader ()),
       error_proxy_ (error_handler_),
       dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
 {
+  MemoryManager* mm (XMLPlatformUtils::fgMemoryManager);
+
+  if (grammar != 0)
+  {
+    assert (grammar_size != 0);
+    grammar_pool_.reset (new XMLGrammarPoolImpl (mm));
+
+    grammar_input_stream is (grammar, grammar_size);
+    grammar_pool_->deserializeGrammars(&is);
+    grammar_pool_->lockPool ();
+  }
+
+  parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ()));
+
   parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
   parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
   parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
@@ -101,7 +119,7 @@ parser_impl ()
 
   // Xerces-C++ 3.1.0 is the first version with working multi import
   // support. It also allows us to disable buffering in the parser
-  // so that the date is parsed and returned as soon as it is
+  // so that the data is parsed and returned as soon as it is
   // available.
   //
 #if _XERCES_VERSION >= 30100
@@ -115,12 +133,13 @@ parser_impl ()
   parser_->setContentHandler (this);
 }
 
-xml::dom::auto_ptr<DOMDocument> parser_impl::
+void parser_impl::
 start (istream& is, const string& id, bool val)
 {
   // Reset our state.
   //
   depth_ = 0;
+  peek_ = false;
   doc_.reset ();
   error_handler_.reset ();
 
@@ -134,59 +153,116 @@ start (istream& is, const string& id, bool val)
   parser_->setFeature (XMLUni::fgSAX2CoreValidation, val);
   parser_->setFeature (XMLUni::fgXercesSchema, val);
 
-  // Start parsing. The first document that we return is a "carcase"
-  // of the complete document. That is, the root element with all the
-  // attributes but without any content.
-  //
-  bool r (parser_->parseFirst (*isrc_, token_));
+  if (val && grammar_pool_.get () != 0)
+  {
+    // Use the loaded grammar during parsing.
+    //
+    parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true);
+
+    // Disable loading schemas via other means (e.g., schemaLocation).
+    //
+    parser_->setFeature (XMLUni::fgXercesLoadSchema, false);
+  }
+
+  parser_->parseFirst (*isrc_, token_);
   error_handler_.throw_if_failed<tree::parsing<char> > ();
+}
+
+document_ptr parser_impl::
+peek ()
+{
+  bool r (true);
+
+  size_t d (depth_);
+  whitespace_depth_ = d;
+
+  peek_ = true;
 
-  while (r && depth_ == 0)
+  // Parse (skip whitespace content) until the depth increases or we get
+  // a document. The latter test covers <element/> cases where both start
+  // and end events will trigger and therefore leave the depth unchanged.
+  //
+  while (r && depth_ == d && doc_.get () == 0)
   {
     r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
   return doc_;
 }
 
-xml::dom::auto_ptr<DOMDocument> parser_impl::
-next ()
+document_ptr parser_impl::
+next (document_ptr doc, document_ptr outer_doc)
 {
-  // We should be at depth 1. If not, then we are done parsing.
+  assert (peek_ == (doc.get () != 0));
+
+  // Install doc/outer_doc as the document we are parsing.
   //
-  if (depth_ != 1)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+  if (doc.get () != 0)
+  {
+    if (outer_doc.get () != 0)
+    {
+      // Copy doc to outer_doc.
+      //
+      doc_ = outer_doc;
+      cur_ = static_cast<DOMElement*> (
+        doc_->importNode (doc->getDocumentElement (), true));
+      doc_->getDocumentElement ()->appendChild (cur_);
+    }
+    else
+    {
+      doc_ = doc;
+      cur_ = doc_->getDocumentElement ();
+    }
+
+    // This handles the <element/> case where we get both start and
+    // end events in peek(). In this case the element is fully parsed
+    // and next() has nothing to do.
+    //
+    if (depth_ != next_depth_)
+    {
+      peek_ = false;
+      return doc_;
+    }
+  }
 
   bool r (true);
 
+  // If we peeked, then we have already seen the start tag and our
+  // return depth is one above the current depth.
+  //
+  size_t d (peek_ ? depth_ - 1 : depth_);
+  whitespace_depth_ = d;
+
+  peek_ = false;
+
   // Keep calling parseNext() until we either move to a greater depth or
   // get a document. This way we skip the text (presumably whitespaces)
-  // that may be preceding the next chunk.
+  // that may be preceding this chunk. Stop once the parser is done.
   //
-  while (r && depth_ == 1 && doc_.get () == 0)
+  while (r && depth_ == d && doc_.get () == 0)
   {
-    parser_->parseNext (token_);
+    r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
-  // If we are not at depth 1, keep calling parseNext() until we get
-  // there.
+  // If we are not at our start depth, keep calling parseNext() until we
+  // get there again.
   //
-  while (r && depth_ != 1)
+  while (r && depth_ != d)
   {
     r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
   return doc_;
 }
@@ -214,18 +290,25 @@ startElement (const XMLCh* const uri,
 
   // Set attributes.
   //
-#if _XERCES_VERSION >= 30000
   for (XMLSize_t i (0), end (attr.getLength()); i < end; ++i)
-#else
-  for (unsigned int i (0), end (attr.getLength()); i < end; ++i)
-#endif
   {
-    cur_->setAttributeNS (attr.getURI (i),
-                          attr.getQName (i),
-                          attr.getValue (i));
+    const XMLCh* qn (attr.getQName (i));
+    const XMLCh* ns (attr.getURI (i));
+
+    // When SAX2 reports the xmlns attribute, it does not include
+    // the proper attribute namespace. So we have to detect and
+    // handle this case.
+    //
+    if (XMLString::equals (qn, XMLUni::fgXMLNSString))
+      ns = XMLUni::fgXMLNSURIName;
+
+    cur_->setAttributeNS (ns, qn, attr.getValue (i));
   }
 
   depth_++;
+
+  if (peek_)
+    next_depth_ = depth_;
 }
 
 void parser_impl::
@@ -239,38 +322,21 @@ endElement (const XMLCh* const /*uri*/,
     cur_ = static_cast<DOMElement*> (cur_->getParentNode ());
 }
 
-#if _XERCES_VERSION >= 30000
 void parser_impl::
 characters (const XMLCh* const s, const XMLSize_t length)
 {
   const XMLCh empty[] = {chNull};
 
-  // Ignore text content (presumably whitespaces) in the root element.
+  // Ignore text content (presumably whitespace) at depth_ no greater
+  // than whitespace_depth_, i.e., while looking for the next element.
   //
-  if (depth_ > 1)
+  if (depth_ > whitespace_depth_)
   {
     DOMText* t = doc_->createTextNode (empty);
     static_cast<DOMTextImpl*> (t)->appendData (s, length);
     cur_->appendChild (t);
   }
 }
-#else
-void parser_impl::
-characters (const XMLCh* const s, const unsigned int length)
-{
-  // Ignore text content (presumably whitespaces) in the root element.
-  //
-  if (depth_ > 1)
-  {
-    // For Xerces-C++ 2-series we have to make copy.
-    //
-    xsd::cxx::auto_array<XMLCh> tmp (new XMLCh[length + 1]);
-    XMLString::copyNString (tmp.get (), s, length);
-    cur_->appendChild (doc_->createTextNode (tmp.get ()));
-  }
-}
-#endif
-
 
 //
 // parser
@@ -282,19 +348,25 @@ parser::
 }
 
 parser::
-parser ()
-    : impl_ (new parser_impl)
+parser (const XMLByte* grammar, size_t grammar_size)
+    : impl_ (new parser_impl (grammar, grammar_size))
 {
 }
 
-xml::dom::auto_ptr<DOMDocument> parser::
+void parser::
 start (istream& is, const string& id, bool val)
 {
   return impl_->start (is, id, val);
 }
 
-xml::dom::auto_ptr<DOMDocument> parser::
-next ()
+document_ptr parser::
+peek ()
+{
+  return impl_->peek ();
+}
+
+document_ptr parser::
+next (document_ptr doc, document_ptr outer_doc)
 {
-  return impl_->next ();
+  return impl_->next (doc, outer_doc);
 }