From a15cf65c44d5c224169c32ef5495b68c758134b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 18 May 2014 16:08:14 +0200 Subject: Imported Upstream version 3.3.0.2 --- libxsd-frontend/xsd-frontend/parser.cxx | 5126 +++++++++++++++++++++++++++++++ 1 file changed, 5126 insertions(+) create mode 100644 libxsd-frontend/xsd-frontend/parser.cxx (limited to 'libxsd-frontend/xsd-frontend/parser.cxx') diff --git a/libxsd-frontend/xsd-frontend/parser.cxx b/libxsd-frontend/xsd-frontend/parser.cxx new file mode 100644 index 0000000..901a8d3 --- /dev/null +++ b/libxsd-frontend/xsd-frontend/parser.cxx @@ -0,0 +1,5126 @@ +// file : xsd-frontend/parser.cxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +//@@ Do i need this? +// +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include // std::auto_ptr + +using std::wcout; +using std::wcerr; +using std::endl; + +using Cult::RTTI::TypeId; + +namespace XSDFrontend +{ + namespace Xerces = XML::Xerces; + using namespace SemanticGraph; + + //@@ Port to tracing facility. + // + Boolean trace_ = false; + + String const xsd = L"http://www.w3.org/2001/XMLSchema"; + String const xse = L"http://www.codesynthesis.com/xmlns/xml-schema-extension"; + + namespace + { + // + // Exceptions. 
+ // + + struct NotNamespace + { + NotNamespace (String const& ns) + : ns_ (ns) + { + } + + String const& + ns () const + { + return ns_; + } + + private: + String ns_; + }; + + struct NotName + { + NotName (String const& ns, String const& name) + : ns_ (ns), name_ (name) + { + } + + String const& + ns () const + { + return ns_; + } + + String const& + name () const + { + return name_; + } + + private: + String ns_; + String name_; + }; + + + // Name cache. We only support maximum two nodes with the same + // name in the cache (e.g., element and type). For (rare) cases + // where there is three or more names, there will be a cache miss. + // + struct CacheNodes + { + CacheNodes () : first (0), second (0) {} + + Nameable* first; + Nameable* second; + }; + + typedef Cult::Containers::Map NodeMap; + typedef Cult::Containers::Map NamespaceMap; + typedef Cult::Containers::Vector DefaultValues; + + template + X& + resolve (String const& ns_name, + String const& uq_name, + Schema& s_, + NamespaceMap& cache) + { + // First check the cache. + // + NamespaceMap::Iterator i (cache.find (ns_name)); + + if (i != cache.end ()) + { + NodeMap::Iterator j (i->second.find (uq_name)); + + if (j != i->second.end ()) + { + X* x; + + if ((x = dynamic_cast (j->second.first)) || + (x = dynamic_cast (j->second.second))) + return *x; + } + } + + Scope::NamesIteratorPair nss (s_.find (ns_name)); + + if (nss.first == nss.second) + throw NotNamespace (ns_name); + + for (; nss.first != nss.second; ++nss.first) + { + Namespace& ns (dynamic_cast (nss.first->named ())); + + Scope::NamesIteratorPair types (ns.find (uq_name)); + + for (; types.first != types.second; ++types.first) + { + if (X* x = dynamic_cast (&types.first->named ())) + { + if (trace_) + wcout << "successfully resolved '" << ns_name << '#' << uq_name + << "'" << endl; + + // Add to the cache if there are free slots. + // + NodeMap& m (i != cache.end () ? 
i->second : cache[ns_name]); + CacheNodes& n (m[uq_name]); + + if (n.first == 0) + n.first = x; + else if (n.second == 0) + n.second = x; + + return *x; + } + } + } + + throw NotName (ns_name, uq_name); + } + + // + // + typedef Cult::Containers::Map Facets; + + Void + copy_facets (Restricts& r, Facets const& f) + { + for (Facets::ConstIterator i (f.begin ()), e (f.end ()); i != e; ++i) + r.facet_insert (i->first, i->second); + } + + // + // + struct UnionMemberType + { + UnionMemberType (String const& ns, String const& uq) + : ns_name (ns), uq_name (uq) + { + } + + String ns_name; + String uq_name; + }; + + typedef Cult::Containers::Vector UnionMemberTypes; + + // + // + struct ElementGroupRef + { + ElementGroupRef (String const& uq_name_, String const& ns_name_, + UnsignedLong min_, UnsignedLong max_, + Compositor& compositor, Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_), + min (min_), max (max_) + { + contains_pos = compositor.contains_end (); + if (compositor.contains_begin () != contains_pos) + --contains_pos; + + names_pos = scope.names_end (); + if (scope.names_begin () != names_pos) + --names_pos; + } + + ElementGroupRef (String const& uq_name_, String const& ns_name_, + UnsignedLong min_, UnsignedLong max_, + Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_), + min (min_), max (max_) + { + names_pos = scope.names_end (); + if (scope.names_begin () != names_pos) + --names_pos; + } + + String uq_name; + String ns_name; + UnsignedLong min, max; + Compositor::ContainsIterator contains_pos; + Scope::NamesIterator names_pos; + }; + + typedef Cult::Containers::Vector ElementGroupRefs; + + + // + // + struct AttributeGroupRef + { + AttributeGroupRef (String const& uq_name_, + String const& ns_name_, + Scope& scope) + : uq_name (uq_name_), ns_name (ns_name_) + { + names_pos = scope.names_end (); + if (scope.names_begin () != names_pos) + --names_pos; + } + + String uq_name; + String ns_name; + Scope::NamesIterator names_pos; + }; + + typedef 
Cult::Containers::Vector AttributeGroupRefs; + + + // + // + template + struct NodeArgs + { + NodeArgs (N& node, A arg) + : node_ (node), arg_ (arg) + { + } + + operator N& () const + { + return node_; + } + + template + Void + add_edge_left (E& e) + { + node_.add_edge_left (e, arg_); + } + + template + Void + add_edge_right (E& e) + { + node_.add_edge_right (e, arg_); + } + + private: + N& node_; + A arg_; + }; + + + // + // + struct Resolver : Traversal::Element, + Traversal::Attribute, + Traversal::Fundamental::IdRef, + Traversal::Fundamental::IdRefs, + Traversal::List, + Traversal::Union, + Traversal::Complex, + Traversal::Enumeration, + Traversal::ElementGroup, + Traversal::AttributeGroup, + Traversal::Compositor + { + Resolver (Schema& s, + Boolean& valid, + NamespaceMap& cache, + DefaultValues& default_values) + : s_ (s), + valid_ (valid), + cache_ (cache), + default_values_ (default_values) + { + *this >> contains_compositor >> *this; + } + + Void + traverse (SemanticGraph::Attribute& a) + { + // Avoid traversing attribute more than once. + // + if (!a.context ().count ("attribute-traversed")) + { + a.context ().set ("attribute-traversed", true); + SemanticGraph::Member& m (a); + resolve_member (m); + } + } + + Void + traverse (SemanticGraph::Element& e) + { + resolve_element (e); + } + + Void + resolve_element (SemanticGraph::Element& e) + { + // Avoid resolving element more than once. 
+ // + if (e.context ().count ("element-resolved")) + return; + + e.context ().set ("element-resolved", true); + + { + SemanticGraph::Member& m (e); + resolve_member (m); + } + + if (e.context ().count ("substitution-ns-name")) + { + String ns_name (e.context ().get ("substitution-ns-name")); + String uq_name (e.context ().get ("substitution-uq-name")); + + e.context ().remove ("substitution-ns-name"); + e.context ().remove ("substitution-uq-name"); + + try + { + SemanticGraph::Element& root ( + resolve (ns_name, uq_name, s_, cache_)); + + s_.new_edge (e, root); + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" < ("type-ns-name"); + uq_name = m.context ().get ("type-uq-name"); + + m.context ().remove ("type-ns-name"); + m.context ().remove ("type-uq-name"); + m.context ().remove ("edge-type-id"); + + s_.new_edge ( + m, resolve (ns_name, uq_name, s_, cache_)); + } + else if (m.context ().count ("instance-ns-name")) + { + ns_name = m.context ().get ("instance-ns-name"); + uq_name = m.context ().get ("instance-uq-name"); + + m.context ().remove ("instance-ns-name"); + m.context ().remove ("instance-uq-name"); + + + Member& ref (resolve (ns_name, uq_name, s_, cache_)); + + // Make sure the referenced member is fully resolved. + // @@ Substitutes edge won't be resolved. + // + resolve_member (ref); + + + // Substitution group info. We have to test for both resolved + // and unresolved cases since we don't know whether it was + // resolved or not. 
+ // + if (ref.is_a ()) + { + Element& m_e (dynamic_cast (m)); + Element& ref_e (dynamic_cast (ref)); + + if (ref_e.substitutes_p ()) + { + s_.new_edge (m_e, ref_e.substitutes ().root ()); + } + else if (ref_e.context ().count ("substitution-ns-name")) + { + m_e.context ().set ( + "substitution-ns-name", + ref_e.context ().get ("substitution-ns-name")); + + m_e.context ().set ( + "substitution-uq-name", + ref_e.context ().get ("substitution-uq-name")); + } + } + + // + // + s_.new_edge (m, ref.namespace_ ()); + + // Transfer default and fixed values if we haven't already + // gotten them. + // + if (!m.default_p ()) + { + if (ref.fixed_p ()) + m.fixed (ref.value ()); + else if (ref.default_p ()) + { + // Default value applies only if the attribute is optional. + // + if (Attribute* a = dynamic_cast (&m)) + { + if (a->optional_p ()) + m.default_ (ref.value ()); + } + else + m.default_ (ref.value ()); + } + + if (m.default_p ()) + { + m.context ().set ( + "dom-node", + ref.context ().get ("dom-node")); + default_values_.push_back (&m); + } + } + + // Transfer annotation if we haven't already gotten it. + // + if (!m.annotated_p () && ref.annotated_p ()) + s_.new_edge (ref.annotation (), m); + + // Type info. 
+ // + if (ref.typed_p ()) + s_.new_edge (m, ref.type ()); + else + { + if (valid_) + { + wcerr << "ice: referenced instance '" << ns_name << "#" + << uq_name << "' is not typed" << endl; + abort (); + } + } + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" < ("type-ns-name")); + String uq_name (s.context ().get ("type-uq-name")); + + s.context ().remove ("type-ns-name"); + s.context ().remove ("type-uq-name"); + s.context ().remove ("edge-type-id"); + + try + { + s_.new_edge ( + resolve (ns_name, uq_name, s_, cache_), s); + } + catch (NotName const& ex) + { + wcerr << s.file () << ":" << s.line () << ":" << s.column () << ": " + << "error: unable to resolve type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + } + + Void + traverse (SemanticGraph::List& l) + { + if (l.context ().count ("type-ns-name")) + { + String ns_name (l.context ().get ("type-ns-name")); + String uq_name (l.context ().get ("type-uq-name")); + + l.context ().remove ("type-ns-name"); + l.context ().remove ("type-uq-name"); + l.context ().remove ("edge-type-id"); + + try + { + s_.new_edge ( + resolve (ns_name, uq_name, s_, cache_), l); + } + catch (NotName const& ex) + { + wcerr << l.file () << ":" << l.line () << ":" << l.column () << ": " + << "error: unable to resolve item type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + Traversal::List::traverse (l); + } + + Void + traverse (SemanticGraph::Union& u) + { + using SemanticGraph::Union; + + if (u.context ().count ("union-member-types")) + { + UnionMemberTypes const& m ( + u.context ().get ("union-member-types")); + + // Process it backwards so that we can just insert each + // 
edge in the front. + // + for (UnionMemberTypes::ConstReverseIterator i (m.rbegin ()); + i != m.rend (); i++) + { + try + { + NodeArgs na ( + u, u.argumented_begin ()); + + s_.new_edge ( + resolve ( + i->ns_name, i->uq_name, s_, cache_), na); + } + catch (NotName const& ex) + { + wcerr << u.file () << ":" << u.line () << ":" << u.column () << ": " + << "error: unable to resolve item type '" << i->uq_name << "' " + << "in namespace '" << i->ns_name << "'" << endl; + + valid_ = false; + } + } + + u.context ().remove ("union-member-types"); + } + + Traversal::Union::traverse (u); + } + + Void + traverse (SemanticGraph::Complex& c) + { + // Avoid traversing complex type more than once. + // + if (c.context ().count ("complex-type-resolved")) + return; + + c.context ().set ("complex-type-resolved", true); + + // Resolve base type if any. + // + if (c.context ().count ("type-ns-name")) + { + using Cult::RTTI::TypeId; + + String ns_name (c.context ().get ("type-ns-name")); + String uq_name (c.context ().get ("type-uq-name")); + TypeId edge_id (c.context ().get ("edge-type-id")); + + c.context ().remove ("type-ns-name"); + c.context ().remove ("type-uq-name"); + c.context ().remove ("edge-type-id"); + + try + { + if (edge_id == typeid (Extends)) + { + s_.new_edge ( + c, resolve ( + ns_name, uq_name, s_, cache_)); + } + else if (edge_id == typeid (Restricts)) + { + Restricts& r ( + s_.new_edge ( + c, resolve ( + ns_name, uq_name, s_, cache_))); + + if (c.context ().count ("facets")) + { + Facets const& f (c.context ().get ("facets")); + copy_facets (r, f); + c.context ().remove ("facets"); + } + } + else + assert (false); + } + catch (NotName const& ex) + { + wcerr << c.file () << ":" << c.line () << ":" << c.column () << ": " + << "error: unable to resolve base type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + // Resolve attribute-group-refs. 
Do it before element-group-refs + // so that if the scope was empty they end up at the end. + // + if (c.context ().count ("attribute-group-refs")) + { + AttributeGroupRefs& refs ( + c.context ().get ("attribute-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. + // + for (AttributeGroupRefs::ReverseIterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + clone_attribute_group_content (*i, c); + } + + c.context ().remove ("attribute-group-refs"); + } + + // Resolve element-group-ref if any. + // + if (c.context ().count ("element-group-ref")) + { + using SemanticGraph::Compositor; + + ElementGroupRef& ref ( + c.context ().get ("element-group-ref")); + + Compositor* comp (clone_element_group_content (c, ref)); + + // Create ContainsCompositor edge. + // + if (comp) + s_.new_edge (c, *comp, ref.min, ref.max); + + c.context ().remove ("element-group-ref"); + } + + Traversal::Complex::traverse (c); + } + + Void + traverse (SemanticGraph::Enumeration& e) + { + // Resolve base type if any. 
+ // + if (e.context ().count ("type-ns-name")) + { + String ns_name (e.context ().get ("type-ns-name")); + String uq_name (e.context ().get ("type-uq-name")); + + e.context ().remove ("type-ns-name"); + e.context ().remove ("type-uq-name"); + e.context ().remove ("edge-type-id"); + + try + { + Restricts& r ( + s_.new_edge ( + e, resolve ( + ns_name, uq_name, s_, cache_))); + + if (e.context ().count ("facets")) + { + Facets const& f (e.context ().get ("facets")); + copy_facets (r, f); + e.context ().remove ("facets"); + } + } + catch (NotName const& ex) + { + wcerr << e.file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve base type '" << uq_name << "' " + << "in namespace '" << ns_name << "'" << endl; + + valid_ = false; + } + } + + Traversal::Enumeration::traverse (e); + } + + Void + traverse (SemanticGraph::ElementGroup& g) + { + // Avoid traversing groups more than once. + // + if (!g.context ().count ("element-group-traversed")) + { + g.context ().set ("element-group-traversed", true); + Traversal::ElementGroup::traverse (g); + + // Note that setting element-group-resolved after traversing + // the group allows for a recursive shallow resolution using + // resolve_element_group. + // + g.context ().set ("element-group-resolved", true); + } + } + + // We need a "shallow" resolve to break possible recursing: + // group->element->complexType->group. + // + Void + resolve_element_group (SemanticGraph::ElementGroup& g) + { + using SemanticGraph::Scope; + using SemanticGraph::Element; + + // Avoid resolving groups more than once. 
+ // + if (!g.context ().count ("element-group-resolved")) + { + g.context ().set ("element-group-resolved", true); + + for (Scope::NamesIterator i (g.names_begin ()); + i != g.names_end (); ++i) + { + if (Element* e = dynamic_cast (&i->named ())) + resolve_element (*e); + } + + traverse (g.contains_compositor ().compositor ()); + } + } + + Void + traverse (SemanticGraph::AttributeGroup& g) + { + // Avoid traversing groups more than once. + // + if (g.context ().count ("attribute-group-resolved")) + return; + + g.context ().set ("attribute-group-resolved", true); + + // Resolve attribute-group-refs. + // + if (g.context ().count ("attribute-group-refs")) + { + AttributeGroupRefs& refs ( + g.context ().get ("attribute-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. + // + for (AttributeGroupRefs::ReverseIterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + clone_attribute_group_content (*i, g); + } + + g.context ().remove ("attribute-group-refs"); + } + + Traversal::AttributeGroup::traverse (g); + } + + Void + traverse (SemanticGraph::Compositor& c) + { + using SemanticGraph::Compositor; + + // Resolve element-group-refs if any. + // + if (c.context ().count ("element-group-refs")) + { + using SemanticGraph::Scope; + + ElementGroupRefs& refs ( + c.context ().get ("element-group-refs")); + + // Handle refs from last to first so that multiple insertions + // to an empty list (always front) end up in proper order. + // + for (ElementGroupRefs::ReverseIterator i (refs.rbegin ()); + i != refs.rend (); ++i) + { + // Find our scope. + // + Compositor* j (&c); + + while(!j->contained_compositor_p ()) + j = &j->contained_particle ().compositor (); + + Compositor* comp ( + clone_element_group_content ( + dynamic_cast (j->contained_compositor ().container ()), + *i)); + + // Create ContainsParticle edge. 
+ // + if (comp) + { + NodeArgs na ( + c, i->contains_pos); + s_.new_edge (na, *comp, i->min, i->max); + } + } + + c.context ().remove ("element-group-refs"); + } + + // Traverse recursively but only particles that are compositors. + // This way we won't trigger anonymous type traversal (via member) + // and therefore can call this functions from resolve_element_group + // to completely resolve a group. + // + for (Compositor::ContainsIterator i (c.contains_begin ()), + e (c.contains_end ()); i != e; ++i) + { + SemanticGraph::Particle& p (i->particle ()); + + if (p.is_a ()) + dispatch (p); + } + + // Traversal::Compositor::traverse (c); + } + + SemanticGraph::Compositor* + clone_element_group_content (SemanticGraph::Scope& s, + ElementGroupRef const& ref) + { + using SemanticGraph::Scope; + using SemanticGraph::Compositor; + using SemanticGraph::ElementGroup; + + try + { + ElementGroup& g ( + resolve (ref.ns_name, ref.uq_name, s_, cache_)); + + // Make sure the group and all its content are fully resolved. 
+ // + resolve_element_group (g); + + Scope::NamesIterator pos (ref.names_pos); + Compositor& root (g.contains_compositor ().compositor ()); + Compositor& copy (clone_compositor (root, s, pos)); + + return © + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl; + abort (); + } + } + + return 0; + } + + SemanticGraph::Compositor& + clone_compositor (SemanticGraph::Compositor& c, + SemanticGraph::Scope& scope, + SemanticGraph::Scope::NamesIterator& pos) + { + using SemanticGraph::Any; + using SemanticGraph::Element; + using SemanticGraph::Particle; + using SemanticGraph::Compositor; + + Compositor* tmp (0); + + if (c.is_a ()) + tmp = &s_.new_node (c.file (), c.line (), c.column ()); + else if (c.is_a ()) + tmp = &s_.new_node (c.file (), c.line (), c.column ()); + else if (c.is_a ()) + tmp = &s_.new_node (c.file (), c.line (), c.column ()); + else + assert (false); + + Compositor& copy (*tmp); + + // Copy annotation. 
+ // + if (c.annotated_p ()) + s_.new_edge (c.annotation (), copy); + + for (Compositor::ContainsIterator i (c.contains_begin ()); + i != c.contains_end (); ++i) + { + Particle& p (i->particle ()); + + if (p.is_a ()) + { + Compositor& c (dynamic_cast (p)); + Compositor& cc (clone_compositor (c, scope, pos)); + + s_.new_edge (copy, cc, i->min (), i->max ()); + } + else if (p.is_a ()) + { + Element& e (dynamic_cast (p)); + Element& ec (clone_element (e)); + + s_.new_edge (copy, ec, i->min (), i->max ()); + + NodeArgs na (scope, pos); + s_.new_edge (na, ec, e.name ()); + ++pos; + } + else if (p.is_a ()) + { + Any& a (dynamic_cast (p)); + Any& ac ( + s_.new_node (a.file (), a.line (), a.column (), + a.namespace_begin (), a.namespace_end ())); + + ac.prototype (a); + + s_.new_edge (copy, ac, i->min (), i->max ()); + + // Transfer annotation. + // + if (a.annotated_p ()) + s_.new_edge (a.annotation (), ac); + + // Any has no name so we have to come up with a fake one in + // order to put it into the scope. Note that we cannot reuse + // the name from the prototype. + + UnsignedLong count; + FrontendElements::Context& ctx (scope.context ()); + + if (!ctx.count ("any-name-count")) + { + count = 0; + ctx.set ("any-name-count", count); + } + else + count = ++(ctx.get ("any-name-count")); + + std::basic_ostringstream os; + os << "any #" << count; + + NodeArgs na (scope, pos); + s_.new_edge (na, ac, os.str ()); + ++pos; + } + else + assert (false); + } + + return copy; + } + + // Clone a fully-resolved element. Note that it cannot be used as + // is to clone ref'ed element (default/fixed value, etc). + // + SemanticGraph::Element& + clone_element (SemanticGraph::Element& e) + { + using SemanticGraph::Element; + + Element& copy ( + s_.new_node ( + e.file (), e.line (), e.column (), e.global_p (), e.qualified_p ())); + + if (e.qualified_p ()) + s_.new_edge (copy, e.namespace_ ()); + + // Transfer default and fixed values. 
+ // + if (e.fixed_p ()) + copy.fixed (e.value ()); + else if (e.default_p ()) + copy.default_ (e.value ()); + + if (copy.default_p ()) + { + copy.context ().set ( + "dom-node", + e.context ().get ("dom-node")); + default_values_.push_back (©); + } + + // Transfer annotation. + // + if (e.annotated_p ()) + s_.new_edge (e.annotation (), copy); + + // Belongs edge. + // + if (e.typed_p ()) + s_.new_edge (copy, e.type ()); + else + assert (!valid_); + + // Substitutes edge. + // + if (e.substitutes_p ()) + s_.new_edge (copy, e.substitutes ().root ()); + + return copy; + } + + Void + clone_attribute_group_content (AttributeGroupRef& ref, + SemanticGraph::Scope& s) + { + using SemanticGraph::Scope; + using SemanticGraph::Attribute; + using SemanticGraph::AttributeGroup; + + try + { + AttributeGroup& g ( + resolve (ref.ns_name, ref.uq_name, s_, cache_)); + + // Make sure the group and all its content are fully resolved. + // + traverse (g); + + Scope::NamesIterator pos (ref.names_pos); + + for (Scope::NamesIterator i (g.names_begin ()); + i != g.names_end (); ++i) + { + if (Attribute* p = dynamic_cast (&i->named ())) + { + Attribute& a ( + s_.new_node (p->file (), + p->line (), + p->column (), + p->optional_p (), + p->global_p (), + p->qualified_p ())); + + NodeArgs na (s, pos); + s_.new_edge (na, a, p->name ()); + ++pos; + + if (p->qualified_p ()) + s_.new_edge (a, p->namespace_ ()); + + // Transfer default and fixed values if any. + // + if (p->fixed_p ()) + a.fixed (p->value ()); + else if (p->default_p ()) + a.default_ (p->value ()); + + if (a.default_p ()) + { + a.context ().set ( + "dom-node", + p->context ().get ("dom-node")); + default_values_.push_back (&a); + } + + // Transfer annotation. + // + if (p->annotated_p ()) + s_.new_edge (p->annotation (), a); + + // Belongs edge. 
+ // + if (p->typed_p ()) + s_.new_edge (a, p->type ()); + else + assert (!valid_); + } + else if ( + AnyAttribute* p = dynamic_cast (&i->named ())) + { + AnyAttribute& any ( + s_.new_node (p->file (), + p->line (), + p->column (), + p->namespace_begin (), + p->namespace_end ())); + + any.prototype (*p); + + // Transfer annotation. + // + if (p->annotated_p ()) + s_.new_edge (p->annotation (), any); + + // AnyAttribute has no name so we have to come up with a fake + // one in order to put it into the scope. Note that we cannot + // reuse the name from the attribute group. + + UnsignedLong count; + FrontendElements::Context& ctx (s.context ()); + + if (!ctx.count ("any-attribute-name-count")) + { + count = 0; + ctx.set ("any-attribute-name-count", count); + } + else + count = ++(ctx.get ("any-attribute-name-count")); + + std::basic_ostringstream os; + os << "any-attribute #" << count; + + NodeArgs na (s, pos); + s_.new_edge (na, any, os.str ()); + ++pos; + } + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + if (valid_) + { + wcerr << "ice: unable to resolve attribute group name '" + << ex.name () << "' inside namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + } + + private: + Schema& s_; + Boolean& valid_; + NamespaceMap& cache_; + DefaultValues& default_values_; + + private: + //Traversal::ContainsParticle contains_particle; + Traversal::ContainsCompositor contains_compositor; + }; + } + + // + // + struct FilePathComparator + { + Boolean + operator () (SemanticGraph::Path const& x, + SemanticGraph::Path const& y) const + { +#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2 + return x.native_file_string () < y.native_file_string (); +#else + return x.string () < y.string (); +#endif + } + }; + + // Parser::Impl + // + + class Parser::Impl: public NonCopyable + { + public: + ~Impl (); + + 
Impl (Boolean proper_restriction, + Boolean multiple_imports, + Boolean full_schema_check, + LocationTranslator*, + const WarningSet*); + + Evptr + parse (Path const&); + + Evptr + parse (Paths const&); + + Evptr + xml_schema (Path const&); + + private: + Void + fill_xml_schema (Schema&, Path const&); + + private: + XML::AutoPtr + dom (SemanticGraph::Path const&, Boolean validate); + + Void + schema (XML::Element const&); + + SemanticGraph::Annotation* + annotation (Boolean process); + + Void + import (XML::Element const&); + + Void + include (XML::Element const&); + + Void + element_group (XML::Element const&, Boolean in_compositor); + + SemanticGraph::Type* + simple_type (XML::Element const&); + + SemanticGraph::Type* + list (XML::Element const& l, XML::Element const& type); + + SemanticGraph::Type* + union_ (XML::Element const& u, XML::Element const& type); + + SemanticGraph::Type* + restriction (XML::Element const& r, XML::Element const& type); + + Void + enumeration (XML::Element const&); + + SemanticGraph::Type* + complex_type (XML::Element const&); + + All* + all (XML::Element const&); + + Choice* + choice (XML::Element const&, Boolean in_compositor); + + Sequence* + sequence (XML::Element const&, Boolean in_compositor); + + Void + simple_content (XML::Element const&); + + Void + complex_content (XML::Element const&, Complex&); + + Void + simple_content_extension (XML::Element const&); + + Void + simple_content_restriction (XML::Element const&); + + Void + complex_content_extension (XML::Element const&, Complex&); + + Void + complex_content_restriction (XML::Element const&, Complex&); + + Void + element (XML::Element const&, Boolean global); + + Void + attribute (XML::Element const&, Boolean global); + + Void + attribute_group (XML::Element const&); + + Void + any (XML::Element const&); + + Void + any_attribute (XML::Element const&); + + private: + Boolean + is_disabled (Char const* warning) + { + return disabled_warnings_all_ || + (disabled_warnings_ && + 
disabled_warnings_->find (warning) != disabled_warnings_->end ()); + } + + private: + Boolean + more () const + { + Iterator const& it (iteration_state_.top ()); + + return it.l_->getLength () > it.i_; + } + + XML::Element + next () + { + Iterator& it (iteration_state_.top ()); + + return XML::Element ( + dynamic_cast (it.l_->item (it.i_++))); + } + + Void + prev () + { + Iterator& it (iteration_state_.top ()); + + if (it.i_) + --it.i_; + } + + Void + push (XML::Element const& e) + { + iteration_state_.push (e.dom_element ()); + } + + Void + pop () + { + iteration_state_.pop (); + } + + private: + Void + push_scope (SemanticGraph::Scope& s) + { + scope_stack_.push (&s); + } + + Void + pop_scope () + { + scope_stack_.pop (); + } + + SemanticGraph::Scope& + scope () const + { + return *(scope_stack_.top ()); + } + + private: + Void + push_compositor (SemanticGraph::Compositor& c) + { + compositor_stack_.push (&c); + } + + Void + pop_compositor () + { + assert (!compositor_stack_.empty ()); + compositor_stack_.pop (); + } + + SemanticGraph::Compositor& + compositor () const + { + assert (!compositor_stack_.empty ()); + return *(compositor_stack_.top ()); + } + + private: + UnsignedLong + parse_min (String const& m) + { + if (m.empty ()) + return 1; + + UnsignedLong v; + std::basic_istringstream is (m); + + is >> v; + return v; + } + + UnsignedLong + parse_max (String const& m) + { + if (m.empty ()) + return 1; + + if (m == L"unbounded") + return 0; + + UnsignedLong v; + std::basic_istringstream is (m); + + is >> v; + return v; + } + + private: + SemanticGraph::Namespace& + cur_ns () const + { + // Here I am using the fact that each Schema Names only one + // Namespace. 
+ // + return dynamic_cast (cur_->names_begin ()->named ()); + } + + private: + String + unqualified_name (String const& n) + { + return XML::uq_name (n); + } + + String + namespace_name (XML::Element const& e, String const& n) + { + try + { + String p (XML::prefix (n)); + + // If we are currently handling a chameleon-included schema then + // the empty prefix is logically translated into acquired target + // namespace. + // + if (cur_chameleon_ && p.empty ()) + return cur_ns ().name (); + + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). + // + return XML::ns_name (e.dom_element (), p); + } + catch (XML::NoMapping const& ex) + { + if (ex.prefix ().empty ()) + return String (); + else + throw; + } + } + + SemanticGraph::Type& + ultimate_base (SemanticGraph::Type& t) + { + using namespace SemanticGraph; + + Complex* c = dynamic_cast (&t); + + if (c != 0 && c->inherits_p ()) + { + Type* b (&c->inherits ().base ()); + + while (true) + { + Complex* cb (dynamic_cast (b)); + + if (cb != 0 && cb->inherits_p ()) + { + b = &cb->inherits ().base (); + continue; + } + + break; + } + + return *b; + } + else + return t; + } + + private: + template + Edge* + set_type (String const& type, XML::Element const& e, Node& node); + + private: + XML::PtrVector* dom_docs_; + + struct Iterator + { + Iterator (Xerces::DOMElement* e) + : l_ (e->getChildNodes ()), i_ (0) + { + } + + Xerces::DOMNodeList* l_; + Size i_; + }; + + Cult::Containers::Stack iteration_state_; + SemanticGraph::Schema* s_; // root schema file + SemanticGraph::Schema* cur_; // current schema file + Boolean cur_chameleon_; // whethere cur_ is chameleon + + SemanticGraph::Schema* xml_schema_; // XML Schema file + SemanticGraph::Path xml_schema_path_; + + // + // + Cult::Containers::Stack scope_stack_; + + // + // + Cult::Containers::Stack compositor_stack_; + + + // Map of absolute file path and namespace pair to 
a Schema node. + // + struct SchemaId + { + SchemaId (SemanticGraph::Path const& path, String const& ns) + : path_ (path), ns_ (ns) + { + } + + + friend Boolean + operator< (SchemaId const& x, SchemaId const& y) + { +#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2 + return x.path_.native_file_string () < y.path_.native_file_string () + || (x.path_.native_file_string () == y.path_.native_file_string () + && x.ns_ < y.ns_); +#else + return x.path_.string () < y.path_.string () + || (x.path_.string () == y.path_.string () + && x.ns_ < y.ns_); +#endif + } + + private: + SemanticGraph::Path path_; + String ns_; + }; + + + typedef + Cult::Containers::Map + SchemaMap; + + SchemaMap schema_map_; + + // Path stack for diagnostic. + // + Cult::Containers::Stack file_stack_; + + SemanticGraph::Path const& + file () + { + return file_stack_.top (); + } + + // Members with default/fixed values (needed for QName handling). + // + DefaultValues default_values_; + + private: + Boolean qualify_attribute_; + Boolean qualify_element_; + + Boolean valid_; + + Boolean proper_restriction_; + Boolean multiple_imports_; + Boolean full_schema_check_; + LocationTranslator* loc_translator_; + const WarningSet* disabled_warnings_; + Boolean disabled_warnings_all_; + + NamespaceMap* cache_; + }; + + + Parser::Impl:: + Impl (Boolean proper_restriction, + Boolean multiple_imports, + Boolean full_schema_check, + LocationTranslator* t, + const WarningSet* dw) + : s_ (0), + cur_ (0), + cur_chameleon_ (false), + xml_schema_path_ ("XMLSchema.xsd"), + qualify_attribute_ (false), + qualify_element_ (false), + proper_restriction_ (proper_restriction), + multiple_imports_ (multiple_imports), + full_schema_check_ (full_schema_check), + loc_translator_ (t), + disabled_warnings_ (dw), + disabled_warnings_all_ (false) + { + if (dw && dw->find ("all") != dw->end ()) + disabled_warnings_all_ = true; + + // Initialize the Xerces-C++ runtime. 
+ // + Xerces::XMLPlatformUtils::Initialize (); + } + + Parser::Impl:: + ~Impl () + { + // Terminate the Xerces-C++ runtime. + // + Xerces::XMLPlatformUtils::Terminate (); + } + + template T& + add_type (Schema& s, Namespace& ns, String name) + { + Path path ("XMLSchema.xsd"); + T& node (s.new_node (path, 0, 0)); + s.new_edge (ns, node, name); + + return node; + } + + Void Parser::Impl:: + fill_xml_schema (Schema& s, Path const& path) + { + Namespace& ns (s.new_node (path, 1, 1)); + s.new_edge (s, ns, xsd); + + // anyType and & anySimpleType + // + AnyType& any_type ( + add_type (s, ns, L"anyType")); + add_type (s, ns, L"anySimpleType"); + + // Integers. + // + add_type (s, ns, L"byte"); + add_type (s, ns, L"unsignedByte"); + add_type (s, ns, L"short"); + add_type (s, ns, L"unsignedShort"); + add_type (s, ns, L"int"); + add_type (s, ns, L"unsignedInt"); + add_type (s, ns, L"long"); + add_type (s, ns, L"unsignedLong"); + add_type (s, ns, L"integer"); + add_type (s, ns, L"nonPositiveInteger"); + add_type (s, ns, L"nonNegativeInteger"); + add_type (s, ns, L"positiveInteger"); + add_type (s, ns, L"negativeInteger"); + + // Boolean. + // + add_type (s, ns, L"boolean"); + + // Floats. + // + add_type (s, ns, L"float"); + add_type (s, ns, L"double"); + add_type (s, ns, L"decimal"); + + // Strings + // + add_type (s, ns, L"string"); + add_type (s, ns, L"normalizedString"); + add_type (s, ns, L"token"); + add_type (s, ns, L"Name"); + add_type (s, ns, L"NMTOKEN"); + add_type (s, ns, L"NMTOKENS"); + add_type (s, ns, L"NCName"); + add_type (s, ns, L"language"); + + // ID/IDREF. + // + add_type (s, ns, L"ID"); + + Fundamental::IdRef& id_ref ( + s.new_node (path, 0, 0)); + s.new_edge (ns, id_ref, L"IDREF"); + s.new_edge (any_type, id_ref); + + Fundamental::IdRefs& id_refs ( + s.new_node (path, 0, 0)); + s.new_edge (ns, id_refs, L"IDREFS"); + s.new_edge (any_type, id_refs); + + // URI. + // + add_type (s, ns, L"anyURI"); + + // Qualified name. 
+ // + add_type (s, ns, L"QName"); + + // Binary. + // + add_type (s, ns, L"base64Binary"); + add_type (s, ns, L"hexBinary"); + + // Date/time. + // + add_type (s, ns, L"date"); + add_type (s, ns, L"dateTime"); + add_type (s, ns, L"duration"); + add_type (s, ns, L"gDay"); + add_type (s, ns, L"gMonth"); + add_type (s, ns, L"gMonthDay"); + add_type (s, ns, L"gYear"); + add_type (s, ns, L"gYearMonth"); + add_type (s, ns, L"time"); + + // Entity. + // + add_type (s, ns, L"ENTITY"); + add_type (s, ns, L"ENTITIES"); + + // Notation. + // + add_type (s, ns, L"NOTATION"); + } + + + Evptr Parser::Impl:: + xml_schema (Path const& tu) + { + valid_ = true; + + Evptr rs (new Schema (tu, 1, 1)); + fill_xml_schema (*rs, tu); + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + Evptr Parser::Impl:: + parse (Path const& tu) + { + valid_ = true; + schema_map_.clear (); + default_values_.clear (); + + XML::PtrVector dom_docs; + dom_docs_ = &dom_docs; + + NamespaceMap cache; + cache_ = &cache; + + XML::AutoPtr d (dom (tu, true)); + + if (!d) + throw InvalidSchema (); + + XML::Element root (d->getDocumentElement ()); + String ns (root["targetNamespace"]); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + Evptr rs (new Schema (tu, root.line (), root.column ())); + + // Implied schema with fundamental types. + // + xml_schema_ = &rs->new_node (xml_schema_path_, 1, 1); + rs->new_edge (*rs, *xml_schema_, xml_schema_path_); + + fill_xml_schema (*xml_schema_, xml_schema_path_); + + // Parse. + // + { + // Enter the file into schema_map_. 
+ // + Path abs_path (system_complete (tu)); + abs_path.normalize (); + schema_map_[SchemaId (abs_path, ns)] = rs.get (); + rs->context ().set ("absolute-path", abs_path); + + s_ = cur_ = rs.get (); + { + file_stack_.push (tu); + + { + push_scope ( + s_->new_node ( + file (), root.line (), root.column ())); + s_->new_edge (*cur_, scope (), ns); + + { + schema (root); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + s_ = cur_ = 0; + } + + dom_docs_->push_back (d); + + // Second pass to resolve forward references to types, elements, + // attributes and groups. + // + if (valid_) + { + Traversal::Schema schema; + + struct Uses: Traversal::Uses + { + virtual Void + traverse (Type& u) + { + Schema& s (u.schema ()); + + if (!s.context ().count ("schema-resolved")) + { + s.context ().set ("schema-resolved", true); + Traversal::Uses::traverse (u); + } + } + } uses; + + Traversal::Names schema_names; + Traversal::Namespace ns; + Traversal::Names ns_names; + + schema >> uses >> schema; + schema >> schema_names >> ns >> ns_names; + + Resolver resolver (*rs, valid_, *cache_, default_values_); + + struct AnonymousMember: Traversal::Attribute, + Traversal::Element, + Traversal::Member + { + AnonymousMember (Traversal::NodeDispatcherBase& d) + { + belongs_.node_traverser (d); + } + + virtual Void + traverse (SemanticGraph::Attribute& a) + { + traverse_member (a); + } + + virtual Void + traverse (SemanticGraph::Element& e) + { + traverse_member (e); + } + + Void + traverse_member (SemanticGraph::Member& m) + { + if (m.typed_p () && + !m.type ().named_p () && + !m.type ().context ().count ("seen")) + { + m.type().context ().set ("seen", true); + + Traversal::Member::belongs (m, belongs_); + + m.type ().context ().remove ("seen"); + } + } + + private: + Traversal::Belongs belongs_; + } anonymous_member (resolver); + + struct AnonymousBase: Traversal::Type + { + AnonymousBase (Traversal::NodeDispatcherBase& d) + : base_ (d) + { + } + + virtual Void + traverse 
(SemanticGraph::Type& t) + { + if (!t.named_p ()) + base_.dispatch (t); + } + + private: + Traversal::NodeDispatcherBase& base_; + } anonymous_base (resolver); + + ns_names >> resolver; + ns_names >> anonymous_member; + + Traversal::Names names; + Traversal::Inherits inherits; + Traversal::Argumented argumented; + resolver >> names >> resolver; + names >> anonymous_member; + resolver >> inherits >> anonymous_base; + resolver >> argumented >> anonymous_base; + + if (trace_) + wcout << "starting resolution pass" << endl; + + schema.dispatch (*rs); + } + + // Resolve default/fixed values of QName type. + // + if (valid_) + { + for (DefaultValues::ConstIterator i (default_values_.begin ()), + e (default_values_.end ()); i != e; ++i) + { + SemanticGraph::Member& m (**i); + SemanticGraph::Type& t (m.type ()); + SemanticGraph::Context& c (m.context ()); + + if (ultimate_base (t).is_a ()) + { + String v (m.value ()); + Xerces::DOMElement* e (c.get ("dom-node")); + + try + { + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). + // + String ns (XML::ns_name (e, XML::prefix (v))); + + if (m.fixed_p ()) + m.fixed (ns + L'#' + v); + else + m.default_ (ns + L'#' + v); + } + catch (XML::NoMapping const& ex) + { + if (!ex.prefix ().empty ()) + { + wcerr << m.file () << ":" << m.line () << ":" << m.column () + << ": error: unable to resolve namespace for prefix '" + << ex.prefix () << "'" << endl; + + valid_ = false; + } + } + } + + c.remove ("dom-node"); + } + } + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + + Evptr Parser::Impl:: + parse (Paths const& paths) + { + valid_ = true; + schema_map_.clear (); + default_values_.clear (); + + XML::PtrVector dom_docs; + dom_docs_ = &dom_docs; + + NamespaceMap cache; + cache_ = &cache; + + Evptr rs (new Schema ("", 0, 0)); + + // Implied schema with fundamental types. 
+ // + xml_schema_ = &rs->new_node (xml_schema_path_, 1, 1); + rs->new_edge (*rs, *xml_schema_, xml_schema_path_); + + fill_xml_schema (*xml_schema_, xml_schema_path_); + + // Parse individual schemas. + // + s_ = rs.get (); + + for (Paths::ConstIterator i (paths.begin ()); i != paths.end (); ++i) + { + Path const& tu (*i); + XML::AutoPtr d (dom (tu, true)); + + if (!d) + throw InvalidSchema (); + + XML::Element root (d->getDocumentElement ()); + String ns (root["targetNamespace"]); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + // Check if we already have this schema. + // + Path abs_path (system_complete (tu)); + abs_path.normalize (); + SchemaId schema_id (abs_path, ns); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + continue; + + Schema& s (s_->new_node (tu, root.line (), root.column ())); + s_->new_edge (s, *xml_schema_, xml_schema_path_); + s_->new_edge (*s_, s, tu); + + // Enter the file into schema_map_. + // + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + cur_ = &s; + + { + file_stack_.push (tu); + + { + push_scope ( + s_->new_node ( + file (), root.line (), root.column ())); + s_->new_edge (*cur_, scope (), ns); + + { + schema (root); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_ = 0; + + dom_docs_->push_back (d); + + if (!valid_) + break; + } + + s_ = 0; + + // Second pass to resolve forward references to types, elements, + // attributes and groups. 
+ // + if (valid_) + { + Traversal::Schema schema; + + struct Uses: Traversal::Uses + { + virtual Void + traverse (Type& u) + { + Schema& s (u.schema ()); + + if (!s.context ().count ("schema-resolved")) + { + s.context ().set ("schema-resolved", true); + Traversal::Uses::traverse (u); + } + } + } uses; + + Traversal::Names schema_names; + Traversal::Namespace ns; + Traversal::Names ns_names; + + schema >> uses >> schema; + schema >> schema_names >> ns >> ns_names; + + Resolver resolver (*rs, valid_, *cache_, default_values_); + + struct AnonymousMember: Traversal::Attribute, + Traversal::Element, + Traversal::Member + { + AnonymousMember (Traversal::NodeDispatcherBase& d) + { + belongs_.node_traverser (d); + } + + virtual Void + traverse (SemanticGraph::Attribute& a) + { + traverse_member (a); + } + + virtual Void + traverse (SemanticGraph::Element& e) + { + traverse_member (e); + } + + virtual Void + traverse_member (SemanticGraph::Member& m) + { + if (m.typed_p () && + !m.type ().named_p () && + !m.type ().context ().count ("seen")) + { + m.type().context ().set ("seen", true); + + Traversal::Member::belongs (m, belongs_); + + m.type ().context ().remove ("seen"); + } + } + + private: + Traversal::Belongs belongs_; + } anonymous_member (resolver); + + struct AnonymousBase: Traversal::Type + { + AnonymousBase (Traversal::NodeDispatcherBase& d) + : base_ (d) + { + } + + virtual Void + traverse (SemanticGraph::Type& t) + { + if (!t.named_p ()) + base_.dispatch (t); + } + + private: + Traversal::NodeDispatcherBase& base_; + } anonymous_base (resolver); + + ns_names >> resolver; + ns_names >> anonymous_member; + + Traversal::Names names; + Traversal::Inherits inherits; + Traversal::Argumented argumented; + resolver >> names >> resolver; + names >> anonymous_member; + resolver >> inherits >> anonymous_base; + resolver >> argumented >> anonymous_base; + + if (trace_) + wcout << "starting resolution pass" << endl; + + schema.dispatch (*rs); + } + + // Resolve 
default/fixed values of QName type. + // + if (valid_) + { + for (DefaultValues::ConstIterator i (default_values_.begin ()), + e (default_values_.end ()); i != e; ++i) + { + SemanticGraph::Member& m (**i); + SemanticGraph::Type& t (m.type ()); + SemanticGraph::Context& c (m.context ()); + + if (ultimate_base (t).is_a ()) + { + String v (m.value ()); + Xerces::DOMElement* e (c.get ("dom-node")); + + try + { + // We have to try to resolve even the empty prefix since it can + // be assigned to a namespace (which takes precedence over names + // without a namespace). + // + String ns (XML::ns_name (e, XML::prefix (v))); + + if (m.fixed_p ()) + m.fixed (ns + L'#' + v); + else + m.default_ (ns + L'#' + v); + } + catch (XML::NoMapping const& ex) + { + if (!ex.prefix ().empty ()) + { + wcerr << m.file () << ":" << m.line () << ":" << m.column () + << ": error: unable to resolve namespace for prefix '" + << ex.prefix () << "'" << endl; + + valid_ = false; + } + } + } + + c.remove ("dom-node"); + } + } + + if (!valid_) + throw InvalidSchema (); + + return rs; + } + + Void Parser::Impl:: + schema (XML::Element const& s) + { + Boolean old_qa (qualify_attribute_); + Boolean old_qe (qualify_element_); + + if (String af = s["attributeFormDefault"]) + qualify_attribute_ = af == L"qualified"; + + if (String ef = s["elementFormDefault"]) + qualify_element_ = ef == L"qualified"; + + push (s); + + // Parse leading annotation if any and add it as an annotation for + // this schema. 
+ // + if (Annotation* a = annotation (true)) + s_->new_edge (*a, *cur_); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"import") import (e); else + if (name == L"include") include (e); else + if (name == L"element") element (e, true); else + if (name == L"attribute") attribute (e, true); else + if (name == L"simpleType") simple_type (e); else + if (name == L"annotation"); else + if (name == L"complexType") complex_type (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected top-level element: '" << name << "'" + << endl; + + valid_ = false; + } + } + + pop (); + + qualify_attribute_ = old_qa; + qualify_element_ = old_qe; + } + + Void Parser::Impl:: + import (XML::Element const& i) + { + NarrowString loc ( + XML::transcode_to_narrow ( + i.dom_element ()->getAttribute ( + XML::XMLChString ("schemaLocation").c_str ()))); + + if (loc_translator_) + loc = loc_translator_->translate (loc); + + // Ignore empty . + // + if (!loc && !i["namespace"]) + return; + + Path path, rel_path, abs_path; + try + { +#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2 + try + { + path = Path (loc); + } + catch (InvalidPath const&) + { + // Retry as a native path. 
+ // + path = Path (loc, boost::filesystem::native); + } +#else + // The new ABI does not have a fallback native representation + path = Path (loc.c_str()); +#endif + + if (path.is_complete ()) + { + abs_path = rel_path = path; + } + else + { + rel_path = file ().branch_path () / path; + abs_path = system_complete (rel_path); + } + + abs_path.normalize (); + } + catch (InvalidPath const&) + { + wcerr << file () << ":" << i.line () << ":" << i.column () << ": " + << "error: '" << loc.c_str () << "' is not a valid " + << "filesystem path" << endl; + + valid_ = false; + return; + } + + SchemaId schema_id (abs_path, i["namespace"]); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + { + s_->new_edge (*cur_, *schema_map_[schema_id], path); + return; + } + + if (trace_) + wcout << "importing " << rel_path << endl; + + if (XML::AutoPtr d = dom (abs_path, false)) + { + XML::Element r (d->getDocumentElement ()); + String ns (r["targetNamespace"]); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + Schema& s (s_->new_node (rel_path, r.line (), r.column ())); + s_->new_edge (s, *xml_schema_, xml_schema_path_); + s_->new_edge (*cur_, s, path); + + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + Schema* old_cur (cur_); + Boolean old_cur_chameleon (cur_chameleon_); + cur_ = &s; + cur_chameleon_ = false; + + { + file_stack_.push (rel_path); + + { + push_scope ( + s_->new_node (file (), r.line (), r.column ())); + s_->new_edge (*cur_, scope (), ns); + + { + schema (r); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_chameleon_ = old_cur_chameleon; + cur_ = old_cur; + + dom_docs_->push_back (d); + } + } + + Void Parser::Impl:: + include (XML::Element const& i) + { + NarrowString loc ( + XML::transcode_to_narrow ( + i.dom_element ()->getAttribute ( + XML::XMLChString ("schemaLocation").c_str ()))); + + if (loc_translator_) + loc = loc_translator_->translate (loc); + + Path path, rel_path, abs_path; + try + { 
+#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2 + try + { + path = Path (loc); + } + catch (InvalidPath const&) + { + // Retry as a native path. + // + path = Path (loc, boost::filesystem::native); + } +#else + // The new API does not have a fallback native representation. + path = Path (loc.c_str()); +#endif + + if (path.is_complete ()) + { + abs_path = rel_path = path; + } + else + { + rel_path = file ().branch_path () / path; + abs_path = system_complete (rel_path); + } + + abs_path.normalize (); + } + catch (InvalidPath const&) + { + wcerr << file () << ":" << i.line () << ":" << i.column () << ": " + << "error: '" << loc.c_str () << "' is not a valid " + << "filesystem path" << endl; + + valid_ = false; + return; + } + + // Included schema should have the same namespace as ours. + // + SchemaId schema_id (abs_path, cur_ns ().name ()); + + if (schema_map_.find (schema_id) != schema_map_.end ()) + { + Schema& s (*schema_map_[schema_id]); + + // Chemeleon inclusion results in a new Schema node for every + // namespace. As a result, such a Schema node can only be + // Source'ed. I use this property to decide which edge to use. + // + + if (s.used_p () && s.used_begin ()->is_a ()) + s_->new_edge (*cur_, s, path); + else + s_->new_edge (*cur_, s, path); + + return; + } + + if (trace_) + wcout << "including " << rel_path << endl; + + if (XML::AutoPtr d = dom (abs_path, false)) + { + XML::Element r (d->getDocumentElement ()); + String ns (r["targetNamespace"]), cur_ns; + + Schema& s (s_->new_node (rel_path, r.line (), r.column ())); + s_->new_edge (s, *xml_schema_, xml_schema_path_); + + schema_map_[schema_id] = &s; + s.context ().set ("absolute-path", abs_path); + + Boolean chameleon (false); + + if (ns.empty () && !(cur_ns = (cur_->names_begin ())->name ()).empty ()) + { + // Chameleon. 
+ // + ns = cur_ns; + s_->new_edge (*cur_, s, path); + chameleon = true; + + if (trace_) + wcout << "handling chameleon schema" << endl; + } + else + s_->new_edge (*cur_, s, path); + + if (trace_) + wcout << "target namespace: " << ns << endl; + + Schema* old_cur (cur_); + Boolean old_cur_chameleon (cur_chameleon_); + cur_ = &s; + cur_chameleon_ = chameleon; + + { + file_stack_.push (rel_path); + + { + push_scope ( + s_->new_node (file (), r.line (), r.column ())); + s_->new_edge (*cur_, scope (), ns); + + { + schema (r); + } + + pop_scope (); + } + + file_stack_.pop (); + } + + cur_chameleon_ = old_cur_chameleon; + cur_ = old_cur; + + dom_docs_->push_back (d); + } + } + + Void Parser::Impl:: + element_group (XML::Element const& g, Boolean in_compositor) + { + if (String name = g["name"]) + { + ElementGroup& group ( + s_->new_node (file (), g.line (), g.column ())); + + s_->new_edge (scope (), group, name); + + push_scope (group); + push (g); + + annotation (false); + + XML::Element e (next ()); + + name = e.name (); + + if (trace_) + wcout << name << endl; + + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'all', 'choice' or 'sequence' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + + // Group's immediate compositor always has cardinality 1,1. + // + if (c) + s_->new_edge (group, *c, 1, 1); + + pop (); + pop_scope (); + } + else if (String ref = g["ref"]) + { + if (trace_) + wcout << "element-group-ref " << ref << endl; + + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (g, ref)); + + // In order to avoid code duplication we are going to let the + // resolver handle this case. 
+ // + if (trace_) + wcout << "deferring resolution of group name '" << uq_name + << "' inside namespace '" << ns_name << "'" + << " until later" << endl; + + if (in_compositor) + { + Compositor& c (compositor ()); + + ElementGroupRef ref ( + uq_name, ns_name, + parse_min (g["minOccurs"]), parse_max (g["maxOccurs"]), + c, scope ()); + + if (!c.context ().count ("element-group-refs")) + c.context ().set ("element-group-refs", ElementGroupRefs ()); + + c.context ().get ( + "element-group-refs").push_back (ref); + } + else + { + // This is a group-ref directly in complexType. + // + + Scope& s (scope ()); + + ElementGroupRef ref ( + uq_name, ns_name, + parse_min (g["minOccurs"]), parse_max (g["maxOccurs"]), + s); + + s.context ().set ("element-group-ref", ref); + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + + abort (); + } + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: unable to resolve namespace prefix '" << ex.prefix () + << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: 'name' or 'ref' attribute is missing in group " + << "declaration" << endl; + + valid_ = false; + + return; + } + } + + //@@ Need RAII for push/pop. 
+ // + + Type* Parser::Impl:: + simple_type (XML::Element const& t) + { + Type* r (0); + + push (t); + + Annotation* a (annotation (true)); + + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"list") r = list (e, t); else + if (name == L"union") r = union_ (e, t); else + if (name == L"restriction") r = restriction (e, t); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'list', 'union', or 'restriction' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + + if (r != 0 && a != 0) + s_->new_edge (*a, *r); + + pop (); + + return r; + } + + SemanticGraph::Type* Parser::Impl:: + list (XML::Element const& l, XML::Element const& t) + { + if (trace_) + wcout << "list" << endl; + + List& node (s_->new_node (file (), t.line (), t.column ())); + + if (String item_type = l["itemType"]) + { + if (trace_) + wcout << "item type: " << fq_name (l, item_type) << endl; + + set_type (item_type, l, node); + } + else + { + // Anonymous list item type. + // + push (l); + + annotation (false); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge (*t, node); + } + else + { + wcerr << file () << ":" << l.line () << ":" << l.column () << ": " + << "error: expected 'itemType' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + pop (); + } + + if (String name = t["name"]) + s_->new_edge (scope (), node, name); + + return &node; + } + + namespace + { + // + // List parsing utility functions. + // + + // Find first non-space character. 
+ // + Size + find_ns (const WideChar* s, Size size, Size pos) + { + while (pos < size && + (s[pos] == 0x20 || // space + s[pos] == 0x0D || // carriage return + s[pos] == 0x09 || // tab + s[pos] == 0x0A)) + ++pos; + + return pos < size ? pos : String::npos; + } + + // Find first space character. + // + Size + find_s (const WideChar* s, Size size, Size pos) + { + while (pos < size && + s[pos] != 0x20 && // space + s[pos] != 0x0D && // carriage return + s[pos] != 0x09 && // tab + s[pos] != 0x0A) + ++pos; + + return pos < size ? pos : String::npos; + } + } + + SemanticGraph::Type* Parser::Impl:: + union_ (XML::Element const& u, XML::Element const& t) + { + if (trace_) + wcout << "union" << endl; + + Union& node (s_->new_node (file (), t.line (), t.column ())); + + Boolean has_members (false); + + if (String members = u["memberTypes"]) + { + // Don't bother trying to resolve member types at this point + // since the order is important so we would have to insert + // the late resolutions into specific places. It is simpler + // to just do the whole resolution later. + // + const WideChar* data (members.c_str ()); + Size size (members.size ()); + + UnionMemberTypes* m (0); + + // Traverse the type list while logically collapsing spaces. + // + for (Size i (find_ns (data, size, 0)); i != String::npos;) + { + String s; + Size j (find_s (data, size, i)); + + if (j != String::npos) + { + s = String (data + i, j - i); + i = find_ns (data, size, j); + } + else + { + // Last item. 
+ // + s = String (data + i, size - i); + i = String::npos; + } + + if (trace_) + wcout << "member type: " << fq_name (u, s) << endl; + + if (m == 0) + { + node.context ().set ("union-member-types", UnionMemberTypes ()); + m = &node.context ().get ("union-member-types"); + } + + try + { + m->push_back ( + UnionMemberType ( + namespace_name (u, s), unqualified_name (s))); + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << u.line () << ":" << u.column () << ": " + << "error: unable to resolve namespace prefix " + << "'" << ex.prefix () << "' in '" << s << "'" << endl; + + valid_ = false; + } + } + + has_members = (m != 0); + } + + // Handle anonymous members. + // + push (u); + + annotation (false); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge (*t, node); + } + + pop (); + + if (node.argumented_begin () == node.argumented_end () && !has_members) + { + wcerr << file () << ":" << u.line () << ":" << u.column () << ": " + << "error: expected 'memberTypes' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (String name = t["name"]) + s_->new_edge (scope (), node, name); + + return &node; + } + + Type* Parser::Impl:: + restriction (XML::Element const& r, XML::Element const& t) + { + String base (r["base"]); + Type* base_type (0); + + if (base) + { + if (trace_) + wcout << "restriction base: " << fq_name (r, base) << endl; + } + + Type* rv (0); + + push (r); + + annotation (false); + + Boolean enum_ (false); + + if (!base) + { + // Anonymous base type. 
+ // + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleType") base_type = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << r.line () << ":" << r.column () << ": " + << "error: expected 'base' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (!valid_) + { + pop (); + return 0; + } + } + + Facets facets; + Restricts* restricts (0); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"enumeration") + { + // Enumeration + // + if (enum_) + enumeration (e); + else + { + // First + // + enum_ = true; + + Enumeration& node ( + s_->new_node (file (), t.line (), t.column ())); + + if (base_type) + restricts = &s_->new_edge (node, *base_type); + else + restricts = set_type (base, r, node); + + if (String name = t["name"]) + s_->new_edge (scope (), static_cast (node), name); + + rv = &node; + push_scope (node); + enumeration (e); + } + } + else if (name == L"minExclusive" || + name == L"minInclusive" || + name == L"maxExclusive" || + name == L"maxInclusive" || + name == L"totalDigits" || + name == L"fractionDigits" || + name == L"length" || + name == L"minLength" || + name == L"maxLength" || + name == L"whiteSpace" || + name == L"pattern") + { + facets[name] = e["value"]; + } + else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "' in " + << "simple type restriction" << endl; + + valid_ = false; + } + } + + if (enum_) + pop_scope (); + else + { + Complex& node (s_->new_node (file (), t.line (), t.column ())); + + if (base_type) + restricts = &s_->new_edge (node, *base_type); + else + restricts = set_type (base, r, node); + + if (String 
name = t["name"]) + s_->new_edge (scope (), node, name); + + rv = &node; + } + + if (!facets.empty ()) + { + if (restricts) + copy_facets (*restricts, facets); + else + rv->context ().set ("facets", facets); + } + + pop (); + + return rv; + } + + Void Parser::Impl:: + enumeration (XML::Element const& e) + { + String value (e["value"]); + + if (trace_) + wcout << "enumeration value: " << value << endl; + + push (e); + Annotation* a (annotation (true)); + pop (); + + Enumerator& node ( + s_->new_node (file (), e.line (), e.column ())); + + s_->new_edge (scope (), node, value); + s_->new_edge (node, dynamic_cast(scope ())); + + if (a != 0) + s_->new_edge (*a, node); + + } + + Type* Parser::Impl:: + complex_type (XML::Element const& t) + { + Type* r (0); + + Complex& node (s_->new_node (file (), t.line (), t.column ())); + + node.mixed_p (t["mixed"] == L"true" || t["mixed"] == L"1"); + + if (String name = t["name"]) + s_->new_edge (scope (), node, name); + + r = &node; + + push_scope (node); + push (t); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleContent") simple_content (e); else + if (name == L"complexContent") complex_content (e, node); else + { + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + s_->new_edge ( + node, *c, parse_min (e["minOccurs"]), parse_max (e["maxOccurs"])); + + while (more 
()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + } + + pop (); + pop_scope (); + + return r; + } + + All* Parser::Impl:: + all (XML::Element const& a) + { + // 'all' cannot be nested inside 'choice' or 'sequence', nor + // can it contain any of those. The only valid cardinality + // values for 'all' are min=0,1 and max=1. + // + All& node (s_->new_node (file (), a.line (), a.column ())); + + push_compositor (node); + push (a); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"element") element (e, false); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'element' " + << "instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + Choice* Parser::Impl:: + choice (XML::Element const& c, Boolean in_compositor) + { + Choice& node (s_->new_node (file (), c.line (), c.column ())); + + if (in_compositor) + { + s_->new_edge ( + compositor (), node, + parse_min (c["minOccurs"]), parse_max (c["maxOccurs"])); + } + + push_compositor (node); + push (c); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"any") any (e); else + if (name == L"choice") choice (e, true); else + if (name == L"element") element (e, false); else + if (name == L"sequence") sequence (e, true); else + if (name == L"group") element_group (e, true); else + { + 
wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'any', 'group', 'choice', 'sequence', " + << "or 'element' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + Sequence* Parser::Impl:: + sequence (XML::Element const& s, Boolean in_compositor) + { + Sequence& node (s_->new_node (file (), s.line (), s.column ())); + + if (in_compositor) + { + s_->new_edge ( + compositor (), node, + parse_min (s["minOccurs"]), parse_max (s["maxOccurs"])); + } + + push_compositor (node); + push (s); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + while (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (name == L"any") any (e); else + if (name == L"choice") choice (e, true); else + if (name == L"element") element (e, false); else + if (name == L"sequence") sequence (e, true); else + if (name == L"group") element_group (e, true); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'any', 'group', 'choice', 'sequence', " + << "or 'element' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_compositor (); + + return &node; + } + + Void Parser::Impl:: + simple_content (XML::Element const& c) + { + push (c); + + annotation (false); + + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"extension") simple_content_extension (e); else + if (name == L"restriction") simple_content_restriction (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'extension' or 'restriction' instead of " + << "'" << name << "'" << endl; + + valid_ = false; + } + + pop (); + } + + Void Parser::Impl:: + complex_content (XML::Element const& c, Complex& type) + { + if (c.attribute_p ("mixed")) + { + type.mixed_p (c["mixed"] == L"true" || c["mixed"] == L"1"); + } + + push (c); + + annotation (false); + + 
XML::Element e (next ()); + String name (e.name ()); + + if (name == L"extension") complex_content_extension (e, type); else + if (name == L"restriction") complex_content_restriction (e, type); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'extension' or 'restriction' instead of " + << "'" << name << "'" << endl; + + valid_ = false; + } + + pop (); + } + + Void Parser::Impl:: + simple_content_extension (XML::Element const& e) + { + if (trace_) + wcout << "extension base: " << fq_name (e, e["base"]) << endl; + + set_type (e["base"], e, dynamic_cast (scope ())); + + push (e); + + annotation (false); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + } + + Void Parser::Impl:: + simple_content_restriction (XML::Element const& r) + { + String base (r["base"]); + Type* base_type (0); + + if (trace_ && base) + wcout << "restriction base: " << fq_name (r, base) << endl; + + push (r); + annotation (false); + + if (!base) + { + // Anonymous base type. 
+ // + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"simpleType") base_type = simple_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' instead of " + << "'" << e.name () << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << r.line () << ":" << r.column () << ": " + << "error: expected 'base' attribute or 'simpleType' " + << "nested element" << endl; + + valid_ = false; + } + + if (!valid_) + { + pop (); + return; + } + } + + Facets facets; + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"simpleType") + { + // This is a "superimposed" restriction where the base + // content is restricted by specifying another simple + // type. The attributes are restricted in the usual + // way. So in effect we have kind of two base classes. + // I guess the way to handle this one day would be to + // copy all the facets from the base-to-this-type + // part of the hierarchy (will need to "know" facets + // for the built-in type restrictions as well). For + // now just ignore it. + // + } + else if (name == L"enumeration") + { + // Right now our semantic graph cannot represent enumerations + // with attributes so we are going to ignore enumerators for + // now. 
+ // + } + else if (name == L"minExclusive" || + name == L"minInclusive" || + name == L"maxExclusive" || + name == L"maxInclusive" || + name == L"totalDigits" || + name == L"fractionDigits" || + name == L"length" || + name == L"minLength" || + name == L"maxLength" || + name == L"whiteSpace" || + name == L"pattern") + { + facets[name] = e["value"]; + } + else if (name == L"attribute") + { + if (proper_restriction_) + attribute (e, false); + } + else if (name == L"anyAttribute") + { + if (proper_restriction_) + any_attribute (e); + } + else if (name == L"attributeGroup") + { + if (proper_restriction_) + attribute_group (e); + } + else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "' in " + << "simple content restriction" << endl; + + valid_ = false; + } + } + + Complex& type (dynamic_cast (scope ())); + Restricts* restricts = set_type (base, r, type); + + if (!facets.empty ()) + { + if (restricts) + copy_facets (*restricts, facets); + else + type.context ().set ("facets", facets); + } + + pop (); + } + + Void Parser::Impl:: + complex_content_extension (XML::Element const& e, Complex& type) + { + if (trace_) + wcout << "extension base: " << fq_name (e, e["base"]) << endl; + + set_type (e["base"], e, dynamic_cast (scope ())); + + push (e); + + annotation (false); + + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + 
s_->new_edge ( + type, *c, parse_min (e["minOccurs"]), parse_max (e["maxOccurs"])); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + + pop (); + } + + Void Parser::Impl:: + complex_content_restriction (XML::Element const& e, Complex& type) + { + if (trace_) + wcout << "restriction base: " << fq_name (e, e["base"]) << endl; + + set_type (e["base"], e, dynamic_cast (scope ())); + + // @@ + // For now we simply skip the contents unless the base is anyType + // (or a trivial alias thereof). Checking for the trivial alias + // is further complicated by the fact that it might not be defined + // at this stage (forward inheritance) so we will ignore that case + // as well for now. 
+ // + if (!proper_restriction_) + { + String base (e["base"]); + String uq_name (unqualified_name (base)); + String ns_name (namespace_name (e, base)); + + if (ns_name != xsd || uq_name != L"anyType") + return; + } + + push (e); + + annotation (false); + + if (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + Compositor* c (0); + + if (name == L"all") c = all (e); else + if (name == L"choice") c = choice (e, false); else + if (name == L"sequence") c = sequence (e, false); else + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"group") element_group (e, false); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unexpected element '" << name << "'" << endl; + + valid_ = false; + } + + if (c) + s_->new_edge ( + type, *c, parse_min (e["minOccurs"]), parse_max (e["maxOccurs"])); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + } + + pop (); + } + + Void Parser::Impl:: + element (XML::Element const& e, Boolean global) + { + Boolean qualified (global ? 
true : qualify_element_); + + if (String form = e["form"]) + qualified = form == L"qualified"; + + if (trace_) + wcout << "element qualified: " << qualified << endl; + + if (String name = e["name"]) + { + if (trace_) + wcout << "element name '" << name << "'" << endl; + + Element& node ( + s_->new_node ( + file (), e.line (), e.column (), global, qualified)); + + s_->new_edge (scope (), node, name); + + if (qualified) + s_->new_edge (node, cur_ns ()); + + if (!global) + { + s_->new_edge ( + compositor (), node, + parse_min (e["minOccurs"]), parse_max (e["maxOccurs"])); + } + + // Default and fixed values are mutually exclusive. + // + if (e.attribute_p ("fixed")) + node.fixed (e.attribute ("fixed")); + else if (e.attribute_p ("default")) + node.default_ (e.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", e.dom_element ()); + default_values_.push_back (&node); + } + + if (global) + { + if (String sg = e["substitutionGroup"]) + { + if (trace_) + wcout << "substitutes " << sg << endl; + + try + { + String uq_name (unqualified_name (sg)); + String ns_name (namespace_name (e, sg)); + + node.context ().set ("substitution-ns-name", ns_name); + node.context ().set ("substitution-uq-name", uq_name); + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << sg << "'" << endl; + + valid_ = false; + } + } + } + + if (String type = e["type"]) + { + if (trace_) + wcout << "element type " << fq_name (e, type) << endl; + + set_type (type, e, node); + + // Parse annotation. + // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + pop (); + } + else + { + // Looks like an anonymous type. 
+ // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + if (name == L"complexType") t = complex_type (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'simpleType' or 'complexType' " + << "instead of '" << e.name () << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge (node, *t); + } + else + { + // anyType + // + if (!is_disabled ("F001")) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "warning F001: element '" << name << "' is implicitly " + << "of anyType" << endl; + + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "info: did you forget to specify 'type' attribute?" + << endl; + } + + String prefix (ns_prefix (e, xsd)); + type = prefix + (prefix.empty () ? L"" : L":") + L"anyType"; + + set_type (type, e, node); + } + + pop (); + } + } + else if (String ref = e["ref"]) + { + Element& node ( + s_->new_node ( + file (), e.line (), e.column (), true, true)); + + // Ref can only be in compositor. + // + s_->new_edge ( + compositor (), node, + parse_min (e["minOccurs"]), parse_max (e["maxOccurs"])); + + + // Default and fixed values are mutually exclusive. + // + if (e.attribute_p ("fixed")) + node.fixed (e.attribute ("fixed")); + else if (e.attribute_p ("default")) + node.default_ (e.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", e.dom_element ()); + default_values_.push_back (&node); + } + + // Parse annotation. + // + push (e); + + if (Annotation* a = annotation (true)) + s_->new_edge (*a, node); + + pop (); + + // Try to resolve the prototype. 
+ // + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (e, ref)); + + s_->new_edge (scope (), node, uq_name); + + Element& prot (resolve (ns_name, uq_name, *s_, *cache_)); + s_->new_edge (node, prot.namespace_ ()); + + // Copy substitution group information if any. + // + if (prot.context ().count ("substitution-ns-name")) + { + node.context ().set ( + "substitution-ns-name", + prot.context ().get ("substitution-ns-name")); + + node.context ().set ( + "substitution-uq-name", + prot.context ().get ("substitution-uq-name")); + } + + // Transfer default and fixed values if the ref declaration hasn't + // defined its own. + // + if (!node.default_p ()) + { + if (prot.fixed_p ()) + node.fixed (prot.value ()); + else if (prot.default_p ()) + node.default_ (prot.value ()); + + if (node.default_p ()) + { + node.context ().set ( + "dom-node", + prot.context ().get ("dom-node")); + default_values_.push_back (&node); + } + } + + // Transfer annotation if the ref declaration hasn't defined its own. + // + if (!node.annotated_p () && prot.annotated_p ()) + s_->new_edge (prot.annotation (), node); + + // Set type information. + // + if (prot.typed_p ()) + { + s_->new_edge (node, prot.type ()); + } + else if (prot.context ().count ("type-ns-name")) + { + String ns_name (prot.context ().get ("type-ns-name")); + String uq_name (prot.context ().get ("type-uq-name")); + + node.context ().set ("type-ns-name", ns_name); + node.context ().set ("type-uq-name", uq_name); + node.context ().set ("edge-type-id", TypeId (typeid (Belongs))); + + if (trace_) + wcout << "element '" << ref << "' is not typed" << endl + << "deferring resolution until later" << endl; + } + else + { + // This could be a recursive reference to an element whose + // (anonymous) type is being defined. We are going to let + // the resolver sort out this case. 
+ // + node.context ().set ("instance-ns-name", ns_name); + node.context ().set ("instance-uq-name", uq_name); + + if (trace_) + wcout << "looks like a recursive reference to an element '" + << ns_name << "#" << uq_name << "' which is being " + << "defined" << endl + << "deferring resolution until later" << endl; + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + + abort (); + } + } + catch (NotName const& ex) + { + node.context ().set ("instance-ns-name", ex.ns ()); + node.context ().set ("instance-uq-name", ex.name ()); + + if (trace_) + wcout << "unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl + << "deferring resolution until later" << endl; + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + else + { + if (valid_) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: 'name' or 'ref' attribute is missing in element " + << "declaration" << endl; + } + } + } + + SemanticGraph::Annotation* Parser::Impl:: + annotation (Boolean process) + { + Annotation* r (0); + + if (more ()) + { + XML::Element e (next ()); + + if (e.name () == L"annotation") + { + if (process) + { + push (e); + + while (more ()) + { + XML::Element doc (next ()); + + if (doc.name () == L"documentation") + { + using Xerces::DOMNode; + using Xerces::DOMText; + using Xerces::DOMElement; + + // Use first non-structured (text only) documentation element. 
+ // + String text; + Boolean struc (false); + DOMElement* de (doc.dom_element()); + + for (DOMNode* n (de->getFirstChild ()); + n != 0 && !struc; + n = n->getNextSibling ()) + { + switch (n->getNodeType ()) + { + case DOMNode::TEXT_NODE: + case DOMNode::CDATA_SECTION_NODE: + { + DOMText* t (static_cast (n)); + text += XML::transcode (t->getData ()); + break; + } + case DOMNode::ELEMENT_NODE: + { + struc = true; + break; + } + default: + break; // ignore + } + } + + if (struc) + continue; + + r = &s_->new_node ( + file (), e.line (), e.column (), text); + break; + } + } + + pop (); + } + } + else + prev (); + } + + return r; + } + + + Void Parser::Impl:: + attribute (XML::Element const& a, Boolean global) + { + Boolean optional (true); + + String use (a["use"]); + + if (use == L"prohibited") + return; + else if (use == L"required") + optional = false; + + Boolean qualified (global ? true : qualify_attribute_); + + if (String form = a["form"]) + qualified = form == L"qualified"; + + if (String name = a["name"]) + { + if (trace_) + wcout << "attribute '" << name << "'" << endl; + + Attribute& node ( + s_->new_node ( + file (), a.line (), a.column (), optional, global, qualified)); + + s_->new_edge (scope (), node, name); + + if (qualified) + s_->new_edge (node, cur_ns ()); + + + // Default and fixed values are mutually exclusive. + // + if (a.attribute_p ("fixed")) + node.fixed (a.attribute ("fixed")); + else if (a.attribute_p ("default")) + node.default_ (a.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", a.dom_element ()); + default_values_.push_back (&node); + } + + if (String type = a["type"]) + { + if (trace_) + wcout << "attribute type: '" << fq_name (a, type) << "'" << endl; + + set_type (type, a, node); + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge (*ann, node); + + pop (); + } + else + { + // Looks like an anonymous type. 
+ // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge (*ann, node); + + if (more ()) + { + XML::Element e (next ()); + + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + Type* t (0); + + if (name == L"simpleType") t = simple_type (e); else + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "error: expected 'simpleType' instead of '" << e.name () + << "'" << endl; + + valid_ = false; + } + + if (t) + s_->new_edge (node, *t); + } + else + { + if (!is_disabled ("F002")) + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "warning F002: attribute '" << name << "' is implicitly " + << "of anySimpleType" << endl; + + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "info: did you forget to specify 'type' attribute?" + << endl; + } + + // anySimpleType + // + String prefix (ns_prefix (a, xsd)); + type = prefix + (prefix.empty () ? L"" : L":") + L"anySimpleType"; + + set_type (type, a, node); + } + + pop (); + } + } + else if (String ref = a["ref"]) + { + Attribute& node ( + s_->new_node ( + file (), a.line (), a.column (), optional, true, true)); + + + // Default and fixed values are mutually exclusive. + // + if (a.attribute_p ("fixed")) + node.fixed (a.attribute ("fixed")); + else if (a.attribute_p ("default")) + node.default_ (a.attribute ("default")); + + if (node.default_p ()) + { + node.context ().set ("dom-node", a.dom_element ()); + default_values_.push_back (&node); + } + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge (*ann, node); + + pop (); + + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (a, ref)); + + s_->new_edge (scope (), node, uq_name); + + Attribute& prot (resolve (ns_name, uq_name, *s_, *cache_)); + s_->new_edge (node, prot.namespace_ ()); + + // Transfer default and fixed values if the ref declaration hasn't + // defined its own. 
+ // + if (!node.default_p ()) + { + // Default value applies only if this attribute is optional. + // + if (prot.fixed_p ()) + node.fixed (prot.value ()); + else if (optional && prot.default_p ()) + node.default_ (prot.value ()); + + if (node.default_p ()) + { + node.context ().set ( + "dom-node", + prot.context ().get ("dom-node")); + default_values_.push_back (&node); + } + } + + // Transfer annotation if the ref declaration hasn't defined its own. + // + if (!node.annotated_p () && prot.annotated_p ()) + s_->new_edge (prot.annotation (), node); + + // Set type. + // + if (prot.typed_p ()) + { + s_->new_edge (node, prot.type ()); + } + else if (prot.context ().count ("type-ns-name")) + { + String ns_name (prot.context ().get ("type-ns-name")); + String uq_name (prot.context ().get ("type-uq-name")); + + node.context ().set ("type-ns-name", ns_name); + node.context ().set ("type-uq-name", uq_name); + node.context ().set ("edge-type-id", TypeId (typeid (Belongs))); + + if (trace_) + wcout << "attribute '" << ref << "' is not typed" << endl + << "deferring resolution until later" << endl; + } + else + { + // This could be a recursive reference to an attribute whose + // (anonymous) type is being defined. We are going to let + // the resolver sort out this case. 
+ // + node.context ().set ("instance-ns-name", ns_name); + node.context ().set ("instance-uq-name", uq_name); + + if (trace_) + wcout << "looks like a recursive reference to an attribute '" + << ns_name << "#" << uq_name << "' which is being " + << "defined" << endl + << "deferring resolution until later" << endl; + } + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (NotName const& ex) + { + node.context ().set ("instance-ns-name", ex.ns ()); + node.context ().set ("instance-uq-name", ex.name ()); + + if (trace_) + wcout << "unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl + << "deferring resolution until later" << endl; + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + else + { + if (valid_) + { + wcerr << file () << ":" << a.line () << ":" << a.column () << ": " + << "error: 'name' or 'ref' attribute is missing in attribute " + << "declaration" << endl; + } + } + } + + Void Parser::Impl:: + attribute_group (XML::Element const& g) + { + if (String name = g["name"]) + { + // Global definition. 
+ // + if (trace_) + wcout << "attributeGroup '" << name << "'" << endl; + + AttributeGroup& group ( + s_->new_node (file (), g.line (), g.column ())); + s_->new_edge (scope (), group, name); + + push_scope (group); + push (g); + + annotation (false); + + while (more ()) + { + XML::Element e (next ()); + String name (e.name ()); + + if (trace_) + wcout << name << endl; + + if (name == L"attribute") attribute (e, false); else + if (name == L"anyAttribute") any_attribute (e); else + if (name == L"attributeGroup") attribute_group (e); else + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: expected 'attribute', 'anyAttribute', or " + << "'attributeGroup' instead of '" << name << "'" << endl; + + valid_ = false; + } + } + + pop (); + pop_scope (); + } + else if (String ref = g["ref"]) + { + if (trace_) + wcout << "attribute-group-ref " << ref << endl; + + try + { + String uq_name (unqualified_name (ref)); + String ns_name (namespace_name (g, ref)); + + // In order to avoid code duplication we are going to let the + // resolver handle this case. 
+ // + if (trace_) + wcout << "deferring resolution of group name '" << uq_name + << "' inside namespace '" << ns_name << "'" + << " until later" << endl; + + Scope& s (scope ()); + AttributeGroupRef ref (uq_name, ns_name, s); + + if (!s.context ().count ("attribute-group-refs")) + s.context ().set ("attribute-group-refs", AttributeGroupRefs ()); + + s.context ().get ( + "attribute-group-refs").push_back (ref); + } + catch (NotNamespace const& ex) + { + if (valid_) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "ice: unable to resolve namespace '" << ex.ns () << "'" + << endl; + abort (); + } + } + catch (XML::NoMapping const& ex) + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: unable to resolve namespace prefix '" + << ex.prefix () << "' in '" << ref << "'" << endl; + + valid_ = false; + } + } + else + { + wcerr << file () << ":" << g.line () << ":" << g.column () << ": " + << "error: 'name' or 'ref' attribute is missing in " + << "attributeGroup declaration" << endl; + + valid_ = false; + return; + } + } + + Void Parser::Impl:: + any (XML::Element const& a) + { + if (trace_) + wcout << "any" << endl; + + String namespaces (a["namespace"] ? a["namespace"] : L"##any"); + + Any& any ( + s_->new_node (file (), a.line (), a.column (), namespaces)); + + s_->new_edge ( + compositor (), any, + parse_min (a["minOccurs"]), parse_max (a["maxOccurs"])); + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge (*ann, any); + + pop (); + + // Any has no name so we have to come up with a fake one in order to + // put it into the scope. 
+ // + UnsignedLong count; + FrontendElements::Context& ctx (scope ().context ()); + + if (!ctx.count ("any-name-count")) + { + count = 0; + ctx.set ("any-name-count", count); + } + else + count = ++(ctx.get ("any-name-count")); + + std::basic_ostringstream os; + os << "any #" << count; + + s_->new_edge (scope (), any, os.str ()); + } + + Void Parser::Impl:: + any_attribute (XML::Element const& a) + { + if (trace_) + wcout << "anyAttribute" << endl; + + String namespaces (a["namespace"] ? a["namespace"] : L"##any"); + + AnyAttribute& any ( + s_->new_node ( + file (), a.line (), a.column (), namespaces)); + + // Parse annotation. + // + push (a); + + if (Annotation* ann = annotation (true)) + s_->new_edge (*ann, any); + + pop (); + + // AnyAttribute has no name so we have to come up with a fake one + // in order to put it into the scope. + // + + UnsignedLong count; + FrontendElements::Context& ctx (scope ().context ()); + + if (!ctx.count ("any-attribute-name-count")) + { + count = 0; + ctx.set ("any-attribute-name-count", count); + } + else + count = ++(ctx.get ("any-attribute-name-count")); + + std::basic_ostringstream os; + os << "any-attribute #" << count; + + s_->new_edge (scope (), any, os.str ()); + } + + // Some specializations to get edge orientations right. + // + + template + struct Orientation + { + static Edge& + set_edge (Schema& s, Node& node, Type& type) + { + // By default it is node->edge + // + return s.template new_edge (node, type); + } + }; + + template + struct Orientation + { + static Arguments& + set_edge (Schema& s, Node& node, Type& type) + { + // For Arguments it is type->node. 
+ // + return s.template new_edge (type, node); + } + }; + + // Resolve the type name (using element e for namespace lookup) and + // connect node to the resulting type via Orientation::set_edge. + // Returns the new edge, or 0 if no edge was created: on NotName the + // name is stored in the node's context ("type-ns-name", + // "type-uq-name", "edge-type-id") for deferred resolution; on + // NotNamespace or XML::NoMapping an error is printed and valid_ is + // set to false. + // + template + Edge* Parser::Impl:: + set_type (String const& type, XML::Element const& e, Node& node) + { + Edge* r (0); + + try + { + String uq_name (unqualified_name (type)); + String ns_name (namespace_name (e, type)); + + Type& t (resolve (ns_name, uq_name, *s_, *cache_)); + + // See if it is an IDREF specialization. + // + if (ns_name == xsd && (uq_name == L"IDREF" || uq_name == L"IDREFS")) + { + // See if we've got 'xse:refType' attribute. + // + if (String ref_type = e.attribute (xse, "refType")) + { + if (trace_) + wcout << "found refType attribute '" << ref_type << "'" << endl; + + //@@ It is a bit wasteful to create a new specialization for + // each refType. Instead we could look up the target type + // and then navigate through Arguments edges to see if this + // type already arguments a specialization that we are interested + // in. But for now I will simplify the logic by creating a new + // specialization every time. + // + + Specialization* spec (0); + + if (uq_name == L"IDREF") + spec = &s_->new_node ( + file (), e.line (), e.column ()); + else + spec = &s_->new_node ( + file (), e.line (), e.column ()); + + r = &Orientation::set_edge (*s_, node, *spec); + + set_type (ref_type, e, *spec); + } + else + r = &Orientation::set_edge (*s_, node, t); + } + else + r = &Orientation::set_edge (*s_, node, t); + } + catch (NotNamespace const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace '" << ex.ns () << "'" + << endl; + + valid_ = false; + + } + catch (NotName const& ex) + { + node.context ().set ("type-ns-name", ex.ns ()); + node.context ().set ("type-uq-name", ex.name ()); + node.context ().set ("edge-type-id", TypeId (typeid (Edge))); + + if (trace_) + wcout << "unable to resolve name '" << ex.name () + << "' inside namespace '" << ex.ns () << "'" << endl + << "deferring resolution until later" << endl; + } + catch (XML::NoMapping 
const& ex) + { + wcerr << file () << ":" << e.line () << ":" << e.column () << ": " + << "error: unable to resolve namespace prefix " + << "'" << ex.prefix () << "' in '" << type << "'" << endl; + + valid_ = false; + } + + return r; + } + + // Xerces has a provision to associate a public id with input streams + // that can later be used in diagnostics. Unfortunately, it doesn't + // work. So we will have to keep our own track. + // + struct Context: public NonCopyable + { + // File map for diagnostic. + // + Path const& + file (Path const& abs) const + { + FileMap::ConstIterator i (file_map_.find (abs)); + + if (i != file_map_.end ()) + { + return i->second; + } + else + { + return abs; + } + } + + Void + map_file (Path const& abs, Path const& rel) + { + file_map_[abs] = rel; + } + + private: + typedef Cult::Containers::Map FileMap; + FileMap file_map_; + }; + + // + // + class ErrorHandler : public Xerces::DOMErrorHandler + { + public: + ErrorHandler (Boolean& valid, XSDFrontend::Context const& ctx) + : valid_ (valid), + ctx_ (ctx) + { + } + + virtual Boolean + handleError (Xerces::DOMError const& e) + { + // Xerces likes to say "Fatal error encountered during schema scan". + // We don't need this junk. 
+ // + if (!valid_ + && e.getLocation ()->getLineNumber () == 0 + && e.getLocation ()->getColumnNumber () == 0) + return true; + + +#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2 + XSDFrontend::SemanticGraph::Path abs_path ( + XML::transcode_to_narrow (e.getLocation ()->getURI ()), + boost::filesystem::native); +#else + XSDFrontend::SemanticGraph::Path abs_path ( + XML::transcode_to_narrow (e.getLocation ()->getURI ()).c_str()); +#endif + + XSDFrontend::SemanticGraph::Path rel_path (ctx_.file (abs_path)); + + wcerr << rel_path << ':' + << e.getLocation ()->getLineNumber () << ':' + << e.getLocation ()->getColumnNumber () << ": "; + + switch (e.getSeverity ()) + { + case Xerces::DOMError::DOM_SEVERITY_WARNING: + { + wcerr << "warning: "; + break; + } + default: + { + wcerr << "error: "; + valid_ = false; + break; + } + } + + wcerr << e.getMessage () << endl; + + return true; + } + + private: + Boolean& valid_; + XSDFrontend::Context const& ctx_; + }; + + + // Failed to open resource. 
+  //
+  // Thrown by InputSource::makeStream () and by EntityResolver after the
+  // diagnostics have been printed; caught in Parser::Impl::dom ().
+  //
+  struct Open {};
+
+  // Xerces input source for a schema file.
+  //
+  //   abs  - path that is set as the system id and opened by makeStream ()
+  //   rel  - path printed in the 'unable to open' message
+  //   base - file whose diagnostic name (Context::file ()) prefixes that
+  //          message
+  //
+  class InputSource: public Xerces::InputSource
+  {
+  public:
+    InputSource (
+      Path const& abs,
+      Path const& rel,
+      Path const& base,
+      XSDFrontend::Context const& ctx,
+      Xerces::MemoryManager* mm = Xerces::XMLPlatformUtils::fgMemoryManager)
+        : Xerces::InputSource (mm),
+          abs_ (abs),
+          rel_ (rel),
+          base_ (base),
+          ctx_ (ctx)
+    {
+#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2
+      setSystemId (XML::XMLChString (
+                     String (abs_.native_file_string ())).c_str ());
+#else
+      setSystemId (XML::XMLChString (String (abs_.string ())).c_str ());
+#endif
+    }
+
+    // Create a file stream for the system id set in the constructor. If
+    // the stream is not open, delete it, print an error and throw Open.
+    //
+    virtual Xerces::BinInputStream*
+    makeStream () const
+    {
+      using namespace Xerces;
+
+      BinFileInputStream* is (
+        new (getMemoryManager ())
+        BinFileInputStream (getSystemId (), getMemoryManager ()));
+
+      if (!is->getIsOpen ())
+      {
+        delete is;
+
+        wcerr << ctx_.file (base_) << ": error: "
+              << "'" << rel_ << "': unable to open in read mode"
+              << endl;
+
+        throw Open ();
+      }
+
+      return is;
+    }
+
+  private:
+    Path abs_;
+    Path rel_;
+    Path base_;
+    XSDFrontend::Context const& ctx_;
+  };
+
+
+  // Resolver that Parser::Impl::dom () installs on the Xerces parser. It
+  // turns a location (prv_id) referenced from the file base_uri into an
+  // InputSource and records the absolute-to-relative path mapping in the
+  // context. The base class and the name/signature of the callback depend
+  // on the Xerces version.
+  //
+  class EntityResolver: public Xerces::XMemory,
+#if _XERCES_VERSION >= 30000
+                        public Xerces::DOMLSResourceResolver
+#else
+                        public Xerces::DOMEntityResolver
+#endif
+  {
+  public:
+    // 't' may be 0, in which case locations are used as is.
+    //
+    EntityResolver (XSDFrontend::Context& ctx, LocationTranslator* t)
+        : ctx_ (ctx), loc_translator_ (t)
+    {
+    }
+
+    // Only prv_id and base_uri are used; the remaining arguments are
+    // unnamed and ignored.
+    //
+    // Throws Open if prv_id is 0 and rethrows InvalidPath if the location
+    // is not a valid path; in both cases the diagnostics are printed
+    // first.
+    //
+#if _XERCES_VERSION >= 30000
+    virtual Xerces::DOMLSInput*
+    resolveResource(XMLCh const* const,
+                    XMLCh const* const,
+                    XMLCh const* const /*pub_id*/,
+                    XMLCh const* const prv_id,
+                    XMLCh const* const base_uri)
+#else
+    virtual Xerces::DOMInputSource*
+    resolveEntity (XMLCh const* const /*pub_id*/,
+                   XMLCh const* const prv_id,
+                   XMLCh const* const base_uri)
+#endif
+    {
+      /*
+      XMLCh empty[1];
+      empty[0] = 0;
+
+      wcerr << "resolve entity:" << endl
+            << " pub_id " << (pub_id ? pub_id : empty) << endl
+            << " prv_id " << (prv_id ? prv_id : empty) << endl
+            << " uri " << (base_uri ? base_uri : empty) << endl;
+      */
+
+      // base_uri should be a valid path by now.
+      //
+#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2
+      Path base (XML::transcode_to_narrow (base_uri),
+                 boost::filesystem::native);
+#else
+      Path base (XML::transcode_to_narrow (base_uri).c_str());
+#endif
+
+      if (prv_id == 0)
+      {
+        //@@ How can I get the line/column numbers for this?
+        //
+        wcerr << ctx_.file (base) << ": error: "
+              << "unable to guess which schema to open"
+              << endl;
+
+        wcerr << ctx_.file (base) << ": info: "
+              << "did you forget to specify schemaLocation for import/include?"
+              << endl;
+
+        throw Open ();
+      }
+
+      NarrowString path_str (XML::transcode_to_narrow (prv_id));
+
+      if (loc_translator_)
+        path_str = loc_translator_->translate (path_str);
+
+      try
+      {
+        Path path;
+
+#if !defined(BOOST_FILESYSTEM_VERSION) || BOOST_FILESYSTEM_VERSION == 2
+        try
+        {
+          path = Path (path_str);
+        }
+        catch (InvalidPath const&)
+        {
+          // Retry as a native path.
+          //
+          path = Path (path_str, boost::filesystem::native);
+        }
+#else
+        // The new ABI does not have a fallback native representation
+        path = Path (path_str.c_str());
+#endif
+
+        Path base_dir (base.branch_path ());
+
+        Path abs_path, rel_path;
+
+        if (path.is_complete ())
+        {
+          abs_path = rel_path = path;
+        }
+        else
+        {
+          // The path to open is relative to the directory of the
+          // including file; the path to report is relative to the
+          // directory of that file's diagnostic name.
+          //
+          abs_path = base_dir / path;
+          rel_path = ctx_.file (base).branch_path () / path;
+        }
+
+        abs_path.normalize ();
+
+        ctx_.map_file (abs_path, rel_path);
+
+        using namespace Xerces;
+
+        InputSource* is (
+          new (XMLPlatformUtils::fgMemoryManager)
+          InputSource (abs_path, rel_path, base, ctx_));
+
+        // Note that I can't use XMLPlatformUtils::fgMemoryManager here
+        // since Wrapper4InputSource is-not-an XMemory.
+        //
+        return new Wrapper4InputSource (is);
+      }
+      catch (InvalidPath const&)
+      {
+        wcerr << ctx_.file (base) << ": error: "
+              << "'" << path_str.c_str () << "' is not a valid filesystem path"
+              << endl;
+        throw;
+      }
+
+      // Will never reach.
+      //
+      return 0;
+    }
+
+  private:
+    XSDFrontend::Context& ctx_;
+    LocationTranslator* loc_translator_;
+  };
+
+
+  // Load the schema file 'tu' and return its DOM document.
+  //
+  // If 'validate' is true, the schema is first loaded as a grammar by a
+  // Xerces parser that has ErrorHandler and EntityResolver installed. If
+  // valid_ is false after that step, a null AutoPtr is returned.
+  // Otherwise the file is parsed with XML::SchemaDOMParser and the
+  // adopted document is returned.
+  //
+  // Xerces::XMLException and Xerces::DOMException are reported as 'ice'
+  // and end in abort (). InvalidPath and Open set valid_ to false and a
+  // null AutoPtr is returned.
+  //
+  // NOTE(review): the text between angle brackets (e.g. after
+  // 'XML::AutoPtr', 'static_cast' and 'std::auto_ptr') is missing from
+  // this copy of the patch; check it against the upstream file.
+  //
+  XML::AutoPtr Parser::Impl::
+  dom (Path const& tu, Boolean validate)
+  {
+    using namespace Xerces;
+
+    try
+    {
+      XSDFrontend::Context ctx;
+
+      Path abs_path (system_complete (tu));
+      abs_path.normalize ();
+      ctx.map_file (abs_path, tu);
+
+      // For the top-level file the base is the file itself.
+      //
+      InputSource input_source (abs_path, tu, abs_path, ctx);
+
+      // First validate the schema with Xerces.
+      //
+      if (validate)
+      {
+        // Instantiate the DOM parser.
+        //
+        XMLCh const gLS[] = {chLatin_L, chLatin_S, chNull };
+
+        // Get an implementation of the Load-Store (LS) interface.
+        //
+        DOMImplementationLS* impl (
+          static_cast (
+            DOMImplementationRegistry::getDOMImplementation (gLS)));
+
+        // Create a DOMBuilder.
+        //
+#if _XERCES_VERSION >= 30000
+        XML::AutoPtr parser (
+          impl->createLSParser (DOMImplementationLS::MODE_SYNCHRONOUS, 0));
+
+        DOMConfiguration* conf (parser->getDomConfig ());
+
+        conf->setParameter (XMLUni::fgDOMComments, false);
+        conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true);
+        conf->setParameter (XMLUni::fgDOMEntities, false);
+        conf->setParameter (XMLUni::fgDOMNamespaces, true);
+        conf->setParameter (XMLUni::fgDOMValidate, true);
+        conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false);
+        conf->setParameter (XMLUni::fgXercesSchema, true);
+
+        // Xerces-C++ 3.1.0 is the first version with working multi import
+        // support.
+        //
+#if _XERCES_VERSION >= 30100
+        conf->setParameter (XMLUni::fgXercesHandleMultipleImports, multiple_imports_);
+#endif
+
+        conf->setParameter (XMLUni::fgXercesSchemaFullChecking, full_schema_check_);
+        conf->setParameter (XMLUni::fgXercesValidationErrorAsFatal, true);
+
+        ErrorHandler eh (valid_, ctx);
+        conf->setParameter (XMLUni::fgDOMErrorHandler, &eh);
+
+        EntityResolver er (ctx, loc_translator_);
+        conf->setParameter (XMLUni::fgDOMResourceResolver, &er);
+
+        // NOTE(review): the 'false' presumably tells the wrapper not to
+        // adopt input_source, which is a local object that is used again
+        // below; confirm against the Wrapper4InputSource docs.
+        //
+        Wrapper4InputSource wrap (&input_source, false);
+        parser->loadGrammar (&wrap, Grammar::SchemaGrammarType);
+#else
+        XML::AutoPtr parser (
+          impl->createDOMBuilder (DOMImplementationLS::MODE_SYNCHRONOUS, 0));
+
+        parser->setFeature (XMLUni::fgDOMComments, false);
+        parser->setFeature (XMLUni::fgDOMDatatypeNormalization, true);
+        parser->setFeature (XMLUni::fgDOMEntities, false);
+        parser->setFeature (XMLUni::fgDOMNamespaces, true);
+        parser->setFeature (XMLUni::fgDOMValidation, true);
+        parser->setFeature (XMLUni::fgDOMWhitespaceInElementContent, false);
+        parser->setFeature (XMLUni::fgXercesSchema, true);
+        parser->setFeature (XMLUni::fgXercesSchemaFullChecking, full_schema_check_);
+        parser->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
+
+        ErrorHandler eh (valid_, ctx);
+        parser->setErrorHandler (&eh);
+
+        EntityResolver er (ctx, loc_translator_);
+        parser->setEntityResolver (&er);
+
+        // NOTE(review): see the note on 'false' in the branch above.
+        //
+        Wrapper4InputSource wrap (&input_source, false);
+        parser->loadGrammar (wrap, Grammar::SchemaGrammarType);
+#endif
+      }
+
+      // This check is performed whether or not validation was requested.
+      //
+      if (!valid_)
+        return XML::AutoPtr (0);
+
+      // Now do our own parsing.
+      //
+      std::auto_ptr xsd_parser (
+        new (XMLPlatformUtils::fgMemoryManager) XML::SchemaDOMParser ());
+
+      xsd_parser->parse (input_source);
+
+      XML::AutoPtr doc (xsd_parser->adoptDocument());
+
+      return doc;
+    }
+    catch (Xerces::XMLException const& e)
+    {
+      wcerr << tu << ": ice: Xerces::XMLException: " << e.getMessage ()
+            << endl;
+
+      abort ();
+    }
+    catch (Xerces::DOMException const& e)
+    {
+      // The buffer has one more element than the size passed to
+      // loadDOMExceptionMsg ().
+      //
+      Size const size = 2047;
+      XMLCh text[size + 1];
+
+      wcerr << tu << ": ice: Xerces::DOMException: ";
+
+      if (DOMImplementation::loadDOMExceptionMsg (e.code, text, size))
+        wcerr << text << endl;
+      else
+        wcerr << "no message available, error code: " << e.code << endl;
+
+      abort ();
+    }
+    catch (InvalidPath const&)
+    {
+      // Diagnostics have already been issued.
+      //
+      valid_ = false;
+    }
+    catch (Open const&)
+    {
+      // Diagnostics have already been issued.
+      //
+      valid_ = false;
+    }
+
+    return XML::AutoPtr (0);
+  }
+
+  // LocationTranslator
+  //
+  LocationTranslator::
+  ~LocationTranslator ()
+  {
+  }
+
+  // Parser
+  //
+  Parser::
+  ~Parser ()
+  {
+  }
+
+  // Create the implementation with 0 for its last two arguments (the
+  // ones the other constructor fills with &t and &d).
+  //
+  Parser::
+  Parser (Boolean proper_restriction,
+          Boolean multiple_imports,
+          Boolean full_schema_check)
+      : impl_ (new Impl (proper_restriction,
+                         multiple_imports,
+                         full_schema_check,
+                         0,
+                         0))
+  {
+  }
+
+  // Same as above but passes the addresses of 't' and 'd' on to the
+  // implementation. NOTE(review): they presumably have to outlive the
+  // parser; confirm against Impl.
+  //
+  Parser::
+  Parser (Boolean proper_restriction,
+          Boolean multiple_imports,
+          Boolean full_schema_check,
+          LocationTranslator& t,
+          const WarningSet& d)
+      : impl_ (new Impl (proper_restriction,
+                         multiple_imports,
+                         full_schema_check,
+                         &t,
+                         &d))
+  {
+  }
+
+  // The functions below forward to the implementation.
+  //
+  Evptr Parser::
+  parse (SemanticGraph::Path const& path)
+  {
+    return impl_->parse (path);
+  }
+
+  Evptr Parser::
+  parse (SemanticGraph::Paths const& paths)
+  {
+    return impl_->parse (paths);
+  }
+
+  Evptr Parser::
+  xml_schema (SemanticGraph::Path const& path)
+  {
+    return impl_->xml_schema (path);
+  }
+}
-- cgit v1.2.3