From 8286ac511144e4f17d34eac9affb97e50646344a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 23 Jul 2014 15:25:44 +0200 Subject: Imported Upstream version 4.0.0 --- libcutl/cutl/xml/exception.hxx | 19 + libcutl/cutl/xml/parser.cxx | 827 ++++++++++++++++++++++++++++++++++++++ libcutl/cutl/xml/parser.hxx | 419 +++++++++++++++++++ libcutl/cutl/xml/parser.ixx | 74 ++++ libcutl/cutl/xml/parser.txx | 33 ++ libcutl/cutl/xml/qname.cxx | 35 ++ libcutl/cutl/xml/qname.hxx | 79 ++++ libcutl/cutl/xml/serializer.cxx | 258 ++++++++++++ libcutl/cutl/xml/serializer.hxx | 183 +++++++++ libcutl/cutl/xml/serializer.ixx | 75 ++++ libcutl/cutl/xml/value-traits.cxx | 25 ++ libcutl/cutl/xml/value-traits.hxx | 53 +++ libcutl/cutl/xml/value-traits.txx | 35 ++ 13 files changed, 2115 insertions(+) create mode 100644 libcutl/cutl/xml/exception.hxx create mode 100644 libcutl/cutl/xml/parser.cxx create mode 100644 libcutl/cutl/xml/parser.hxx create mode 100644 libcutl/cutl/xml/parser.ixx create mode 100644 libcutl/cutl/xml/parser.txx create mode 100644 libcutl/cutl/xml/qname.cxx create mode 100644 libcutl/cutl/xml/qname.hxx create mode 100644 libcutl/cutl/xml/serializer.cxx create mode 100644 libcutl/cutl/xml/serializer.hxx create mode 100644 libcutl/cutl/xml/serializer.ixx create mode 100644 libcutl/cutl/xml/value-traits.cxx create mode 100644 libcutl/cutl/xml/value-traits.hxx create mode 100644 libcutl/cutl/xml/value-traits.txx (limited to 'libcutl/cutl/xml') diff --git a/libcutl/cutl/xml/exception.hxx b/libcutl/cutl/xml/exception.hxx new file mode 100644 index 0000000..c4d3c0d --- /dev/null +++ b/libcutl/cutl/xml/exception.hxx @@ -0,0 +1,19 @@ +// file : cutl/xml/exception.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_EXCEPTION_HXX +#define CUTL_XML_EXCEPTION_HXX + +#include +#include + +namespace cutl +{ + namespace xml + { + struct LIBCUTL_EXPORT exception: 
cutl::exception {}; + } +} + +#endif // CUTL_XML_EXCEPTION_HXX diff --git a/libcutl/cutl/xml/parser.cxx b/libcutl/cutl/xml/parser.cxx new file mode 100644 index 0000000..219fb00 --- /dev/null +++ b/libcutl/cutl/xml/parser.cxx @@ -0,0 +1,827 @@ +// file : cutl/xml/parser.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include // std::bad_alloc +#include +#include // std::strchr +#include +#include +#include + +#include + +using namespace std; + +namespace cutl +{ + namespace xml + { + // parsing + // + parsing:: + ~parsing () throw () {} + + parsing:: + parsing (const string& n, + unsigned long long l, + unsigned long long c, + const string& d) + : name_ (n), line_ (l), column_ (c), description_ (d) + { + init (); + } + + parsing:: + parsing (const parser& p, const std::string& d) + : name_ (p.input_name ()), + line_ (p.line ()), + column_ (p.column ()), + description_ (d) + { + init (); + } + + void parsing:: + init () + { + ostringstream os; + if (!name_.empty ()) + os << name_ << ':'; + os << line_ << ':' << column_ << ": error: " << description_; + what_ = os.str (); + } + + char const* parsing:: + what () const throw () + { + return what_.c_str (); + } + + // parser::event_type + // + static const char* parser_event_str[] = + { + "start element", + "end element", + "start attribute", + "end attribute", + "characters", + "start namespace declaration", + "end namespace declaration", + "end of file" + }; + + ostream& + operator<< (ostream& os, parser::event_type e) + { + return os << parser_event_str[e]; + } + + // parser + // + parser:: + ~parser () + { + if (p_ != 0) + XML_ParserFree (p_); + } + + parser:: + parser (istream& is, const string& iname, feature_type f) + : is_ (is), iname_ (iname), feature_ (f), + depth_ (0), state_ (state_next), event_ (eof), queue_ (eof), + pqname_ (&qname_), pvalue_ (&value_), + attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0) + { + if ((feature_ & 
receive_attributes_map) != 0 && + (feature_ & receive_attributes_event) != 0) + feature_ &= ~receive_attributes_map; + + // Allocate the parser. Make sure nothing else can throw after + // this call since otherwise we will leak it. + // + p_ = XML_ParserCreateNS (0, XML_Char (' ')); + + if (p_ == 0) + throw bad_alloc (); + + // Get prefixes in addition to namespaces and local names. + // + XML_SetReturnNSTriplet (p_, true); + + // Set handlers. + // + XML_SetUserData(p_, this); + + if ((f & receive_elements) != 0) + { + XML_SetStartElementHandler (p_, &start_element_); + XML_SetEndElementHandler (p_, &end_element_); + } + + if ((f & receive_characters) != 0) + XML_SetCharacterDataHandler (p_, &characters_); + + if ((f & receive_namespace_decls) != 0) + XML_SetNamespaceDeclHandler (p_, + &start_namespace_decl_, + &end_namespace_decl_); + } + + void parser:: + handle_error () + { + XML_Error e (XML_GetErrorCode (p_)); + + if (e == XML_ERROR_ABORTED) + { + // For now we only abort the parser in the characters_() handler. + // + switch (content ()) + { + case empty: + throw parsing (*this, "character in empty content"); + case complex: + throw parsing (*this, "character in complex content"); + default: + assert (false); + } + } + else + throw parsing (iname_, + XML_GetCurrentLineNumber (p_), + XML_GetCurrentColumnNumber (p_), + XML_ErrorString (e)); + } + + struct stream_exception_controller + { + ~stream_exception_controller () + { + istream::iostate s = is_.rdstate (); + s &= ~istream::failbit; + + // If our error state (sans failbit) intersects with the + // exception state then that means we have an active + // exception and changing error/exception state will + // cause another to be thrown. + // + if (!(old_state_ & s)) + { + // Clear failbit if it was caused by eof. 
+ // + if (is_.fail () && is_.eof ()) + is_.clear (s); + + is_.exceptions (old_state_); + } + } + + stream_exception_controller (istream& is) + : is_ (is), old_state_ (is_.exceptions ()) + { + is_.exceptions (old_state_ & ~istream::failbit); + } + + private: + stream_exception_controller (const stream_exception_controller&); + + stream_exception_controller& + operator= (const stream_exception_controller&); + + private: + istream& is_; + istream::iostate old_state_; + }; + + const string& parser:: + attribute (const qname_type& qn) const + { + if (const element_entry* e = get_element ()) + { + attribute_map_type::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; + } + } + + throw parsing (*this, "attribute '" + qn.string () + "' expected"); + } + + string parser:: + attribute (const qname_type& qn, const string& dv) const + { + if (const element_entry* e = get_element ()) + { + attribute_map_type::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; + } + } + + return dv; + } + + bool parser:: + attribute_present (const qname_type& qn) const + { + if (const element_entry* e = get_element ()) + { + attribute_map_type::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return true; + } + } + + return false; + } + + void parser:: + next_expect (event_type e) + { + if (next () != e) + throw parsing (*this, string (parser_event_str[e]) + " expected"); + } + + void parser:: + next_expect (event_type e, const string& ns, const string& n) + { + if (next () != e || namespace_ () != ns || name () != n) + throw parsing (*this, + string (parser_event_str[e]) + " '" + + qname_type (ns, 
n).string () + "' expected"); + } + + const parser::element_entry* parser:: + get_element () const + { + // The start_element_() Expat handler may have already provisioned + // an entry in the element stack. In this case, we need to get the + // one before it, if any. + // + const element_entry* r (0); + element_state::size_type n (element_state_.size ()); + if (n != 0) + { + n--; + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + else if (n != 0 && element_state_[n].depth > depth_) + { + n--; + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + } + } + return r; + } + + void parser:: + pop_element () + { + // Make sure there are no unhandled attributes left. + // + const element_entry& e (element_state_.back ()); + if (e.attr_unhandled_ != 0) + { + // Find the first unhandled attribute and report it. + // + for (attribute_map_type::const_iterator i (e.attr_map_.begin ()); + i != e.attr_map_.end (); ++i) + { + if (!i->second.handled) + throw parsing ( + *this, "unexpected attribute '" + i->first.string () + "'"); + } + assert (false); + } + + element_state_.pop_back (); + } + + parser::event_type parser:: + next_ (bool peek) + { + event_type e (next_body ()); + + // Content-specific processing. Note that we handle characters in the + // characters_() Expat handler for two reasons. Firstly, it is faster + // to ignore the whitespaces at the source. Secondly, this allows us + // to distinguish between element and attribute characters. We can + // move this processing to the handler because the characters event + // is never queued. + // + switch (e) + { + case end_element: + { + // If this is a peek, then avoid popping the stack just yet. + // This way, the attribute map will still be valid until we + // call next(). 
+ // + if (!peek) + { + if (!element_state_.empty () && + element_state_.back ().depth == depth_) + pop_element (); + + depth_--; + } + break; + } + case start_element: + { + const element_entry* e (get_element ()); + switch (e != 0 ? e->content : mixed) + { + case empty: + throw parsing (*this, "element in empty content"); + case simple: + throw parsing (*this, "element in simple content"); + default: + break; + } + + // If this is a peek, then delay adjusting the depth. + // + if (!peek) + depth_++; + + break; + } + default: + break; + } + + return e; + } + + parser::event_type parser:: + next_body () + { + // See if we have any start namespace declarations we need to return. + // + if (start_ns_i_ < start_ns_.size ()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case start_namespace_decl: + { + if (++start_ns_i_ == start_ns_.size ()) + { + start_ns_i_ = 0; + start_ns_.clear (); + pqname_ = &qname_; + break; // No more declarations. + } + // Fall through. + } + case start_element: + { + event_ = start_namespace_decl; + pqname_ = &start_ns_[start_ns_i_]; + return event_; + } + default: + { + assert (false); + return event_ = eof; + } + } + } + + // See if we have any attributes we need to return as events. + // + if (attr_i_ < attr_.size ()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case start_attribute: + { + event_ = characters; + pvalue_ = &attr_[attr_i_].value; + return event_; + } + case characters: + { + event_ = end_attribute; // Name is already set. + return event_; + } + case end_attribute: + { + if (++attr_i_ == attr_.size ()) + { + attr_i_ = 0; + attr_.clear (); + pqname_ = &qname_; + pvalue_ = &value_; + break; // No more attributes. + } + // Fall through. 
+ } + case start_element: + case start_namespace_decl: + { + event_ = start_attribute; + pqname_ = &attr_[attr_i_].qname; + return event_; + } + default: + { + assert (false); + return event_ = eof; + } + } + } + + // See if we have any end namespace declarations we need to return. + // + if (end_ns_i_ < end_ns_.size ()) + { + // Based on the previous event determine what's the next one must be. + // + switch (event_) + { + case end_namespace_decl: + { + if (++end_ns_i_ == end_ns_.size ()) + { + end_ns_i_ = 0; + end_ns_.clear (); + pqname_ = &qname_; + break; // No more declarations. + } + // Fall through. + } + // The end namespace declaration comes before the end element + // which means it can follow pretty much any other event. + // + default: + { + event_ = end_namespace_decl; + pqname_ = &end_ns_[end_ns_i_]; + return event_; + } + } + } + + // Check the queue. + // + if (queue_ != eof) + { + event_ = queue_; + queue_ = eof; + return event_; + } + + XML_ParsingStatus ps; + XML_GetParsingStatus (p_, &ps); + + switch (ps.parsing) + { + case XML_INITIALIZED: + { + // As if we finished the previous chunk. + break; + } + case XML_PARSING: + { + assert (false); + return event_ = eof; + } + case XML_FINISHED: + { + return event_ = eof; + } + case XML_SUSPENDED: + { + switch (XML_ResumeParser (p_)) + { + case XML_STATUS_SUSPENDED: + { + // If the parser is again in the suspended state, then + // that means we have the next event. + // + return event_; + } + case XML_STATUS_OK: + { + // Otherwise, we need to get and parse the next chunk of data + // unless this was the last chunk, in which case this is eof. + // + if (ps.finalBuffer) + return event_ = eof; + + break; + } + case XML_STATUS_ERROR: + handle_error (); + } + + break; + } + } + + // Get and parse the next chunk of data until we get the next event + // or reach eof. 
+ // + event_ = eof; + XML_Status s; + do + { + const size_t cap (4096); + + char* b (static_cast (XML_GetBuffer (p_, cap))); + if (b == 0) + throw bad_alloc (); + + // Temporarily unset the exception failbit. Also clear the fail bit + // when we reset the old state if it was caused by eof. + // + { + stream_exception_controller sec (is_); + is_.read (b, static_cast (cap)); + } + + s = XML_ParseBuffer (p_, static_cast (is_.gcount ()), is_.eof ()); + + if (s == XML_STATUS_ERROR) + handle_error (); + + } while (s != XML_STATUS_SUSPENDED && !is_.eof ()); + + return event_; + } + + static void + split_name (const XML_Char* s, qname& qn) + { + string& ns (qn.namespace_ ()); + string& name (qn.name ()); + string& prefix (qn.prefix ()); + + const char* p (strchr (s, ' ')); + + if (p == 0) + { + ns.clear (); + name = s; + prefix.clear (); + } + else + { + ns.assign (s, 0, p - s); + + s = p + 1; + p = strchr (s, ' '); + + if (p == 0) + { + name = s; + prefix.clear (); + } + else + { + name.assign (s, 0, p - s); + prefix = p + 1; + } + } + } + + void XMLCALL parser:: + start_element_ (void* v, const XML_Char* name, const XML_Char** atts) + { + parser& p (*static_cast (v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus (p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // Cannot be a followup event. + // + assert (ps.parsing == XML_PARSING); + + p.event_ = start_element; + split_name (name, p.qname_); + + p.line_ = XML_GetCurrentLineNumber (p.p_); + p.column_ = XML_GetCurrentColumnNumber (p.p_); + + // Handle attributes. + // + if (*atts != 0) + { + bool am ((p.feature_ & receive_attributes_map) != 0); + bool ae ((p.feature_ & receive_attributes_event) != 0); + + // Provision an entry for this element. 
+ // + element_entry* pe (0); + if (am) + { + p.element_state_.push_back (element_entry (p.depth_ + 1)); + pe = &p.element_state_.back (); + } + + if (am || ae) + { + for (; *atts != 0; atts += 2) + { + if (am) + { + qname_type qn; + split_name (*atts, qn); + attribute_map_type::value_type v (qn, attribute_value_type ()); + v.second.value = *(atts + 1); + v.second.handled = false; + pe->attr_map_.insert (v); + } + else + { + p.attr_.push_back (attribute_type ()); + split_name (*atts, p.attr_.back ().qname); + p.attr_.back ().value = *(atts + 1); + } + } + + if (am) + pe->attr_unhandled_ = pe->attr_map_.size (); + } + } + + XML_StopParser (p.p_, true); + } + + void XMLCALL parser:: + end_element_ (void* v, const XML_Char* name) + { + parser& p (*static_cast (v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus (p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // This can be a followup event for empty elements (<foo/>). In this + // case the element name is already set. + // + if (ps.parsing != XML_PARSING) + p.queue_ = end_element; + else + { + // We may also have the end namespace declaration events which + // should come before the end element. If that's the case, then + // queue the end element and return the end namespace as the next + // event. 
+ // + if (p.end_ns_i_ < p.end_ns_.size ()) + { + p.event_ = end_namespace_decl; + p.queue_ = end_element; + } + else + p.event_ = end_element; + + split_name (name, p.qname_); + + p.line_ = XML_GetCurrentLineNumber (p.p_); + p.column_ = XML_GetCurrentColumnNumber (p.p_); + + XML_StopParser (p.p_, true); + } + } + + void XMLCALL parser:: + characters_ (void* v, const XML_Char* s, int n) + { + parser& p (*static_cast (v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus (p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // If this is empty or complex content, see if these are whitespaces. + // + switch (p.content ()) + { + case empty: + case complex: + { + for (int i (0); i != n; ++i) + { + char c (s[i]); + if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) + continue; + + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p.line_ = XML_GetCurrentLineNumber (p.p_); + p.column_ = XML_GetCurrentColumnNumber (p.p_); + XML_StopParser (p.p_, false); + break; + } + return; + } + default: + break; + } + + // This can be a followup event for another character event. In + // this case simply append the data. + // + if (ps.parsing != XML_PARSING) + { + assert (p.event_ == characters); + p.value_.append (s, n); + } + else + { + p.event_ = characters; + p.value_.assign (s, n); + + p.line_ = XML_GetCurrentLineNumber (p.p_); + p.column_ = XML_GetCurrentColumnNumber (p.p_); + + XML_StopParser (p.p_, true); + } + } + + void XMLCALL parser:: + start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns) + { + parser& p (*static_cast (v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus (p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. 
+ // + if (ps.parsing == XML_FINISHED) + return; + + p.start_ns_.push_back (qname_type ()); + p.start_ns_.back ().prefix () = (prefix != 0 ? prefix : ""); + p.start_ns_.back ().namespace_ () = (ns != 0 ? ns : ""); + } + + void XMLCALL parser:: + end_namespace_decl_ (void* v, const XML_Char* prefix) + { + parser& p (*static_cast (v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus (p.p_, &ps); + + // Expat has a (mis)-feature of possibly calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + p.end_ns_.push_back (qname_type ()); + p.end_ns_.back ().prefix () = (prefix != 0 ? prefix : ""); + } + } +} diff --git a/libcutl/cutl/xml/parser.hxx b/libcutl/cutl/xml/parser.hxx new file mode 100644 index 0000000..5c3c959 --- /dev/null +++ b/libcutl/cutl/xml/parser.hxx @@ -0,0 +1,419 @@ +// file : cutl/xml/parser.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_PARSER_HXX +#define CUTL_XML_PARSER_HXX + +#include +#include +#include +#include +#include // std::size_t +#include + +#include // LIBCUTL_EXTERNAL_EXPAT + +#ifndef LIBCUTL_EXTERNAL_EXPAT +# include +#else +# include +#endif + +// We only support UTF-8 expat. 
+// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include +#include + +#include + +namespace cutl +{ + namespace xml + { + class parser; + + struct LIBCUTL_EXPORT parsing: exception + { + virtual + ~parsing () throw (); + + parsing (const std::string& name, + unsigned long long line, + unsigned long long column, + const std::string& description); + + parsing (const parser&, const std::string& description); + + const std::string& + name () const {return name_;} + + unsigned long long + line () const {return line_;} + + unsigned long long + column () const {return column_;} + + const std::string& + description () const {return description_;} + + virtual const char* + what () const throw (); + + private: + void + init (); + + private: + std::string name_; + unsigned long long line_; + unsigned long long column_; + std::string description_; + std::string what_; + }; + + class LIBCUTL_EXPORT parser + { + public: + ~parser (); + + typedef xml::qname qname_type; + typedef unsigned short feature_type; + + // If both receive_attributes_event and receive_attributes_map are + // specified, then receive_attributes_event is assumed. + // + static const feature_type receive_elements = 0x0001; + static const feature_type receive_characters = 0x0002; + static const feature_type receive_attributes_map = 0x0004; + static const feature_type receive_attributes_event = 0x0008; + static const feature_type receive_namespace_decls = 0x0010; + + static const feature_type receive_default = receive_elements | + receive_characters | + receive_attributes_map; + + // Parse std::istream. Input name is used in diagnostics to identify + // the document being parsed. std::ios_base::failure exception is + // used to report io errors (badbit and failbit). + // + parser (std::istream&, + const std::string& input_name, + feature_type = receive_default); + + const std::string& + input_name () const {return iname_;} + + // Parsing events. 
+ // + public: + enum event_type + { + // If adding new events, also update the stream insertion operator. + // + start_element, + end_element, + start_attribute, + end_attribute, + characters, + start_namespace_decl, + end_namespace_decl, + eof + }; + + event_type + next () + { + if (state_ == state_next) + return next_ (false); + else + { + // If we previously peeked at start/end_element, then adjust + // state accordingly. + // + switch (event_) + { + case end_element: + { + if (!element_state_.empty () && + element_state_.back ().depth == depth_) + pop_element (); + + depth_--; + break; + } + case start_element: + { + depth_++; + break; + } + default: + break; + } + + state_ = state_next; + return event_; + } + } + + // Get the next event and make sure that it's what's expected. If it + // is not, then throw an appropriate parsing exception. + // + void + next_expect (event_type); + + void + next_expect (event_type, const qname_type& qname); + + void + next_expect (event_type, const std::string& name); + + void + next_expect (event_type, const std::string& ns, const std::string& name); + + event_type + peek () + { + if (state_ == state_peek) + return event_; + else + { + event_type e (next_ (true)); + state_ = state_peek; // Set it after the call to next_(). + return e; + } + } + + // Return the event that was last returned by the call to next() or + // peek(). + // + event_type + event () {return event_;} + + // Event data. + // + public: + const qname_type& qname () const {return *pqname_;} + + const std::string& namespace_ () const {return pqname_->namespace_ ();} + const std::string& name () const {return pqname_->name ();} + const std::string& prefix () const {return pqname_->prefix ();} + + const std::string& value () const {return *pvalue_;} + + unsigned long long line () const {return line_;} + unsigned long long column () const {return column_;} + + // Attribute map lookup. 
If attribute is not found, then the version + // without the default value throws an appropriate parsing exception + // while the version with the default value returns that value. + // + // Note also that there is no attribute(ns,name) version since it + // would conflict with attribute(name,dv) (qualified attributes + // are not very common). + // + // Attribute map is valid throughout at the "element level" until + // end_element and not just during start_element. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next(). + // + const std::string& + attribute (const std::string& name) const; + + template + T + attribute (const std::string& name) const; + + std::string + attribute (const std::string& name, const std::string& dv) const; + + template + T + attribute (const std::string& name, const T& dv) const; + + const std::string& + attribute (const qname_type& qname) const; + + template + T + attribute (const qname_type& qname) const; + + std::string + attribute (const qname_type& qname, const std::string& dv) const; + + template + T + attribute (const qname_type& qname, const T& dv) const; + + bool + attribute_present (const std::string& name) const; + + bool + attribute_present (const qname_type& qname) const; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. + // + struct attribute_value_type + { + std::string value; + mutable bool handled; + }; + + typedef std::map attribute_map_type; + + const attribute_map_type& + attribute_map () const; + + // Optional content processing. + // + public: + enum content_type + { + // element characters whitespaces + empty, // no no ignored + simple, // no yes preserved + complex, // yes no ignored + mixed // yes yes preserved + }; + + // Note that you cannot get/set content while peeking. 
+ // + void + content (content_type c) + { + assert (state_ == state_next); + + if (!element_state_.empty () && element_state_.back ().depth == depth_) + element_state_.back ().content = c; + else + element_state_.push_back (element_entry (depth_, c)); + } + + content_type + content () const + { + assert (state_ == state_next); + + return + !element_state_.empty () && element_state_.back ().depth == depth_ + ? element_state_.back ().content + : mixed; + } + + private: + static void XMLCALL + start_element_ (void*, const XML_Char*, const XML_Char**); + + static void XMLCALL + end_element_ (void*, const XML_Char*); + + static void XMLCALL + characters_ (void*, const XML_Char*, int); + + static void XMLCALL + start_namespace_decl_ (void*, const XML_Char*, const XML_Char*); + + static void XMLCALL + end_namespace_decl_ (void*, const XML_Char*); + + private: + event_type + next_ (bool peek); + + event_type + next_body (); + + void + handle_error (); + + private: + std::istream& is_; + const std::string iname_; + feature_type feature_; + + XML_Parser p_; + std::size_t depth_; + enum {state_next, state_peek} state_; + event_type event_; + event_type queue_; + + qname_type qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const qname_type* pqname_; + const std::string* pvalue_; + + unsigned long long line_; + unsigned long long column_; + + // Attributes as events. + // + struct attribute_type + { + qname_type qname; + std::string value; + }; + + typedef std::vector attributes; + + attributes attr_; + attributes::size_type attr_i_; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_; // Index of the current decl. + + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_; // Index of the current decl. 
+ + // Element state consisting of the content model and attribute map. + // + struct element_entry + { + element_entry (std::size_t d, content_type c = mixed) + : depth (d), content (c), attr_unhandled_ (0) {} + + std::size_t depth; + content_type content; + attribute_map_type attr_map_; + mutable attribute_map_type::size_type attr_unhandled_; + }; + + typedef std::vector element_state; + std::vector element_state_; + + // Empty attribute map to return when an element has no attributes. + // + const attribute_map_type empty_attr_map_; + + // Return the element entry corresponding to the current depth, if + // exists, and NULL otherwise. + // + const element_entry* + get_element () const; + + void + pop_element (); + }; + + LIBCUTL_EXPORT + std::ostream& + operator<< (std::ostream&, parser::event_type); + } +} + +#include +#include + +#endif // CUTL_XML_PARSER_HXX diff --git a/libcutl/cutl/xml/parser.ixx b/libcutl/cutl/xml/parser.ixx new file mode 100644 index 0000000..65834b6 --- /dev/null +++ b/libcutl/cutl/xml/parser.ixx @@ -0,0 +1,74 @@ +// file : cutl/xml/parser.ixx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +namespace cutl +{ + namespace xml + { + inline const std::string& parser:: + attribute (const std::string& n) const + { + return attribute (qname_type (n)); + } + + template + inline T parser:: + attribute (const std::string& n) const + { + return attribute (qname_type (n)); + } + + inline std::string parser:: + attribute (const std::string& n, const std::string& dv) const + { + return attribute (qname_type (n), dv); + } + + template + inline T parser:: + attribute (const std::string& n, const T& dv) const + { + return attribute (qname_type (n), dv); + } + + template + inline T parser:: + attribute (const qname_type& qn) const + { + return value_traits::parse (attribute (qn), *this); + } + + inline bool parser:: + attribute_present (const std::string& n) const + { + return 
attribute_present (qname_type (n)); + } + + inline const parser::attribute_map_type& parser:: + attribute_map () const + { + if (const element_entry* e = get_element ()) + { + e->attr_unhandled_ = 0; // Assume all handled. + return e->attr_map_; + } + + return empty_attr_map_; + } + + inline void parser:: + next_expect (event_type e, const qname_type& qn) + { + return next_expect (e, qn.namespace_ (), qn.name ()); + } + + inline void parser:: + next_expect (event_type e, const std::string& n) + { + return next_expect (e, std::string (), n); + } + } +} diff --git a/libcutl/cutl/xml/parser.txx b/libcutl/cutl/xml/parser.txx new file mode 100644 index 0000000..8189883 --- /dev/null +++ b/libcutl/cutl/xml/parser.txx @@ -0,0 +1,33 @@ +// file : cutl/xml/parser.txx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +namespace cutl +{ + namespace xml + { + template + T parser:: + attribute (const qname_type& qn, const T& dv) const + { + if (const element_entry* e = get_element ()) + { + attribute_map_type::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return value_traits::parse (i->second.value, *this); + } + } + + return dv; + } + } +} diff --git a/libcutl/cutl/xml/qname.cxx b/libcutl/cutl/xml/qname.cxx new file mode 100644 index 0000000..a32add2 --- /dev/null +++ b/libcutl/cutl/xml/qname.cxx @@ -0,0 +1,35 @@ +// file : cutl/xml/qname.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +#include + +using namespace std; + +namespace cutl +{ + namespace xml + { + string qname:: + string () const + { + std::string r; + if (!ns_.empty ()) + { + r += ns_; + r += '#'; + } + + r += name_; + return r; + } + + ostream& + operator<< (ostream& os, const qname& qn) + { + return os << qn.string (); + } + } +} 
diff --git a/libcutl/cutl/xml/qname.hxx b/libcutl/cutl/xml/qname.hxx new file mode 100644 index 0000000..0964705 --- /dev/null +++ b/libcutl/cutl/xml/qname.hxx @@ -0,0 +1,79 @@ +// file : cutl/xml/qname.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_QNAME_HXX +#define CUTL_XML_QNAME_HXX + +#include +#include + +#include + +namespace cutl +{ + namespace xml + { + // Note that the optional prefix is just a "syntactic sugar". In + // particular, it is ignored by the comparison operators and the + // std::ostream insertion operator. + // + class LIBCUTL_EXPORT qname + { + public: + qname () {} + qname (const std::string& name): name_ (name) {} + qname (const std::string& ns, const std::string& name) + : ns_ (ns), name_ (name) {} + qname (const std::string& ns, + const std::string& name, + const std::string& prefix) + : ns_ (ns), name_ (name), prefix_ (prefix) {} + + const std::string& namespace_ () const {return ns_;} + const std::string& name () const {return name_;} + const std::string& prefix () const {return prefix_;} + + std::string& namespace_ () {return ns_;} + std::string& name () {return name_;} + std::string& prefix () {return prefix_;} + + // Printable representation in the [<namespace>#]<name> form. 
+ // + std::string + string () const; + + // Note that comparison operators + // + public: + friend bool + operator< (const qname& x, const qname& y) + { + return x.ns_ < y.ns_ || (x.ns_ == y.ns_ && x.name_ < y.name_); + } + + friend bool + operator== (const qname& x, const qname& y) + { + return x.ns_ == y.ns_ && x.name_ == y.name_; + } + + friend bool + operator!= (const qname& x, const qname& y) + { + return !(x == y); + } + + private: + std::string ns_; + std::string name_; + std::string prefix_; + }; + + LIBCUTL_EXPORT + std::ostream& + operator<< (std::ostream&, const qname&); + } +} + +#endif // CUTL_XML_QNAME_HXX diff --git a/libcutl/cutl/xml/serializer.cxx b/libcutl/cutl/xml/serializer.cxx new file mode 100644 index 0000000..8da3df5 --- /dev/null +++ b/libcutl/cutl/xml/serializer.cxx @@ -0,0 +1,258 @@ +// file : cutl/xml/serializer.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include // std::bad_alloc +#include // std::strlen + +#include + +using namespace std; + +namespace cutl +{ + namespace xml + { + // serialization + // + serialization:: + ~serialization () throw () {} + + serialization:: + serialization (const string& n, const string& d) + : name_ (n), description_ (d) + { + init (); + } + + serialization:: + serialization (const serializer& s, const std::string& d) + : name_ (s.output_name ()), description_ (d) + { + init (); + } + + void serialization:: + init () + { + if (!name_.empty ()) + { + what_ += name_; + what_ += ": "; + } + + what_ += "error: "; + what_ += description_; + } + + char const* serialization:: + what () const throw () + { + return what_.c_str (); + } + + // serializer + // + extern "C" genxStatus + genx_write (void* p, constUtf8 us) + { + // It would have been easier to throw the exception directly, + // however, the Genx code is most likely not exception safe. 
+ // + ostream* os (static_cast (p)); + const char* s (reinterpret_cast (us)); + os->write (s, static_cast (strlen (s))); + return os->good () ? GENX_SUCCESS : GENX_IO_ERROR; + } + + extern "C" genxStatus + genx_write_bound (void* p, constUtf8 start, constUtf8 end) + { + ostream* os (static_cast (p)); + const char* s (reinterpret_cast (start)); + streamsize n (static_cast (end - start)); + os->write (s, n); + return os->good () ? GENX_SUCCESS : GENX_IO_ERROR; + } + + extern "C" genxStatus + genx_flush (void* p) + { + ostream* os (static_cast (p)); + os->flush (); + return os->good () ? GENX_SUCCESS : GENX_IO_ERROR; + } + + serializer:: + ~serializer () + { + if (s_ != 0) + genxDispose (s_); + } + + serializer:: + serializer (ostream& os, const string& oname, unsigned short ind) + : os_ (os), os_state_ (os.exceptions ()), oname_ (oname), depth_ (0) + { + // Temporarily disable exceptions on the stream. + // + os_.exceptions (ostream::goodbit); + + // Allocate the serializer. Make sure nothing else can throw after + // this call since otherwise we will leak it. + // + s_ = genxNew (0, 0, 0); + + if (s_ == 0) + throw bad_alloc (); + + genxSetUserData (s_, &os_); + + if (ind != 0) + genxSetPrettyPrint (s_, ind); + + sender_.send = &genx_write; + sender_.sendBounded = &genx_write_bound; + sender_.flush = &genx_flush; + + if (genxStatus e = genxStartDocSender (s_, &sender_)) + { + string m (genxGetErrorMessage (s_, e)); + genxDispose (s_); + throw serialization (oname, m); + } + } + + void serializer:: + handle_error (genxStatus e) + { + switch (e) + { + case GENX_ALLOC_FAILED: + throw bad_alloc (); + case GENX_IO_ERROR: + // Restoring the original exception state should trigger the + // exception. If it doesn't (e.g., because the user didn't + // configure the stream to throw), then fall back to the + // serialiation exception. + // + os_.exceptions (os_state_); + // Fall through. 
+ default: + throw serialization (oname_, genxGetErrorMessage (s_, e)); + } + } + + void serializer:: + start_element (const string& ns, const string& name) + { + if (genxStatus e = genxStartElementLiteral ( + s_, + reinterpret_cast (ns.empty () ? 0 : ns.c_str ()), + reinterpret_cast (name.c_str ()))) + handle_error (e); + + depth_++; + } + + void serializer:: + end_element () + { + if (genxStatus e = genxEndElement (s_)) + handle_error (e); + + // Call EndDocument() if we are past the root element. + // + if (--depth_ == 0) + { + if (genxStatus e = genxEndDocument (s_)) + handle_error (e); + + // Also restore the original exception state on the stream. + // + os_.exceptions (os_state_); + } + } + + void serializer:: + start_attribute (const string& ns, const string& name) + { + if (genxStatus e = genxStartAttributeLiteral ( + s_, + reinterpret_cast (ns.empty () ? 0 : ns.c_str ()), + reinterpret_cast (name.c_str ()))) + handle_error (e); + } + + void serializer:: + end_attribute () + { + if (genxStatus e = genxEndAttribute (s_)) + handle_error (e); + } + + void serializer:: + attribute (const string& ns, + const string& name, + const string& value) + { + if (genxStatus e = genxAddAttributeLiteral ( + s_, + reinterpret_cast (ns.empty () ? 0 : ns.c_str ()), + reinterpret_cast (name.c_str ()), + reinterpret_cast (value.c_str ()))) + handle_error (e); + } + + void serializer:: + characters (const string& value) + { + if (genxStatus e = genxAddCountedText ( + s_, + reinterpret_cast (value.c_str ()), + static_cast (value.size ()))) + handle_error (e); + } + + void serializer:: + namespace_decl (const string& ns, const string& p) + { + if (genxStatus e = ns.empty () && p.empty () + ? 
genxUnsetDefaultNamespace (s_) + : genxAddNamespaceLiteral ( + s_, + reinterpret_cast (ns.c_str ()), + reinterpret_cast (p.c_str ()))) + handle_error (e); + } + + void serializer:: + xml_decl (const string& ver, const string& enc, const string& stl) + { + if (genxStatus e = genxXmlDeclaration ( + s_, + reinterpret_cast (ver.c_str ()), + (enc.empty () ? 0 : reinterpret_cast (enc.c_str ())), + (stl.empty () ? 0 : reinterpret_cast (stl.c_str ())))) + handle_error (e); + } + + bool serializer:: + lookup_namespace_prefix (const string& ns, string& p) + { + // Currently Genx will create a namespace mapping if one doesn't + // already exist. + // + genxStatus e; + genxNamespace gns ( + genxDeclareNamespace ( + s_, reinterpret_cast (ns.c_str ()), 0, &e)); + + if (e != GENX_SUCCESS) + handle_error (e); + + p = reinterpret_cast (genxGetNamespacePrefix (gns)); + return true; + } + } +} diff --git a/libcutl/cutl/xml/serializer.hxx b/libcutl/cutl/xml/serializer.hxx new file mode 100644 index 0000000..88164e2 --- /dev/null +++ b/libcutl/cutl/xml/serializer.hxx @@ -0,0 +1,183 @@ +// file : cutl/xml/serializer.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_SERIALIZER_HXX +#define CUTL_XML_SERIALIZER_HXX + +#include +#include +#include // std::size_t + +#include + +#include +#include + +#include + +namespace cutl +{ + namespace xml + { + class serializer; + + struct LIBCUTL_EXPORT serialization: exception + { + virtual + ~serialization () throw (); + + serialization (const std::string& name, + const std::string& description); + + serialization (const serializer&, const std::string& description); + + const std::string& + name () const {return name_;} + + const std::string& + description () const {return description_;} + + virtual const char* + what () const throw (); + + private: + void + init (); + + private: + std::string name_; + std::string description_; + std::string what_; + }; + + class 
LIBCUTL_EXPORT serializer + { + public: + ~serializer (); + + typedef xml::qname qname_type; + + // Serialize to std::ostream. Output name is used in diagnostics to + // identify the document being serialized. std::ios_base::failure + // exception is used to report io errors (badbit and failbit). The + // indentation argument specifies the number of indentation spaces + // that should be used for pretty-printing. If 0 is passed, no + // pretty-printing is performed. + // + serializer (std::ostream&, + const std::string& output_name, + unsigned short indentation = 2); + + const std::string& + output_name () const {return oname_;} + + // Serialization functions. + // + public: + + // Elements. + // + void + start_element (const qname_type& qname); + + void + start_element (const std::string& name); + + void + start_element (const std::string& ns, const std::string& name); + + void + end_element (); + + // Attributes. + // + void + start_attribute (const qname_type& qname); + + void + start_attribute (const std::string& name); + + void + start_attribute (const std::string& ns, const std::string& name); + + void + end_attribute (); + + void + attribute (const qname_type& qname, const std::string& value); + + template + void + attribute (const qname_type& qname, const T& value); + + void + attribute (const std::string& name, const std::string& value); + + template + void + attribute (const std::string& name, const T& value); + + void + attribute (const std::string& ns, + const std::string& name, + const std::string& value); + + template + void + attribute (const std::string& ns, + const std::string& name, + const T& value); + + // Characters. + // + void + characters (const std::string& value); + + template + void + characters (const T& value); + + // Namespaces declaration. If prefix is empty, then the default + // namespace is declared. If both prefix and namespace are empty, + // then the default namespace declaration is cleared (xmlns=""). 
+ // + void + namespace_decl (const std::string& ns, const std::string& prefix); + + // XML Declaration. If encoding or standalone are not specified, + // then these attributes are omitted from the output. + // + void + xml_decl (const std::string& version = "1.0", + const std::string& encoding = "UTF-8", + const std::string& standalone = ""); + + // Other functions. + // + public: + // Return true if there is a mapping. In this case, prefix contains + // the mapped prefix. + // + bool + lookup_namespace_prefix (const std::string& ns, std::string& prefix); + + private: + void + handle_error (genxStatus); + + private: + std::ostream& os_; + std::ostream::iostate os_state_; // Original exception state. + const std::string oname_; + + genxWriter s_; + genxSender sender_; + std::size_t depth_; + }; + } +} + +#include + +#endif // CUTL_XML_SERIALIZER_HXX diff --git a/libcutl/cutl/xml/serializer.ixx b/libcutl/cutl/xml/serializer.ixx new file mode 100644 index 0000000..11ff33a --- /dev/null +++ b/libcutl/cutl/xml/serializer.ixx @@ -0,0 +1,75 @@ +// file : cutl/xml/serializer.ixx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +namespace cutl +{ + namespace xml + { + inline void serializer:: + start_element (const qname_type& qname) + { + start_element (qname.namespace_ (), qname.name ()); + } + + inline void serializer:: + start_element (const std::string& name) + { + start_element (std::string (), name); + } + + inline void serializer:: + start_attribute (const qname_type& qname) + { + start_attribute (qname.namespace_ (), qname.name ()); + } + + inline void serializer:: + start_attribute (const std::string& name) + { + start_attribute (std::string (), name); + } + + inline void serializer:: + attribute (const qname_type& qname, const std::string& value) + { + attribute (qname.namespace_ (), qname.name (), value); + } + + template + inline void serializer:: + attribute (const qname_type& 
qname, const T& value) + { + attribute (qname, value_traits::serialize (value, *this)); + } + + inline void serializer:: + attribute (const std::string& name, const std::string& value) + { + attribute (std::string (), name, value); + } + + template + inline void serializer:: + attribute (const std::string& name, const T& value) + { + attribute (name, value_traits::serialize (value, *this)); + } + + template + inline void serializer:: + attribute (const std::string& ns, const std::string& name, const T& value) + { + attribute (ns, name, value_traits::serialize (value, *this)); + } + + template + inline void serializer:: + characters (const T& value) + { + characters (value_traits::serialize (value, *this)); + } + } +} diff --git a/libcutl/cutl/xml/value-traits.cxx b/libcutl/cutl/xml/value-traits.cxx new file mode 100644 index 0000000..7598645 --- /dev/null +++ b/libcutl/cutl/xml/value-traits.cxx @@ -0,0 +1,25 @@ +// file : cutl/xml/value-traits.cxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include +#include + +using namespace std; + +namespace cutl +{ + namespace xml + { + bool default_value_traits:: + parse (string s, const parser& p) + { + if (s == "true" || s == "1" || s == "True" || s == "TRUE") + return true; + else if (s == "false" || s == "0" || s == "False" || s == "FALSE") + return false; + else + throw parsing (p, "invalid bool value '" + s + "'"); + } + } +} diff --git a/libcutl/cutl/xml/value-traits.hxx b/libcutl/cutl/xml/value-traits.hxx new file mode 100644 index 0000000..0b95205 --- /dev/null +++ b/libcutl/cutl/xml/value-traits.hxx @@ -0,0 +1,53 @@ +// file : cutl/xml/value-traits.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_VALUE_TRAITS_HXX +#define CUTL_XML_VALUE_TRAITS_HXX + +#include +#include // std::size_t + +#include + +namespace cutl +{ + namespace xml + { + class parser; + class 
serializer; + + template + struct default_value_traits + { + static T + parse (std::string, const parser&); + + static std::string + serialize (const T&, const serializer&); + }; + + template <> + struct LIBCUTL_EXPORT default_value_traits + { + static bool + parse (std::string, const parser&); + + static std::string + serialize (bool v, const serializer&) + { + return v ? "true" : "false"; + } + }; + + template + struct value_traits: default_value_traits {}; + + template + struct value_traits: default_value_traits {}; + } +} + +#include + +#endif // CUTL_XML_VALUE_TRAITS_HXX diff --git a/libcutl/cutl/xml/value-traits.txx b/libcutl/cutl/xml/value-traits.txx new file mode 100644 index 0000000..4868dba --- /dev/null +++ b/libcutl/cutl/xml/value-traits.txx @@ -0,0 +1,35 @@ +// file : cutl/xml/value-traits.txx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +namespace cutl +{ + namespace xml + { + template + T default_value_traits:: + parse (std::string s, const parser& p) + { + T r; + std::istringstream is (s); + if (!(is >> r && is.eof ()) ) + throw parsing (p, "invalid value '" + s + "'"); + return r; + } + + template + std::string default_value_traits:: + serialize (const T& v, const serializer& s) + { + std::ostringstream os; + if (!(os << v)) + throw serialization (s, "invalid value"); + return os.str (); + } + } +} -- cgit v1.2.3