diff options
Diffstat (limited to 'libcutl/cutl/xml/parser.hxx')
-rw-r--r-- | libcutl/cutl/xml/parser.hxx | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/libcutl/cutl/xml/parser.hxx b/libcutl/cutl/xml/parser.hxx new file mode 100644 index 0000000..5c3c959 --- /dev/null +++ b/libcutl/cutl/xml/parser.hxx @@ -0,0 +1,419 @@ +// file : cutl/xml/parser.hxx +// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef CUTL_XML_PARSER_HXX +#define CUTL_XML_PARSER_HXX + +#include <map> +#include <vector> +#include <string> +#include <iosfwd> +#include <cstddef> // std::size_t +#include <cassert> + +#include <cutl/details/config.hxx> // LIBCUTL_EXTERNAL_EXPAT + +#ifndef LIBCUTL_EXTERNAL_EXPAT +# include <cutl/details/expat/expat.h> +#else +# include <expat.h> +#endif + +// We only support UTF-8 expat. +// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include <cutl/xml/qname.hxx> +#include <cutl/xml/exception.hxx> + +#include <cutl/details/export.hxx> + +namespace cutl +{ + namespace xml + { + class parser; + + struct LIBCUTL_EXPORT parsing: exception + { + virtual + ~parsing () throw (); + + parsing (const std::string& name, + unsigned long long line, + unsigned long long column, + const std::string& description); + + parsing (const parser&, const std::string& description); + + const std::string& + name () const {return name_;} + + unsigned long long + line () const {return line_;} + + unsigned long long + column () const {return column_;} + + const std::string& + description () const {return description_;} + + virtual const char* + what () const throw (); + + private: + void + init (); + + private: + std::string name_; + unsigned long long line_; + unsigned long long column_; + std::string description_; + std::string what_; + }; + + class LIBCUTL_EXPORT parser + { + public: + ~parser (); + + typedef xml::qname qname_type; + typedef unsigned short feature_type; + + // If both receive_attributes_event and receive_attributes_map are + // specified, then receive_attributes_event is assumed. + // + static const feature_type receive_elements = 0x0001; + static const feature_type receive_characters = 0x0002; + static const feature_type receive_attributes_map = 0x0004; + static const feature_type receive_attributes_event = 0x0008; + static const feature_type receive_namespace_decls = 0x0010; + + static const feature_type receive_default = receive_elements | + receive_characters | + receive_attributes_map; + + // Parse std::istream. Input name is used in diagnostics to identify + // the document being parsed. std::ios_base::failure exception is + // used to report io errors (badbit and failbit). + // + parser (std::istream&, + const std::string& input_name, + feature_type = receive_default); + + const std::string& + input_name () const {return iname_;} + + // Parsing events. + // + public: + enum event_type + { + // If adding new events, also update the stream insertion operator. + // + start_element, + end_element, + start_attribute, + end_attribute, + characters, + start_namespace_decl, + end_namespace_decl, + eof + }; + + event_type + next () + { + if (state_ == state_next) + return next_ (false); + else + { + // If we previously peeked at start/end_element, then adjust + // state accordingly. + // + switch (event_) + { + case end_element: + { + if (!element_state_.empty () && + element_state_.back ().depth == depth_) + pop_element (); + + depth_--; + break; + } + case start_element: + { + depth_++; + break; + } + default: + break; + } + + state_ = state_next; + return event_; + } + } + + // Get the next event and make sure that it's what's expected. If it + // is not, then throw an appropriate parsing exception. + // + void + next_expect (event_type); + + void + next_expect (event_type, const qname_type& qname); + + void + next_expect (event_type, const std::string& name); + + void + next_expect (event_type, const std::string& ns, const std::string& name); + + event_type + peek () + { + if (state_ == state_peek) + return event_; + else + { + event_type e (next_ (true)); + state_ = state_peek; // Set it after the call to next_(). + return e; + } + } + + // Return the even that was last returned by the call to next() or + // peek(). + // + event_type + event () {return event_;} + + // Event data. + // + public: + const qname_type& qname () const {return *pqname_;} + + const std::string& namespace_ () const {return pqname_->namespace_ ();} + const std::string& name () const {return pqname_->name ();} + const std::string& prefix () const {return pqname_->prefix ();} + + const std::string& value () const {return *pvalue_;} + + unsigned long long line () const {return line_;} + unsigned long long column () const {return column_;} + + // Attribute map lookup. If attribute is not found, then the version + // without the default value throws an appropriate parsing exception + // while the version with the default value returns that value. + // + // Note also that there is no attribute(ns,name) version since it + // would conflict with attribute(name,dv) (qualified attributes + // are not very common). + // + // Attribute map is valid throughout at the "element level" until + // end_element and not just during start_element. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next(). + // + const std::string& + attribute (const std::string& name) const; + + template <typename T> + T + attribute (const std::string& name) const; + + std::string + attribute (const std::string& name, const std::string& dv) const; + + template <typename T> + T + attribute (const std::string& name, const T& dv) const; + + const std::string& + attribute (const qname_type& qname) const; + + template <typename T> + T + attribute (const qname_type& qname) const; + + std::string + attribute (const qname_type& qname, const std::string& dv) const; + + template <typename T> + T + attribute (const qname_type& qname, const T& dv) const; + + bool + attribute_present (const std::string& name) const; + + bool + attribute_present (const qname_type& qname) const; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. + // + struct attribute_value_type + { + std::string value; + mutable bool handled; + }; + + typedef std::map<qname_type, attribute_value_type> attribute_map_type; + + const attribute_map_type& + attribute_map () const; + + // Optional content processing. + // + public: + enum content_type + { + // element characters whitespaces + empty, // no no ignored + simple, // no yes preserved + complex, // yes no ignored + mixed // yes yes preserved + }; + + // Note that you cannot get/set content while peeking. + // + void + content (content_type c) + { + assert (state_ == state_next); + + if (!element_state_.empty () && element_state_.back ().depth == depth_) + element_state_.back ().content = c; + else + element_state_.push_back (element_entry (depth_, c)); + } + + content_type + content () const + { + assert (state_ == state_next); + + return + !element_state_.empty () && element_state_.back ().depth == depth_ + ? element_state_.back ().content + : mixed; + } + + private: + static void XMLCALL + start_element_ (void*, const XML_Char*, const XML_Char**); + + static void XMLCALL + end_element_ (void*, const XML_Char*); + + static void XMLCALL + characters_ (void*, const XML_Char*, int); + + static void XMLCALL + start_namespace_decl_ (void*, const XML_Char*, const XML_Char*); + + static void XMLCALL + end_namespace_decl_ (void*, const XML_Char*); + + private: + event_type + next_ (bool peek); + + event_type + next_body (); + + void + handle_error (); + + private: + std::istream& is_; + const std::string iname_; + feature_type feature_; + + XML_Parser p_; + std::size_t depth_; + enum {state_next, state_peek} state_; + event_type event_; + event_type queue_; + + qname_type qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const qname_type* pqname_; + const std::string* pvalue_; + + unsigned long long line_; + unsigned long long column_; + + // Attributes as events. + // + struct attribute_type + { + qname_type qname; + std::string value; + }; + + typedef std::vector<attribute_type> attributes; + + attributes attr_; + attributes::size_type attr_i_; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector<qname_type> namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_; // Index of the current decl. + + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_; // Index of the current decl. + + // Element state consisting of the content model and attribute map. + // + struct element_entry + { + element_entry (std::size_t d, content_type c = mixed) + : depth (d), content (c), attr_unhandled_ (0) {} + + std::size_t depth; + content_type content; + attribute_map_type attr_map_; + mutable attribute_map_type::size_type attr_unhandled_; + }; + + typedef std::vector<element_entry> element_state; + std::vector<element_entry> element_state_; + + // Empty attribute map to return when an element has no attributes. + // + const attribute_map_type empty_attr_map_; + + // Return the element entry corresponding to the current depth, if + // exists, and NULL otherwise. + // + const element_entry* + get_element () const; + + void + pop_element (); + }; + + LIBCUTL_EXPORT + std::ostream& + operator<< (std::ostream&, parser::event_type); + } +} + +#include <cutl/xml/parser.ixx> +#include <cutl/xml/parser.txx> + +#endif // CUTL_XML_PARSER_HXX |