diff options
Diffstat (limited to 'libcutl/cutl/details/boost/regex/v4/states.hpp')
-rw-r--r-- | libcutl/cutl/details/boost/regex/v4/states.hpp | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/libcutl/cutl/details/boost/regex/v4/states.hpp b/libcutl/cutl/details/boost/regex/v4/states.hpp new file mode 100644 index 0000000..8f1152a --- /dev/null +++ b/libcutl/cutl/details/boost/regex/v4/states.hpp @@ -0,0 +1,301 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE states.cpp + * VERSION see <cutl/details/boost/version.hpp> + * DESCRIPTION: Declares internal state machine structures. + */ + +#ifndef BOOST_REGEX_V4_STATES_HPP +#define BOOST_REGEX_V4_STATES_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +/*** mask_type ******************************************************* +Whenever we have a choice of two alternatives, we use an array of bytes +to indicate which of the two alternatives it is possible to take for any +given input character. If mask_take is set, then we can take the next +state, and if mask_skip is set then we can take the alternative. +***********************************************************************/ +enum mask_type +{ + mask_take = 1, + mask_skip = 2, + mask_init = 4, + mask_any = mask_skip | mask_take, + mask_all = mask_any +}; + +/*** helpers ********************************************************** +These helpers let us use function overload resolution to detect whether +we have narrow or wide character strings: +***********************************************************************/ +struct _narrow_type{}; +struct _wide_type{}; +template <class charT> struct is_byte; +template<> struct is_byte<char> { typedef _narrow_type width_type; }; +template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; }; +template<> struct is_byte<signed char> { typedef _narrow_type width_type; }; +template <class charT> struct is_byte { typedef _wide_type width_type; }; + +/*** enum syntax_element_type ****************************************** +Every record in the state machine falls into one of the following types: +***********************************************************************/ +enum syntax_element_type +{ + // start of a marked sub-expression, or perl-style (?...) extension + syntax_element_startmark = 0, + // end of a marked sub-expression, or perl-style (?...) extension + syntax_element_endmark = syntax_element_startmark + 1, + // any sequence of literal characters + syntax_element_literal = syntax_element_endmark + 1, + // start of line assertion: ^ + syntax_element_start_line = syntax_element_literal + 1, + // end of line assertion $ + syntax_element_end_line = syntax_element_start_line + 1, + // match any character: . + syntax_element_wild = syntax_element_end_line + 1, + // end of expression: we have a match when we get here + syntax_element_match = syntax_element_wild + 1, + // perl style word boundary: \b + syntax_element_word_boundary = syntax_element_match + 1, + // perl style within word boundary: \B + syntax_element_within_word = syntax_element_word_boundary + 1, + // start of word assertion: \< + syntax_element_word_start = syntax_element_within_word + 1, + // end of word assertion: \> + syntax_element_word_end = syntax_element_word_start + 1, + // start of buffer assertion: \` + syntax_element_buffer_start = syntax_element_word_end + 1, + // end of buffer assertion: \' + syntax_element_buffer_end = syntax_element_buffer_start + 1, + // backreference to previously matched sub-expression + syntax_element_backref = syntax_element_buffer_end + 1, + // either a wide character set [..] or one with multicharacter collating elements: + syntax_element_long_set = syntax_element_backref + 1, + // narrow character set: [...] + syntax_element_set = syntax_element_long_set + 1, + // jump to a new state in the machine: + syntax_element_jump = syntax_element_set + 1, + // choose between two production states: + syntax_element_alt = syntax_element_jump + 1, + // a repeat + syntax_element_rep = syntax_element_alt + 1, + // match a combining character sequence + syntax_element_combining = syntax_element_rep + 1, + // perl style soft buffer end: \z + syntax_element_soft_buffer_end = syntax_element_combining + 1, + // perl style continuation: \G + syntax_element_restart_continue = syntax_element_soft_buffer_end + 1, + // single character repeats: + syntax_element_dot_rep = syntax_element_restart_continue + 1, + syntax_element_char_rep = syntax_element_dot_rep + 1, + syntax_element_short_set_rep = syntax_element_char_rep + 1, + syntax_element_long_set_rep = syntax_element_short_set_rep + 1, + // a backstep for lookbehind repeats: + syntax_element_backstep = syntax_element_long_set_rep + 1, + // an assertion that a mark was matched: + syntax_element_assert_backref = syntax_element_backstep + 1, + syntax_element_toggle_case = syntax_element_assert_backref + 1, + // a recursive expression: + syntax_element_recurse = syntax_element_toggle_case + 1 +}; + +#ifdef BOOST_REGEX_DEBUG +// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion +std::ostream& operator<<(std::ostream&, syntax_element_type); +#endif + +struct re_syntax_base; + +/*** union offset_type ************************************************ +Points to another state in the machine. During machine construction +we use integral offsets, but these are converted to pointers before +execution of the machine. +***********************************************************************/ +union offset_type +{ + re_syntax_base* p; + std::ptrdiff_t i; +}; + +/*** struct re_syntax_base ******************************************** +Base class for all states in the machine. +***********************************************************************/ +struct re_syntax_base +{ + syntax_element_type type; // what kind of state this is + offset_type next; // next state in the machine +}; + +/*** struct re_brace ************************************************** +A marked parenthesis. +***********************************************************************/ +struct re_brace : public re_syntax_base +{ + // The index to match, can be zero (don't mark the sub-expression) + // or negative (for perl style (?...) extentions): + int index; + bool icase; +}; + +/*** struct re_dot ************************************************** +Match anything. +***********************************************************************/ +enum +{ + dont_care = 1, + force_not_newline = 0, + force_newline = 2, + + test_not_newline = 2, + test_newline = 3 +}; +struct re_dot : public re_syntax_base +{ + unsigned char mask; +}; + +/*** struct re_literal ************************************************ +A string of literals, following this structure will be an +array of characters: charT[length] +***********************************************************************/ +struct re_literal : public re_syntax_base +{ + unsigned int length; +}; + +/*** struct re_case ************************************************ +Indicates whether we are moving to a case insensive block or not +***********************************************************************/ +struct re_case : public re_syntax_base +{ + bool icase; +}; + +/*** struct re_set_long *********************************************** +A wide character set of characters, following this structure will be +an array of type charT: +First csingles null-terminated strings +Then 2 * cranges NULL terminated strings +Then cequivalents NULL terminated strings +***********************************************************************/ +template <class mask_type> +struct re_set_long : public re_syntax_base +{ + unsigned int csingles, cranges, cequivalents; + mask_type cclasses; + mask_type cnclasses; + bool isnot; + bool singleton; +}; + +/*** struct re_set **************************************************** +A set of narrow-characters, matches any of _map which is none-zero +***********************************************************************/ +struct re_set : public re_syntax_base +{ + unsigned char _map[1 << CHAR_BIT]; +}; + +/*** struct re_jump *************************************************** +Jump to a new location in the machine (not next). +***********************************************************************/ +struct re_jump : public re_syntax_base +{ + offset_type alt; // location to jump to +}; + +/*** struct re_alt *************************************************** +Jump to a new location in the machine (possibly next). +***********************************************************************/ +struct re_alt : public re_jump +{ + unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump + unsigned int can_be_null; // true if we match a NULL string +}; + +/*** struct re_repeat ************************************************* +Repeat a section of the machine +***********************************************************************/ +struct re_repeat : public re_alt +{ + std::size_t min, max; // min and max allowable repeats + int state_id; // Unique identifier for this repeat + bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches) + bool greedy; // True if this is a greedy repeat +}; + +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + +/*** enum re_jump_size_type ******************************************* +Provides compiled size of re_jump structure (allowing for trailing alignment). +We provide this so we know how manybytes to insert when constructing the machine +(The value of padding_mask is defined in regex_raw_buffer.hpp). +***********************************************************************/ +enum re_jump_size_type +{ + re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), + re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), + re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) +}; + +/*** proc re_is_set_member ********************************************* +Forward declaration: we'll need this one later... +***********************************************************************/ + +template<class charT, class traits> +struct regex_data; + +template <class iterator, class charT, class traits_type, class char_classT> +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, + iterator last, + const re_set_long<char_classT>* set_, + const regex_data<charT, traits_type>& e, bool icase); + +} // namespace re_detail + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + |