diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-05-18 16:08:14 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-05-18 16:08:14 +0200 |
commit | a15cf65c44d5c224169c32ef5495b68c758134b7 (patch) | |
tree | 3419f58fc8e1b315ba8171910ee044c5d467c162 /libbackend-elements/backend-elements/indentation/cxx.hxx |
Imported Upstream version 3.3.0.2upstream/3.3.0.2
Diffstat (limited to 'libbackend-elements/backend-elements/indentation/cxx.hxx')
-rw-r--r-- | libbackend-elements/backend-elements/indentation/cxx.hxx | 1016 |
1 files changed, 1016 insertions, 0 deletions
diff --git a/libbackend-elements/backend-elements/indentation/cxx.hxx b/libbackend-elements/backend-elements/indentation/cxx.hxx new file mode 100644 index 0000000..f10341a --- /dev/null +++ b/libbackend-elements/backend-elements/indentation/cxx.hxx @@ -0,0 +1,1016 @@ +// file : backend-elements/indentation/cxx.hxx +// author : Boris Kolpackov <boris@kolpackov.net> +// copyright : Copyright (c) 2005-2010 Boris Kolpackov +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#ifndef BACKEND_ELEMENTS_INDENTATION_CXX_HXX +#define BACKEND_ELEMENTS_INDENTATION_CXX_HXX + +#include <backend-elements/types.hxx> +#include <backend-elements/indentation/buffer.hxx> + +#include <cult/containers/set.hxx> +#include <cult/containers/stack.hxx> +#include <cult/containers/deque.hxx> + +#include <locale> + +// #include <iostream> // @@ tmp + +namespace BackendElements +{ + namespace Indentation + { + template <typename C> + class CXX: public Buffer<C>/*, public NonCopyable*/ + { + public: + typedef + typename Buffer<C>::Traits + Traits; + + typedef + typename Buffer<C>::AsChar + AsChar; + + typedef + typename Buffer<C>::AsInt + AsInt; + + typedef + typename Buffer<C>::Write + Write; + + public: + CXX (Buffer<C>& out) + : out_ (out), + buffering_ (false), + position_ (0), + paren_balance_ (0), + spaces_ (2), + construct_ (Construct::other), + lbrace_ ("{"), + rbrace_ ("}") + // locale_ ("C") + { + indentation_.push (0); + + single_line_blocks_.insert ("if"); + single_line_blocks_.insert ("do"); + single_line_blocks_.insert ("for"); + single_line_blocks_.insert ("else"); + single_line_blocks_.insert ("case"); + single_line_blocks_.insert ("while"); + + follow_blocks_.insert ("else"); + follow_blocks_.insert ("case"); + follow_blocks_.insert ("catch"); + follow_blocks_.insert ("default"); + } + + public: + virtual AsInt + put (AsChar c) + { + AsInt result (Traits::to_int_type (c)); + + try + { + // First determine what kind of construct we are in. + // + typename Construct::Value new_construct (construct_); + typename Construct::Value old_construct (construct_); + + switch (c) + { + case '\n': + { + if (construct_ == Construct::pp_directive || + construct_ == Construct::cxx_comment) + construct_ = new_construct = Construct::other; + + break; + } + case '#': + { + if (construct_ == Construct::other) + construct_ = new_construct = Construct::pp_directive; + + break; + } + case '\"': + { + if (construct_ != Construct::pp_directive && + construct_ != Construct::c_comment && + construct_ != Construct::cxx_comment && + construct_ != Construct::char_literal) + { + // We might be in an escape sequence. + // + Boolean es (!hold_.empty () && hold_.back () == '\\'); + + if (es) + { + // Scan the hold sequence backwards to figure out if this + // backslash is part of this escape sequence or a preceding + // one. + // + for (typename Hold::ReverseIterator i (hold_.rbegin () + 1), + e (hold_.rend ()); i != e && *i == '\\'; ++i) + es = !es; + } + + if (!es) + { + // Not an escape sequence. + // + if (construct_ == Construct::string_literal) + new_construct = Construct::other; + else + construct_ = new_construct = Construct::string_literal; + } + } + + break; + } + case '\'': + { + if (construct_ != Construct::pp_directive && + construct_ != Construct::c_comment && + construct_ != Construct::cxx_comment && + construct_ != Construct::string_literal) + { + // We might be in an escape sequence. + // + Boolean es (!hold_.empty () && hold_.back () == '\\'); + + if (es) + { + // Scan the hold sequence backwards to figure out if this + // backslash is part of this escape sequence or a preceding + // one. + // + for (typename Hold::ReverseIterator i (hold_.rbegin () + 1), + e (hold_.rend ()); i != e && *i == '\\'; ++i) + es = !es; + } + + if (!es) + { + if (construct_ == Construct::char_literal) + new_construct = Construct::other; + else + construct_ = new_construct = Construct::char_literal; + } + } + + break; + } + case '/': + { + if (construct_ == Construct::other) + { + if (!hold_.empty () && hold_.back () == '/') + construct_ = new_construct = Construct::cxx_comment; + } + + if (construct_ == Construct::c_comment) + { + if (!hold_.empty () && hold_.back () == '*') + construct_ = new_construct = Construct::other; + } + + break; + } + case '*': + { + if (construct_ == Construct::other) + { + if (!hold_.empty () && hold_.back () == '/') + construct_ = new_construct = Construct::c_comment; + } + + break; + } + default: + { + break; + } + } + + // Special handling of CPP directives. + // + if (construct_ == Construct::pp_directive) + { + result = write (c); + position_++; + return result; + } + + // + // + tokenize (c, old_construct); + + + // Indentation in parenthesis. We don't need to make sure + // we are not in a comments, etc. because we make sure we + // don't hold anything in those states. + // + if (!hold_.empty () && hold_.back () == '(') + { + unbuffer (); // We don't need to hold it anymore. + + if (c == '\n') + indentation_.push (indentation_.top () + spaces_); + else + indentation_.push (position_); + } + + + // + // + Boolean defaulting (false); + + switch (c) + { + case '\n': + { + if (!indent_block_.empty () && construct_ == Construct::other) + indent_block_.top ().newline_ = true; + + hold_.push_back (c); + position_ = 0; // Starting a new line. + + break; + } + case '{': + { + if (construct_ == Construct::other) + { + if (!indent_block_.empty ()) + { + // Pop all the blocks until the one that was indented. + // + while (!indent_block_.top ().indented_) + indent_block_.pop (); + + if (indentation_.size () > 1) + indentation_.pop (); + + indent_block_.pop (); + } + + ensure_new_line (); + output_indentation (); + result = write (c); + ensure_new_line (); + + indentation_.push (indentation_.top () + spaces_); + } + else + defaulting = true; + + break; + } + case '}': + { + if (construct_ == Construct::other) + { + if (indentation_.size () > 1) + indentation_.pop (); + + // Reduce multiple newlines to one. + // + while (hold_.size () > 1) + { + typename Hold::ReverseIterator i (hold_.rbegin ()); + + if (*i == '\n' && *(i + 1) == '\n') + hold_.pop_back (); + else + break; + } + + ensure_new_line (); + output_indentation (); + + hold_.push_back (c); + + + // Add double newline after '}'. + // + hold_.push_back ('\n'); + hold_.push_back ('\n'); + position_ = 0; + + buffering_ = true; + } + else + defaulting = true; + + break; + } + case ';': + { + if (construct_ == Construct::other) + { + // for (;;) + // + if (!indent_block_.empty () && paren_balance_ == 0) + { + // Pop all the blocks until the one that was indented. + // + while (!indent_block_.top ().indented_) + indent_block_.pop (); + + if (indentation_.size () > 1) + indentation_.pop (); + + indent_block_.pop (); + } + + if (paren_balance_ != 0) + { + // We are inside for (;;) statement. Nothing to do here. + // + defaulting = true; + } + else + { + // Handling '};' case. + // + + Boolean brace (false); + + if (hold_.size () > 1 && hold_.back () == '\n') + { + Boolean pop_nl (false); + + for (typename Hold::ReverseIterator + i (hold_.rbegin ()), e (hold_.rend ()); + i != e; ++i) + { + if (*i != '\n') + { + if (*i == '}') + brace = pop_nl = true; + + break; + } + } + + if (pop_nl) + while (hold_.back () == '\n') + hold_.pop_back (); + } + + output_indentation (); + result = write (c); + position_++; + + if (brace) + { + hold_.push_back ('\n'); + hold_.push_back ('\n'); + } + + ensure_new_line (); + } + } + else + defaulting = true; + + break; + } + case ' ': + { + if (construct_ == Construct::other) + { + // Handling '} foo_;' case. + // + if (hold_.size () > 1 && hold_.back () == '\n') + { + Boolean pop_nl (false); + + for (typename Hold::ReverseIterator + i (hold_.rbegin ()), e (hold_.rend ()); + i != e; ++i) + { + if (*i != '\n') + { + if (*i == '}') + pop_nl = true; + + break; + } + } + + if (pop_nl) + while (hold_.back () == '\n') + hold_.pop_back (); + } + } + + defaulting = true; + break; + } + case '\\': + { + if (construct_ != Construct::pp_directive && + construct_ != Construct::c_comment && + construct_ != Construct::cxx_comment) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + + } + case '(': + { + if (construct_ == Construct::other) + { + // Hold it so that we can see what's coming next. + // + output_indentation (); + hold_.push_back (c); + position_++; + paren_balance_++; + } + else + defaulting = true; + break; + } + case ')': + { + if (construct_ == Construct::other) + { + if (indentation_.size () > 1) + indentation_.pop (); + + if (paren_balance_ > 0) + paren_balance_--; + } + + defaulting = true; + break; + } + case '/': + { + if (construct_ == Construct::other) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + } + case '*': + { + if (construct_ == Construct::c_comment) + { + output_indentation (); + hold_.push_back (c); + position_++; + } + else + defaulting = true; + + break; + } + default: + { + defaulting = true; + break; + } + } + + + if (defaulting) + { + output_indentation (); + result = write (c); + position_++; + } + + construct_ = new_construct; + } + catch (Write const&) + { + result = Traits::eof (); + } + + return result; + } + + private: + typedef Cult::StringTemplate<C> String; + + Void + next_token (String const& old, AsChar c) + { + //std::cerr << "next token: " << token_ + // << "; old token: " << old << std::endl; + + // Handle one line indentation blocks (if, else, etc). + // + if (single_line_blocks_.find (token_) != single_line_blocks_.end ()) + { + // Only indent sub-blocks if we are on a new line. + // + Boolean indent (indent_block_.empty () || + indent_block_.top ().newline_); + + indent_block_.push (IndentBlockInfo (c == '\n', indent)); + + if (indent) + indentation_.push (indentation_.top () + spaces_); + } + + // Keep track of the do ... while construct in order to suppress + // the newline after } and before while. + // + if (old == String ("do") && token_ == lbrace_) + { + do_while_state_.push (0); + } + + if (!do_while_state_.empty ()) + { + if (token_ == lbrace_) + do_while_state_.top ()++; + + if (token_ == rbrace_) + do_while_state_.top ()--; + } + + // Suppress double newline in the "}else", etc., cases. + // + if (old == rbrace_) + { + Boolean dw (!do_while_state_.empty () && + do_while_state_.top () == 0); + + if (follow_blocks_.find (token_) != follow_blocks_.end () || dw) + { + if (dw) + do_while_state_.pop (); + + // Reduce double newline after "}" into a single one. + // + typename Hold::Iterator i (hold_.end ()), b (hold_.begin ()); + + for (--i; i != b; --i) + { + // See if this is the end of the "}\n\n" sequence. + // + if (*i == '\n') + { + --i; + if (i != b && *i == '\n') + { + --i; + if (*i == '}') + { + ++i; + hold_.erase (i); + break; + } + } + } + } + } + else if (token_ != rbrace_) + { + buffering_ = false; + } + } + } + + public: + virtual Void + unbuffer () + { + AsInt result; + + while (!hold_.empty ()) + { + result = out_.put (hold_.front ()); + + //@@ failed + if (result == Traits::eof ()) + throw Write (); + + hold_.pop_front (); + } + } + + private: + Void + ensure_new_line () + { + if (hold_.empty () || hold_.back () != '\n') + { + hold_.push_back ('\n'); + position_ = 0; // Starting a new line. + } + } + + + Void + output_indentation () + { + if (!hold_.empty () && hold_.back () == '\n') + { + for (UnsignedLong i (0); i < indentation_.top (); ++i) + write (' '); + + position_ += indentation_.top (); + } + } + + AsInt + write (AsChar c) + { + hold_.push_back (c); + + if (!buffering_) + { + AsInt result (Traits::eof ()); + + while (!hold_.empty ()) + { + result = out_.put (hold_.front ()); + + if (result == Traits::eof ()) + throw Write (); + + hold_.pop_front (); + } + + return result; + } + else + return c; + } + + private: + struct Construct + { + enum Value + { + other, + pp_directive, + c_comment, + cxx_comment, + string_literal, + char_literal + }; + }; + + Void + tokenize (AsChar c, typename Construct::Value old) + { + // + // + switch (construct_) + { + case Construct::pp_directive: + { + if (old == Construct::other) // Start PP directive + retire (c); + + return; + } + case Construct::c_comment: + { + if (old == Construct::other) // Start C comment. + lexeme_.clear (); + + return; + } + case Construct::cxx_comment: + { + if (old == Construct::other) // Start C++ comment. + lexeme_.clear (); + + return; + } + case Construct::string_literal: + { + if (old == Construct::other) // Start string literal + retire (c); + + lexeme_ += c; + return; + } + case Construct::char_literal: + { + if (old == Construct::other) // Start char literal + retire (c); + + lexeme_ += c; + return; + } + default: + break; + } + + // construct_ == other + // + switch (old) + { + case Construct::pp_directive: + { + // End PP directive (newline). + // + return; + } + case Construct::c_comment: + { + // End C comment. + // + return; + } + case Construct::cxx_comment: + { + // End C++ comment (newline). + // + return; + } + case Construct::string_literal: + { + // End string literal ("). + // + lexeme_ += c; + return; + } + case Construct::char_literal: + { + // End char literal ('). + // + lexeme_ += c; + return; + } + default: + break; + } + + + // construct_ == old == other + // + + /* + + The code below is equivalent to this (simpler) code which is + unfortunately not fast enough. + + using std::isalpha; + using std::isalnum; + using std::isdigit; + using std::isspace; + + if (c == '_' || isalpha (c, locale_)) + { + if (lexeme_.empty () || + lexeme_[0] == '_' || isalpha (lexeme_[0], locale_)) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + } + else if (isdigit (c, locale_)) + { + if (lexeme_.empty () || + lexeme_[0] == '_' || isalnum (lexeme_[0], locale_)) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + } + else // Delimiters + { + retire (c); + + if (!isspace (c, locale_)) + lexeme_ += c; + } + */ + + switch (char_class (c)) + { + case CharClass::alpha: + { + if (lexeme_.empty () || + char_class (lexeme_[0]) == CharClass::alpha) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + break; + } + case CharClass::digit: + { + if (lexeme_.empty ()) + lexeme_ += c; + else + { + typename CharClass::Value cc (char_class (lexeme_[0])); + + if (cc == CharClass::alpha || cc == CharClass::digit) + lexeme_ += c; + else + { + retire (c); + lexeme_ += c; + } + } + break; + } + case CharClass::op_punc: + { + retire (c); + lexeme_ += c; + break; + } + case CharClass::space: + { + retire (c); + break; + } + } + } + + struct CharClass + { + enum Value + { + alpha, // Alpha + '_'. + digit, + op_punc, // Operator or punctuation. + space + }; + + }; + + typename CharClass::Value + char_class (C c) + { + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return CharClass::digit; + + case '!': + case '%': + case '^': + case '&': + case '*': + case '(': + case ')': + case '-': + case '+': + case '=': + case '{': + case '}': + case '|': + case '~': + case '[': + case ']': + case '\\': + case ';': + case '\'': + case ':': + case '"': + case '<': + case '>': + case '?': + case ',': + case '.': + case '/': + return CharClass::op_punc; + + case ' ': + case '\n': + case '\t': + case '\f': + case '\r': + case '\v': + return CharClass::space; + + default: + return CharClass::alpha; + } + } + + + Void + retire (AsChar c) + { + if (!lexeme_.empty ()) + { + token_.swap (lexeme_); + next_token (lexeme_, c); + lexeme_.clear (); + } + } + + private: + Buffer<C>& out_; + Boolean buffering_; // True if write() should buffer the char. + UnsignedLong position_; // Current position on the line. + UnsignedLong paren_balance_; // ( ) balance. + Cult::Containers::Stack<UnsignedLong> indentation_; + UnsignedLong spaces_; + + Boolean suppress_nl_; + + //@@ gcc bug# 18304 + // + typename Construct::Value construct_; + + // Special state stach for the do-while construct. The presence + // of an element in the stack indicates that we are in a braced + // do-while construct. The value of the element is the brace + // balance. + Cult::Containers::Stack<UnsignedLong> do_while_state_; + + typedef + Cult::Containers::Deque<AsInt> + Hold; + + Hold hold_; + + private: + String token_; // previously fully recognized token + String lexeme_; // current lexeme (accumulator) + + // std::locale locale_; + + // Keywords that may be folowed by a single-line block, e.g., if, + // else, etc. + // + Cult::Containers::Set<String> single_line_blocks_; + + // Keywords that may follow (and be related) to a previous block, + // e.g., else, case, catch. + // + Cult::Containers::Set<String> follow_blocks_; + + String lbrace_; + String rbrace_; + + private: + // Single-line indented blocks such as if, else, while, etc. The + // newline flag indicates whether a new line has been seen after + // the keyword. This is needed to properly distinguish cases such + // as: + // + // else if (...) + // foo (); + // + // else + // if (...) + // foo (); + // + struct IndentBlockInfo + { + IndentBlockInfo (Boolean newline, Boolean indented) + : newline_ (newline), indented_ (indented) + { + } + + Boolean newline_; + Boolean indented_; + }; + + Cult::Containers::Stack<IndentBlockInfo> indent_block_; + }; + } +} + +#include <backend-elements/indentation/cxx.txx> + +#endif // BACKEND_ELEMENTS_INDENTATION_CXX_HXX |