// Copyright 2012 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "utils/text/templates.hpp"

#include <algorithm>
#include <fstream>
#include <map>
#include <stack>

#include "utils/format/macros.hpp"
#include "utils/noncopyable.hpp"
#include "utils/sanity.hpp"
#include "utils/text/exceptions.hpp"
#include "utils/text/operations.ipp"

namespace text = utils::text;


namespace {


/// Definition of a template statement.
///
/// A template statement is a particular line in the input file that is
/// preceded by a template marker.
This class provides a high-level /// representation of the contents of such statement and a mechanism to parse /// the textual line into this high-level representation. class statement_def { public: /// Types of the known statements. enum statement_type { /// Alternative clause of a conditional. /// /// Takes no arguments. type_else, /// End of conditional marker. /// /// Takes no arguments. type_endif, /// End of loop marker. /// /// Takes no arguments. type_endloop, /// Beginning of a conditional. /// /// Takes a single argument, which denotes the name of the variable or /// vector to check for existence. This is the only expression /// supported. type_if, /// Beginning of a loop over all the elements of a vector. /// /// Takes two arguments: the name of the vector over which to iterate /// and the name of the iterator to later index this vector. type_loop, }; private: /// Internal data describing the structure of a particular statement type. struct type_descriptor { /// The native type of the statement. statement_type type; /// The expected number of arguments. unsigned int n_arguments; /// Constructs a new type descriptor. /// /// \param type_ The native type of the statement. /// \param n_arguments_ The expected number of arguments. type_descriptor(const statement_type type_, const unsigned int n_arguments_) : type(type_), n_arguments(n_arguments_) { } }; /// Mapping of statement type names to their definitions. typedef std::map< std::string, type_descriptor > types_map; /// Description of the different statement types. /// /// This static map is initialized once and reused later for any statement /// lookup. Unfortunately, we cannot perform this initialization in a /// static manner without C++11. static types_map _types; /// Generates a new types definition map. /// /// \return A new types definition map, to be assigned to _types. static types_map generate_types_map(void) { // If you change this, please edit the comments in the enum above. 
types_map types; types.insert(types_map::value_type( "else", type_descriptor(type_else, 0))); types.insert(types_map::value_type( "endif", type_descriptor(type_endif, 0))); types.insert(types_map::value_type( "endloop", type_descriptor(type_endloop, 0))); types.insert(types_map::value_type( "if", type_descriptor(type_if, 1))); types.insert(types_map::value_type( "loop", type_descriptor(type_loop, 2))); return types; } public: /// The type of the statement. statement_type type; /// The arguments to the statement, in textual form. const std::vector< std::string > arguments; /// Creates a new statement. /// /// \param type_ The type of the statement. /// \param arguments_ The arguments to the statement. statement_def(const statement_type& type_, const std::vector< std::string >& arguments_) : type(type_), arguments(arguments_) { #if !defined(NDEBUG) for (types_map::const_iterator iter = _types.begin(); iter != _types.end(); ++iter) { const type_descriptor& descriptor = (*iter).second; if (descriptor.type == type_) { PRE(descriptor.n_arguments == arguments_.size()); return; } } UNREACHABLE; #endif } /// Parses a statement. /// /// \param line The textual representation of the statement without any /// prefix. /// /// \return The parsed statement. /// /// \throw text::syntax_error If the statement is not correctly defined. 
static statement_def parse(const std::string& line) { if (_types.empty()) _types = generate_types_map(); const std::vector< std::string > words = text::split(line, ' '); if (words.empty()) throw text::syntax_error("Empty statement"); const types_map::const_iterator iter = _types.find(words[0]); if (iter == _types.end()) throw text::syntax_error(F("Unknown statement '%s'") % words[0]); const type_descriptor& descriptor = (*iter).second; if (words.size() - 1 != descriptor.n_arguments) throw text::syntax_error(F("Invalid number of arguments for " "statement '%s'") % words[0]); std::vector< std::string > new_arguments; new_arguments.resize(words.size() - 1); std::copy(words.begin() + 1, words.end(), new_arguments.begin()); return statement_def(descriptor.type, new_arguments); } }; statement_def::types_map statement_def::_types; /// Definition of a loop. /// /// This simple structure is used to keep track of the parameters of a loop. struct loop_def { /// The name of the vector over which this loop is iterating. std::string vector; /// The name of the iterator defined by this loop. std::string iterator; /// Position in the input to which to rewind to on looping. /// /// This position points to the line after the loop statement, not the loop /// itself. This is one of the reasons why we have this structure, so that /// we can maintain the data about the loop without having to re-process it. std::istream::pos_type position; /// Constructs a new loop definition. /// /// \param vector_ The name of the vector (first argument). /// \param iterator_ The name of the iterator (second argumnet). /// \param position_ Position of the next line after the loop statement. loop_def(const std::string& vector_, const std::string& iterator_, const std::istream::pos_type position_) : vector(vector_), iterator(iterator_), position(position_) { } }; /// Stateful class to instantiate the templates in an input stream. 
/// /// The goal of this parser is to scan the input once and not buffer anything in /// memory. The only exception are loops: loops are reinterpreted on every /// iteration from the same input file by rewidining the stream to the /// appropriate position. class templates_parser : utils::noncopyable { /// The templates to apply. /// /// Note that this is not const because the parser has to have write access /// to the templates. In particular, it needs to be able to define the /// iterators as regular variables. text::templates_def _templates; /// Prefix that marks a line as a statement. const std::string _prefix; /// Delimiter to surround an expression instantiation. const std::string _delimiter; /// Whether to skip incoming lines or not. /// /// The top of the stack is true whenever we encounter a conditional that /// evaluates to false or a loop that does not have any iterations left. /// Under these circumstances, we need to continue scanning the input stream /// until we find the matching closing endif or endloop construct. /// /// This is a stack rather than a plain boolean to allow us deal with /// if-else clauses. std::stack< bool > _skip; /// Current count of nested conditionals. unsigned int _if_level; /// Level of the top-most conditional that evaluated to false. unsigned int _exit_if_level; /// Current count of nested loops. unsigned int _loop_level; /// Level of the top-most loop that does not have any iterations left. unsigned int _exit_loop_level; /// Information about all the nested loops up to the current point. std::stack< loop_def > _loops; /// Checks if a line is a statement or not. /// /// \param line The line to validate. /// /// \return True if the line looks like a statement, which is determined by /// checking if the line starts by the predefined prefix. 
bool is_statement(const std::string& line) { return ((line.length() >= _prefix.length() && line.substr(0, _prefix.length()) == _prefix) && (line.length() < _delimiter.length() || line.substr(0, _delimiter.length()) != _delimiter)); } /// Parses a given statement line into a statement definition. /// /// \param line The line to validate; it must be a valid statement. /// /// \return The parsed statement. /// /// \throw text::syntax_error If the input is not a valid statement. statement_def parse_statement(const std::string& line) { PRE(is_statement(line)); return statement_def::parse(line.substr(_prefix.length())); } /// Processes a line from the input when not in skip mode. /// /// \param line The line to be processed. /// \param input The input stream from which the line was read. The current /// position in the stream must be after the line being processed. /// \param output The output stream into which to write the results. /// /// \throw text::syntax_error If the input is not valid. void handle_normal(const std::string& line, std::istream& input, std::ostream& output) { if (!is_statement(line)) { // Fast path. Mostly to avoid an indentation level for the big // chunk of code below. 
output << line << '\n'; return; } const statement_def statement = parse_statement(line); switch (statement.type) { case statement_def::type_else: _skip.top() = !_skip.top(); break; case statement_def::type_endif: _if_level--; break; case statement_def::type_endloop: { PRE(_loops.size() == _loop_level); loop_def& loop = _loops.top(); const std::size_t next_index = 1 + text::to_type< std::size_t >( _templates.get_variable(loop.iterator)); if (next_index < _templates.get_vector(loop.vector).size()) { _templates.add_variable(loop.iterator, F("%s") % next_index); input.seekg(loop.position); } else { _loop_level--; _loops.pop(); _templates.remove_variable(loop.iterator); } } break; case statement_def::type_if: { _if_level++; const std::string value = _templates.evaluate( statement.arguments[0]); if (value.empty() || value == "0" || value == "false") { _exit_if_level = _if_level; _skip.push(true); } else { _skip.push(false); } } break; case statement_def::type_loop: { _loop_level++; const loop_def loop(statement.arguments[0], statement.arguments[1], input.tellg()); if (_templates.get_vector(loop.vector).empty()) { _exit_loop_level = _loop_level; _skip.push(true); } else { _templates.add_variable(loop.iterator, "0"); _loops.push(loop); _skip.push(false); } } break; } } /// Processes a line from the input when in skip mode. /// /// \param line The line to be processed. /// /// \throw text::syntax_error If the input is not valid. 
void handle_skip(const std::string& line) { PRE(_skip.top()); if (!is_statement(line)) return; const statement_def statement = parse_statement(line); switch (statement.type) { case statement_def::type_else: if (_exit_if_level == _if_level) _skip.top() = !_skip.top(); break; case statement_def::type_endif: INV(_if_level >= _exit_if_level); if (_if_level == _exit_if_level) _skip.top() = false; _if_level--; _skip.pop(); break; case statement_def::type_endloop: INV(_loop_level >= _exit_loop_level); if (_loop_level == _exit_loop_level) _skip.top() = false; _loop_level--; _skip.pop(); break; case statement_def::type_if: _if_level++; _skip.push(true); break; case statement_def::type_loop: _loop_level++; _skip.push(true); break; default: break; } } /// Evaluates expressions on a given input line. /// /// An expression is surrounded by _delimiter on both sides. We scan the /// string from left to right finding any expressions that may appear, yank /// them out and call templates_def::evaluate() to get their value. /// /// Lonely or unbalanced appearances of _delimiter on the input line are /// not considered an error, given that the user may actually want to supply /// that character sequence without being interpreted as a template. /// /// \param in_line The input line from which to evaluate expressions. /// /// \return The evaluated line. /// /// \throw text::syntax_error If the expressions in the line are malformed. 
std::string evaluate(const std::string& in_line) { std::string out_line; std::string::size_type last_pos = 0; while (last_pos != std::string::npos) { const std::string::size_type open_pos = in_line.find( _delimiter, last_pos); if (open_pos == std::string::npos) { out_line += in_line.substr(last_pos); last_pos = std::string::npos; } else { const std::string::size_type close_pos = in_line.find( _delimiter, open_pos + _delimiter.length()); if (close_pos == std::string::npos) { out_line += in_line.substr(last_pos); last_pos = std::string::npos; } else { out_line += in_line.substr(last_pos, open_pos - last_pos); out_line += _templates.evaluate(in_line.substr( open_pos + _delimiter.length(), close_pos - open_pos - _delimiter.length())); last_pos = close_pos + _delimiter.length(); } } } return out_line; } public: /// Constructs a new template parser. /// /// \param templates_ The templates to apply to the processed file. /// \param prefix_ The prefix that identifies lines as statements. /// \param delimiter_ Delimiter to surround a variable instantiation. templates_parser(const text::templates_def& templates_, const std::string& prefix_, const std::string& delimiter_) : _templates(templates_), _prefix(prefix_), _delimiter(delimiter_), _if_level(0), _exit_if_level(0), _loop_level(0), _exit_loop_level(0) { } /// Applies the templates to a given input. /// /// \param input The stream to which to apply the templates. /// \param output The stream into which to write the results. /// /// \throw text::syntax_error If the input is not valid. Note that the /// is not guaranteed to be unmodified on exit if an error is /// encountered. void instantiate(std::istream& input, std::ostream& output) { std::string line; while (std::getline(input, line).good()) { if (!_skip.empty() && _skip.top()) handle_skip(line); else handle_normal(evaluate(line), input, output); } } }; } // anonymous namespace /// Constructs an empty templates definition. 
text::templates_def::templates_def(void) { } /// Sets a string variable in the templates. /// /// If the variable already exists, its value is replaced. This behavior is /// required to implement iterators, but client code should really not be /// redefining variables. /// /// \pre The variable must not already exist as a vector. /// /// \param name The name of the variable to set. /// \param value The value to set the given variable to. void text::templates_def::add_variable(const std::string& name, const std::string& value) { PRE(_vectors.find(name) == _vectors.end()); _variables[name] = value; } /// Unsets a string variable from the templates. /// /// Client code has no reason to use this. This is only required to implement /// proper scoping of loop iterators. /// /// \pre The variable must exist. /// /// \param name The name of the variable to remove from the templates. void text::templates_def::remove_variable(const std::string& name) { PRE(_variables.find(name) != _variables.end()); _variables.erase(_variables.find(name)); } /// Creates a new vector in the templates. /// /// If the vector already exists, it is cleared. Client code should really not /// be redefining variables. /// /// \pre The vector must not already exist as a variable. /// /// \param name The name of the vector to set. void text::templates_def::add_vector(const std::string& name) { PRE(_variables.find(name) == _variables.end()); _vectors[name] = strings_vector(); } /// Adds a value to an existing vector in the templates. /// /// \pre name The vector must exist. /// /// \param name The name of the vector to append the value to. /// \param value The textual value to append to the vector. void text::templates_def::add_to_vector(const std::string& name, const std::string& value) { PRE(_variables.find(name) == _variables.end()); PRE(_vectors.find(name) != _vectors.end()); _vectors[name].push_back(value); } /// Checks whether a given identifier exists as a variable or a vector. 
/// /// This is used to implement the evaluation of conditions in if clauses. /// /// \param name The name of the variable or vector. /// /// \return True if the given name exists as a variable or a vector; false /// otherwise. bool text::templates_def::exists(const std::string& name) const { return (_variables.find(name) != _variables.end() || _vectors.find(name) != _vectors.end()); } /// Gets the value of a variable. /// /// \param name The name of the variable. /// /// \return The value of the requested variable. /// /// \throw text::syntax_error If the variable does not exist. const std::string& text::templates_def::get_variable(const std::string& name) const { const variables_map::const_iterator iter = _variables.find(name); if (iter == _variables.end()) throw text::syntax_error(F("Unknown variable '%s'") % name); return (*iter).second; } /// Gets a vector. /// /// \param name The name of the vector. /// /// \return A reference to the requested vector. /// /// \throw text::syntax_error If the vector does not exist. const text::templates_def::strings_vector& text::templates_def::get_vector(const std::string& name) const { const vectors_map::const_iterator iter = _vectors.find(name); if (iter == _vectors.end()) throw text::syntax_error(F("Unknown vector '%s'") % name); return (*iter).second; } /// Indexes a vector and gets the value. /// /// \param name The name of the vector to index. /// \param index_name The name of a variable representing the index to use. /// This must be convertible to a natural. /// /// \return The value of the vector at the given index. /// /// \throw text::syntax_error If the vector does not existor if the index is out /// of range. 
const std::string& text::templates_def::get_vector(const std::string& name, const std::string& index_name) const { const strings_vector& vector = get_vector(name); const std::string& index_str = get_variable(index_name); std::size_t index; try { index = text::to_type< std::size_t >(index_str); } catch (const text::syntax_error& e) { throw text::syntax_error(F("Index '%s' not an integer, value '%s'") % index_name % index_str); } if (index >= vector.size()) throw text::syntax_error(F("Index '%s' out of range at position '%s'") % index_name % index); return vector[index]; } /// Evaluates a expression using these templates. /// /// An expression is a query on the current templates to fetch a particular /// value. The value is always returned as a string, as this is how templates /// are internally stored. /// /// \param expression The expression to evaluate. This should not include any /// of the delimiters used in the user input, as otherwise the expression /// will not be evaluated properly. /// /// \return The result of the expression evaluation as a string. /// /// \throw text::syntax_error If there is any problem while evaluating the /// expression. std::string text::templates_def::evaluate(const std::string& expression) const { const std::string::size_type paren_open = expression.find('('); if (paren_open == std::string::npos) { return get_variable(expression); } else { const std::string::size_type paren_close = expression.find( ')', paren_open); if (paren_close == std::string::npos) throw text::syntax_error(F("Expected ')' in expression '%s')") % expression); if (paren_close != expression.length() - 1) throw text::syntax_error(F("Unexpected text found after ')' in " "expression '%s'") % expression); const std::string arg0 = expression.substr(0, paren_open); const std::string arg1 = expression.substr( paren_open + 1, paren_close - paren_open - 1); if (arg0 == "defined") { return exists(arg1) ? 
"true" : "false"; } else if (arg0 == "length") { return F("%s") % get_vector(arg1).size(); } else { return get_vector(arg0, arg1); } } } /// Applies a set of templates to an input stream. /// /// \param templates The templates to use. /// \param input The input to process. /// \param output The stream to which to write the processed text. /// /// \throw text::syntax_error If there is any problem processing the input. void text::instantiate(const templates_def& templates, std::istream& input, std::ostream& output) { templates_parser parser(templates, "%", "%%"); parser.instantiate(input, output); } /// Applies a set of templates to an input file and writes an output file. /// /// \param templates The templates to use. /// \param input_file The path to the input to process. /// \param output_file The path to the file into which to write the output. /// /// \throw text::error If the input or output files cannot be opened. /// \throw text::syntax_error If there is any problem processing the input. void text::instantiate(const templates_def& templates, const fs::path& input_file, const fs::path& output_file) { std::ifstream input(input_file.c_str()); if (!input) throw text::error(F("Failed to open %s for read") % input_file); std::ofstream output(output_file.c_str()); if (!output) throw text::error(F("Failed to open %s for write") % output_file); instantiate(templates, input, output); }