Optimized parser:

* Removed tokenizer_string so the compiler can inline frequently called methods

* Made the parser create a stringstream object from the given string for the tokenizer

* Some minor preprocessor tweaks
Pauli Nieminen 2008-02-11 15:43:47 +00:00
parent 25bb143e33
commit dca851d3b0
5 changed files with 55 additions and 119 deletions
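
The first two commit-message bullets describe a devirtualization: the tokenizer no longer reads characters through virtual next_char_fast()/peek_char() overrides, so the compiler can inline the per-character stream access into the scanning loops. A minimal sketch of the idea, using hypothetical class names rather than the actual Wesnoth classes:

#include <iostream>
#include <cstdio>

// Before: each character fetch is a virtual call that the compiler
// cannot inline into the tokenizer's hot loops.
struct char_source {
    virtual int next_char() = 0;
    virtual ~char_source() {}
};

// After: the stream is referenced directly and next_char() is an ordinary
// inline member, so loops that call it once per character can inline it.
class inline_char_source {
    std::istream& in_;
public:
    explicit inline_char_source(std::istream& in) : in_(in) {}
    int next_char() { return in_.good() ? in_.get() : EOF; }
};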

View file

@@ -49,6 +49,7 @@ Version 1.3.15+svn:
* fixed an off by one error in [scroll_to]
* unified the two different max_loop counters and used highest maximum
(65536).
* Fixed abilities filtering to test [filter] always
* terrains:
* added stone bridge terrain over lava and chasms.
* miscellaneous and bug fixes:
@@ -64,6 +65,7 @@ Version 1.3.15+svn:
* Added some toys & whistles to unit tests
* Added networking unit tests
* Optimized MP chat log building
* Optimized tokenizer to speed up loading config files
* Hide race sections having only units with "hide_help=true"
* Fixed ai handling of unit without attacking weapons (bug #10886)
* Optimized road placing on random maps

View file

@@ -54,7 +54,6 @@ class parser
parser& operator=(const parser&);
public:
parser(config& cfg, std::istream& in);
parser(config& cfg, std::string& in);
~parser();
void operator() (std::string* error_log=NULL);
@@ -90,17 +89,11 @@ private:
parser::parser(config &cfg, std::istream &in) :
cfg_(cfg),
tok_(new tokenizer_stream(in)),
tok_(new tokenizer(in)),
elements()
{
}
parser::parser(config &cfg, std::string &in) :
cfg_(cfg),
tok_(new tokenizer_string(in)),
elements()
{
}
parser::~parser()
{
@@ -356,7 +349,8 @@ void read(config &cfg, std::istream &in, std::string* error_log)
void read(config &cfg, std::string &in, std::string* error_log)
{
parser(cfg, in)(error_log);
std::stringstream ss(in);
parser(cfg, ss)(error_log);
}
void read_gz(config &cfg, std::istream &file, std::string* error_log)
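
For callers nothing changes: the std::string overload of read() still exists, it just wraps the string in a std::stringstream and hands it to the istream-based parser. An illustrative call site, where the WML content and variable names are made up and only the read() signature comes from the diff above:

config cfg;
std::string errors;
std::string wml = "[section]\nkey=value\n[/section]\n";   // hypothetical input
read(cfg, wml, &errors);
// internally: std::stringstream ss(wml); parser(cfg, ss)(&errors);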

View file

@@ -134,13 +134,16 @@ int preprocessor_streambuf::underflow()
// The buffer has been completely read; fill it again.
// Keep part of the previous buffer, to ensure putback capabilities.
sz = out_buffer_.size();
buffer_.str(std::string());
if (sz > 3) {
out_buffer_ = out_buffer_.substr(sz - 3);
buffer_ << out_buffer_.substr(sz - 3);
sz = 3;
}
buffer_.str(std::string());
buffer_ << out_buffer_;
buffer_size_ = out_buffer_.size();
else
{
buffer_ << out_buffer_;
}
buffer_size_ = sz;
} else {
// The internal get-data pointer is null
}
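
The underflow() hunk above reworks how the putback area is kept: the stringstream is cleared first, only the last three characters of the previous output are carried over, and buffer_size_ is set to the size of that retained tail. A generic sketch of the pattern with hypothetical names (prev_output, buf, already_read), not the actual preprocessor_streambuf members:

// Keep a small tail of the old data so putback still works after a refill.
const std::string::size_type putback = 3;
std::string keep = prev_output.size() > putback
    ? prev_output.substr(prev_output.size() - putback)
    : prev_output;
buf.str(std::string());      // reset the stringstream backing the get area
buf << keep;                 // retained putback characters go in first
already_read = keep.size();  // only the tail counts as already consumed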
@@ -282,7 +285,8 @@ class preprocessor_data: preprocessor
void push_token(char);
void pop_token();
void put(char);
void put(std::string const &);
void put(std::string const & /*, int change_line = 0 */);
public:
preprocessor_data(preprocessor_streambuf &, std::istream *,
std::string const &history,
@@ -315,9 +319,10 @@ preprocessor_file::preprocessor_file(preprocessor_streambuf &t,
bool preprocessor_file::get_chunk()
{
while (pos_ != end_) {
std::string const &name = *(pos_++);
const std::string &name = *(pos_++);
unsigned sz = name.size();
if (sz < 5 || !std::equal(name.begin() + sz - 4, name.end(), ".cfg"))
// Use reverse iterator to optimize testing
if (sz < 5 || !std::equal(name.rbegin(), name.rbegin() + 4, "gfc."))
continue;
new preprocessor_file(target_, name);
return true;
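
The file-name test now walks the ".cfg" suffix from the end of the string with reverse iterators, which is why the pattern appears reversed as "gfc.". An equivalent standalone check (an illustrative helper, not part of the commit):

#include <algorithm>
#include <string>

inline bool ends_in_cfg(const std::string& name)
{
    static const char rev_suffix[] = "gfc.";   // ".cfg" read backwards
    return name.size() >= 5 &&                 // need at least "x.cfg"
           std::equal(rev_suffix, rev_suffix + 4, name.rbegin());
}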
@@ -458,7 +463,7 @@ void preprocessor_data::put(char c)
target_.buffer_size_ += 1;
}
void preprocessor_data::put(std::string const &s)
void preprocessor_data::put(std::string const &s /*, int line_change*/)
{
if (skipping_)
return;
@@ -467,9 +472,8 @@ void preprocessor_data::put(std::string const &s)
return;
}
target_.buffer_ << s;
// target_.linenum_ += line_change;
target_.buffer_size_ += s.size();
target_.linenum_ += std::count(s.begin(), s.end(), '\n');
target_.linenum_ -= std::count(s.begin(), s.end(), '\376');
}
bool preprocessor_data::get_chunk()
@@ -510,6 +514,7 @@ bool preprocessor_data::get_chunk()
buffer += d;
}
buffer += '\n';
// line_change = 1-1 = 0
put(buffer);
} else if (c == '"') {
if (token.type == '"') {

View file

@@ -23,21 +23,22 @@
#include <iostream>
#include <sstream>
#include <list>
tokenizer::tokenizer() :
tokenizer::tokenizer(std::istream& in) :
current_(EOF),
lineno_(1),
textdomain_(),
file_(),
tokenstart_lineno_(),
token_()
token_(),
in_(in)
{
next_char_fast();
}
void tokenizer::skip_comment()
{
this->next_char_fast();
next_char_fast();
if(current_ != '\n' && current_ != EOF) {
if(current_ == 't') {
// When the string 'textdomain[ |\t] is matched the rest of the line is
@@ -124,7 +125,7 @@ const token& tokenizer::next_token()
for(;;) {
while (is_space(current_)) {
token_.leading_spaces += current_;
this->next_char_fast();
next_char_fast();
}
if (current_ != 254)
break;
@@ -154,7 +155,7 @@ const token& tokenizer::next_token()
if(current_ == '"' && peek_char() != '"')
break;
if(current_ == '"' && peek_char() == '"')
this->next_char_fast();
next_char_fast();
if (current_ == 254) {
skip_comment();
--lineno_;
@@ -173,7 +174,7 @@ const token& tokenizer::next_token()
token_.type = token::STRING;
token_.value += current_;
while(is_alnum(peek_char())) {
this->next_char_fast();
next_char_fast();
token_.value += current_;
}
} else {
@@ -220,65 +221,4 @@ std::string& tokenizer::textdomain()
}
tokenizer_string::tokenizer_string(std::string& in) :
in_(in),
offset_(0)
{
this->next_char_fast();
}
tokenizer_stream::tokenizer_stream(std::istream& in) :
in_(in)
{
this->next_char_fast();
}
void tokenizer_stream::next_char_fast()
{
if(LIKELY(in_.good())) {
current_ = in_.get();
if (UNLIKELY(current_ == '\r'))
{
// we assume that there is only one '\r'
if(LIKELY(in_.good())) {
current_ = in_.get();
} else {
current_ = EOF;
}
}
} else {
current_ = EOF;
}
}
int tokenizer_stream::peek_char() const
{
return in_.peek();
}
void tokenizer_string::next_char_fast()
{
if(LIKELY(offset_ < in_.size())) {
current_ = in_[offset_++];
if (UNLIKELY(current_ == '\r'))
{
if(LIKELY(offset_ < in_.size())) {
current_ = in_[offset_++];
} else {
current_ = EOF;
}
}
} else {
current_ = EOF;
}
}
int tokenizer_string::peek_char() const
{
return in_[offset_];
}

View file

@@ -58,8 +58,8 @@ struct token
class tokenizer
{
public:
tokenizer();
virtual ~tokenizer() {}
tokenizer(std::istream& in);
~tokenizer() {}
const token& next_token();
const token& current_token() const;
@@ -67,18 +67,40 @@ public:
std::string& textdomain();
protected:
tokenizer();
int current_;
size_t lineno_;
void next_char()
inline void next_char()
{
if (UNLIKELY(current_ == '\n'))
lineno_++;
this->next_char_fast();
}
virtual void next_char_fast() = 0;
virtual int peek_char() const = 0;
inline void next_char_fast()
{
if(LIKELY(in_.good())) {
current_ = in_.get();
if (UNLIKELY(current_ == '\r'))
{
// we assume that there is only one '\r'
if(LIKELY(in_.good())) {
current_ = in_.get();
} else {
current_ = EOF;
}
}
} else {
current_ = EOF;
}
}
inline int peek_char() const
{
return in_.peek();
}
private:
bool is_space(const int c) const;
bool is_alnum(const int c) const;
@@ -88,35 +110,8 @@ private:
std::string file_;
size_t tokenstart_lineno_;
token token_;
};
//! tokenizer which uses an istream as input
class tokenizer_stream : public tokenizer
{
public:
tokenizer_stream(std::istream& in);
protected:
void next_char_fast();
int peek_char() const;
private:
std::istream& in_;
};
//! tokenizer which uses an string as input
class tokenizer_string : public tokenizer
{
public:
tokenizer_string(std::string& in);
protected:
void next_char_fast();
int peek_char() const;
private:
std::string& in_;
size_t offset_;
};
#endif
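
The inlined next_char_fast() and the removed subclasses rely on the LIKELY/UNLIKELY branch-prediction hints seen throughout the diff. Their definitions are not part of this commit; a typical implementation (an assumption, the project's actual macros may differ) looks like:

#if defined(__GNUC__)
#define LIKELY(x)   __builtin_expect(!!(x), 1)    // condition expected to be true
#define UNLIKELY(x) __builtin_expect(!!(x), 0)    // condition expected to be false
#else
#define LIKELY(x)   (x)
#define UNLIKELY(x) (x)
#endif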