Optimized tokenizer a bit (skip_comment)

This commit is contained in:
Pauli Nieminen 2008-02-08 12:03:57 +00:00
parent b0a17b80f4
commit 376408d8a2
5 changed files with 155 additions and 47 deletions

View file

@ -84,6 +84,11 @@ AC_ARG_ENABLE([debug],
[debug=$enableval],
[debug=no])
AC_ARG_ENABLE([profile],
AS_HELP_STRING([--enable-profile], [enable profiling in wesnoth]),
[profile=$enableval],
[profile=no])
if test "x$debug" = "xyes"
then
CXXFLAGS="$CXXFLAGS -O0 -DDEBUG -ggdb3 -W -Wall -ansi"
@ -91,6 +96,10 @@ else
CXXFLAGS="-O2 -W -Wall -ansi $CXXFLAGS"
fi
if test "x$profile" = "xyes"
then
CXXFLAGS="$CXXFLAGS -pg"
fi
# Make tests default in svn version
svn_in_version=`expr match "$WESNOTH_VERSION" '.*svn'`
@ -363,6 +372,11 @@ AC_PROG_CC
AC_PROG_INSTALL
AM_PROG_CC_C_O
# Check for __builtin_expect
AC_TRY_LINK([int foo (int a) { a = __builtin_expect (a, 10); return a == 10 ? 0 : 1; }],
[],
[CPPFLAGS="$CPPFLAGS -DHAVE_BUILTIN_EXPECT"])
have_libx11='no'
if test "$with_x" != 'no'; then

View file

@ -478,6 +478,10 @@ if PREFSDIR
CXXFLAGS += -DPREFERENCES_DIR=\"$(prefsdir)\"
endif
##if HAVE_BUILTIN_EXPECT
## CXXFLAGS += -DHAVE_BUILTIN_EXPECT
##endif
if BOOST_TEST_DYN_LINK
CXXFLAGS += -DBOOST_TEST_DYN_LINK
endif

View file

@ -17,11 +17,13 @@
#include "global.hpp"
#include "util.hpp"
#include "serialization/tokenizer.hpp"
#include "serialization/string_utils.hpp"
#include <iostream>
#include <sstream>
#include <list>
tokenizer::tokenizer() :
current_(EOF),
@ -32,33 +34,98 @@ tokenizer::tokenizer() :
token_()
{
}
// Comment directives recognised by skip_comment().  The comment body is
// matched character-by-character against all keywords in parallel, so no
// temporary string is built for ordinary (non-directive) comments.
const size_t matching_comments = 2;
const std::string comment[] = {"textdomain","line"};

// Consume a '#' comment up to (but not including) the terminating '\n'.
//
// Two comment forms are treated as tokenizer directives:
//   #textdomain <domain>    -> stored in textdomain_
//   #line <number> <file>   -> updates lineno_ and file_
// Any other comment is skipped without further processing.
void tokenizer::skip_comment()
{
	// Indices into comment[] that still match the input read so far.
	std::list<int> matching;
	for (size_t i = 0; i < matching_comments; ++i)
		matching.push_back(i);

	size_t n = 0;  // current position within each candidate keyword
	this->next_char_fast();
	while (current_ != EOF && current_ != '\n') {
		for (std::list<int>::iterator index = matching.begin();
		     index != matching.end();)
		{
			if (comment[*index][n] != static_cast<unsigned char>(current_))
			{
				// Mismatch: this keyword can no longer match.
				index = matching.erase(index);
				continue;
			}
			if (n + 1 == comment[*index].size())
			{
				// The whole keyword matched; dispatch on which one.
				switch (*index)
				{
				case 0: // textdomain
				{
					do {
						this->next_char_fast();
					} while (current_ == ' ' || current_ == '\t');
					textdomain_ = "";
					while (current_ != EOF && current_ != '\n')
					{
						textdomain_ += current_;
						this->next_char_fast();
					}
					return;
				}
				case 1: // line
				{
					do {
						this->next_char_fast();
					} while (current_ == ' ' || current_ == '\t');
					std::string lineno;
					while (current_ != EOF && current_ != '\n')
					{
						if (current_ == ' ' || current_ == '\t')
							break;
						lineno += current_;
						this->next_char_fast();
					}
					// Malformed directive: no file name follows the number.
					if (current_ == EOF || current_ == '\n')
						return;
					do {
						this->next_char_fast();
					} while (current_ == ' ' || current_ == '\t');
					file_ = "";
					while (current_ != EOF && current_ != '\n')
					{
						file_ += current_;
						this->next_char_fast();
					}
					lineno_ = lexical_cast<size_t>(lineno);
					return;
				}
				}
			}
			++index;
		}
		++n;
		// Every keyword has been eliminated: this is a plain comment,
		// stop matching and just dump the rest of the line below.
		if (matching.empty())
			break;
		this->next_char_fast();
	}
	// Skip the remainder of the comment up to the newline.
	while (current_ != '\n' && current_ != EOF)
		this->next_char_fast();
}
const token& tokenizer::next_token()
@ -70,7 +137,7 @@ const token& tokenizer::next_token()
for(;;) {
while (is_space(current_)) {
token_.leading_spaces += current_;
next_char();
this->next_char_fast();
}
if (current_ != 254)
break;
@ -100,7 +167,7 @@ const token& tokenizer::next_token()
if(current_ == '"' && peek_char() != '"')
break;
if(current_ == '"' && peek_char() == '"')
next_char();
this->next_char_fast();
if (current_ == 254) {
skip_comment();
--lineno_;
@ -119,7 +186,7 @@ const token& tokenizer::next_token()
token_.type = token::STRING;
token_.value += current_;
while(is_alnum(peek_char())) {
next_char();
this->next_char_fast();
token_.value += current_;
}
} else {
@ -170,30 +237,32 @@ tokenizer_string::tokenizer_string(std::string& in) :
in_(in),
offset_(0)
{
next_char();
this->next_char_fast();
}
// Construct a stream-backed tokenizer and prime current_ with the first
// character.  next_char_fast() already handles both stream exhaustion
// (maps to EOF) and '\r' stripping, so no separate in_.good()/get()
// priming is needed — doing both would silently discard the first
// character of the stream.
tokenizer_stream::tokenizer_stream(std::istream& in) :
	in_(in)
{
	this->next_char_fast();
}
void tokenizer_stream::next_char()
void tokenizer_stream::next_char_fast()
{
if (current_ == '\n')
lineno_++;
do {
if(in_.good()) {
current_ = in_.get();
} else {
current_ = EOF;
if(LIKELY(in_.good())) {
current_ = in_.get();
if (UNLIKELY(current_ == '\r'))
{
// we assume that there is only one '\r'
if(LIKELY(in_.good())) {
current_ = in_.get();
} else {
current_ = EOF;
}
}
} while(current_ == '\r');
} else {
current_ = EOF;
}
}
int tokenizer_stream::peek_char() const
@ -202,19 +271,22 @@ int tokenizer_stream::peek_char() const
}
void tokenizer_string::next_char()
void tokenizer_string::next_char_fast()
{
if (current_ == '\n')
lineno_++;
do {
if(offset_ < in_.size()) {
current_ = in_[offset_++];
} else {
current_ = EOF;
if(LIKELY(offset_ < in_.size())) {
current_ = in_[offset_++];
if (UNLIKELY(current_ == '\r'))
{
if(LIKELY(offset_ < in_.size())) {
current_ = in_[offset_++];
} else {
current_ = EOF;
}
}
} while(current_ == '\r');
} else {
current_ = EOF;
}
}

View file

@ -18,6 +18,8 @@
#ifndef TOKENIZER_H_INCLUDED
#define TOKENIZER_H_INCLUDED
#include "util.hpp"
#include <istream>
#include <string>
@ -68,7 +70,14 @@ protected:
int current_;
size_t lineno_;
virtual void next_char() = 0;
// Advance to the next character while keeping the line counter up to
// date.  Newlines are rare relative to other characters, hence the
// UNLIKELY hint; the actual character fetch is delegated to the
// subclass-specific next_char_fast().
void next_char()
{
	if (UNLIKELY(current_ == '\n'))
		lineno_++;
	this->next_char_fast();
}
virtual void next_char_fast() = 0;
virtual int peek_char() const = 0;
private:
bool is_space(const int c) const;
@ -88,7 +97,7 @@ public:
tokenizer_stream(std::istream& in);
protected:
void next_char();
void next_char_fast();
int peek_char() const;
private:
@ -102,7 +111,7 @@ public:
tokenizer_string(std::string& in);
protected:
void next_char();
void next_char_fast();
int peek_char() const;
private:

View file

@ -145,6 +145,15 @@ void push_back(T& str, C c)
str[str.size()-1] = c;
}
// Branch-prediction hints: LIKELY(x) marks x as almost always true,
// UNLIKELY(x) as almost always false.  GCC's __builtin_expect takes the
// *expected* value as its second argument, so UNLIKELY must pass 0 —
// passing 1 (as the original did) hints the branch as likely and
// defeats the whole point of the macro.  !!(a) normalises pointers and
// other non-bool conditions to 0/1 as __builtin_expect expects a long.
#ifdef HAVE_BUILTIN_EXPECT
#define LIKELY(a)   __builtin_expect(!!(a), 1)
#define UNLIKELY(a) __builtin_expect(!!(a), 0)
#else
#define LIKELY(a)   (a)
#define UNLIKELY(a) (a)
#endif
#if 1
# include <SDL_types.h>
typedef Sint32 fixed_t;