Optimized tokenizer a bit (skip_comment)
This commit is contained in:
parent
b0a17b80f4
commit
376408d8a2
5 changed files with 155 additions and 47 deletions
14
configure.ac
14
configure.ac
|
@ -84,6 +84,11 @@ AC_ARG_ENABLE([debug],
|
|||
[debug=$enableval],
|
||||
[debug=no])
|
||||
|
||||
AC_ARG_ENABLE([profile],
|
||||
AS_HELP_STRING([--enable-profile], [enable profiling in wesnoth]),
|
||||
[profile=$enableval],
|
||||
[profile=no])
|
||||
|
||||
if test "x$debug" = "xyes"
|
||||
then
|
||||
CXXFLAGS="$CXXFLAGS -O0 -DDEBUG -ggdb3 -W -Wall -ansi"
|
||||
|
@ -91,6 +96,10 @@ else
|
|||
CXXFLAGS="-O2 -W -Wall -ansi $CXXFLAGS"
|
||||
fi
|
||||
|
||||
if test "x$profile" = "xyes"
|
||||
then
|
||||
CXXFLAGS="$CXXFLAGS -pg"
|
||||
fi
|
||||
|
||||
# Make tests default in svn version
|
||||
svn_in_version=`expr match "$WESNOTH_VERSION" '.*svn'`
|
||||
|
@ -363,6 +372,11 @@ AC_PROG_CC
|
|||
AC_PROG_INSTALL
|
||||
AM_PROG_CC_C_O
|
||||
|
||||
# Check for __builtin_expect
|
||||
AC_TRY_LINK([int foo (int a) { a = __builtin_expect (a, 10); return a == 10 ? 0 : 1; }],
|
||||
[],
|
||||
[CPPFLAGS="$CPPFLAGS -DHAVE_BUILTIN_EXPECT"])
|
||||
|
||||
have_libx11='no'
|
||||
if test "$with_x" != 'no'; then
|
||||
|
||||
|
|
|
@ -478,6 +478,10 @@ if PREFSDIR
|
|||
CXXFLAGS += -DPREFERENCES_DIR=\"$(prefsdir)\"
|
||||
endif
|
||||
|
||||
##if HAVE_BUILTIN_EXPECT
|
||||
## CXXFLAGS += -DHAVE_BUILTIN_EXPECT
|
||||
##endif
|
||||
|
||||
if BOOST_TEST_DYN_LINK
|
||||
CXXFLAGS += -DBOOST_TEST_DYN_LINK
|
||||
endif
|
||||
|
|
|
@ -17,11 +17,13 @@
|
|||
|
||||
#include "global.hpp"
|
||||
|
||||
#include "util.hpp"
|
||||
#include "serialization/tokenizer.hpp"
|
||||
#include "serialization/string_utils.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <list>
|
||||
|
||||
tokenizer::tokenizer() :
|
||||
current_(EOF),
|
||||
|
@ -32,33 +34,98 @@ tokenizer::tokenizer() :
|
|||
token_()
|
||||
{
|
||||
}
|
||||
const size_t matching_comments = 2;
|
||||
const std::string comment[] = {"textdomain","line"};
|
||||
|
||||
void tokenizer::skip_comment()
|
||||
{
|
||||
// Dump comments up to \n
|
||||
std::string comment;
|
||||
next_char();
|
||||
while (current_ != EOF && current_ != '\n') {
|
||||
comment += current_;
|
||||
next_char();
|
||||
std::list<int> matching;
|
||||
std::list<int>::iterator index;
|
||||
size_t n;
|
||||
for (n = 0; n < matching_comments; ++n)
|
||||
{
|
||||
matching.push_back(n);
|
||||
}
|
||||
n = 0;
|
||||
this->next_char_fast();
|
||||
while (current_ != EOF && current_ != '\n') {
|
||||
for (index = matching.begin(); index != matching.end();)
|
||||
{
|
||||
if(comment[*index][n] != static_cast<unsigned char>(current_))
|
||||
{
|
||||
index = matching.erase(index);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (n+1 == comment[*index].size())
|
||||
{
|
||||
// We have a match
|
||||
switch(*index)
|
||||
{
|
||||
case 0:
|
||||
do {
|
||||
this->next_char_fast();
|
||||
} while (current_ == ' ' || current_ == '\t');
|
||||
textdomain_ = "";
|
||||
while(current_ != EOF && current_ != '\n')
|
||||
{
|
||||
textdomain_ += current_;
|
||||
this->next_char_fast();
|
||||
}
|
||||
std::cerr << textdomain_ << " ";
|
||||
return;
|
||||
case 1:
|
||||
do {
|
||||
this->next_char_fast();
|
||||
} while (current_ == ' ' || current_ == '\t');
|
||||
std::string lineno;
|
||||
while(current_ != EOF && current_ != '\n')
|
||||
{
|
||||
if (current_ == ' ' || current_ == '\t')
|
||||
{
|
||||
break;
|
||||
}
|
||||
lineno += current_;
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
// Identifies and processes tokenizer directives
|
||||
std::string::size_type pos = comment.find_first_of(" \t");
|
||||
if (pos != std::string::npos) {
|
||||
const std::string word = comment.substr(0, pos);
|
||||
|
||||
if (word == "textdomain" && pos < comment.size() - 1) {
|
||||
textdomain_ = comment.substr(pos + 1);
|
||||
} else if (word == "line" && pos < comment.size() - 1) {
|
||||
std::string::size_type pos2 = comment.find_first_of(" \t", pos + 1);
|
||||
if (current_ == EOF || current_ == '\n')
|
||||
{
|
||||
return;
|
||||
}
|
||||
do {
|
||||
this->next_char_fast();
|
||||
} while (current_ == ' ' || current_ == '\t');
|
||||
file_ = "";
|
||||
while (current_ != EOF && current_ != '\n')
|
||||
{
|
||||
file_ += current_;
|
||||
this->next_char_fast();
|
||||
}
|
||||
lineno_ = lexical_cast<size_t>(lineno);
|
||||
std::cerr << lineno_ << " " << file_ << " ";
|
||||
|
||||
if (pos2 != std::string::npos) {
|
||||
lineno_ = lexical_cast<size_t>(comment.substr(pos + 1, pos2 - pos));
|
||||
file_ = comment.substr(pos2 + 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
++index;
|
||||
}
|
||||
}
|
||||
++n;
|
||||
if (!matching.empty())
|
||||
{
|
||||
break;
|
||||
}
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
while (current_ != '\n' && current_ != EOF)
|
||||
{
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
const token& tokenizer::next_token()
|
||||
|
@ -70,7 +137,7 @@ const token& tokenizer::next_token()
|
|||
for(;;) {
|
||||
while (is_space(current_)) {
|
||||
token_.leading_spaces += current_;
|
||||
next_char();
|
||||
this->next_char_fast();
|
||||
}
|
||||
if (current_ != 254)
|
||||
break;
|
||||
|
@ -100,7 +167,7 @@ const token& tokenizer::next_token()
|
|||
if(current_ == '"' && peek_char() != '"')
|
||||
break;
|
||||
if(current_ == '"' && peek_char() == '"')
|
||||
next_char();
|
||||
this->next_char_fast();
|
||||
if (current_ == 254) {
|
||||
skip_comment();
|
||||
--lineno_;
|
||||
|
@ -119,7 +186,7 @@ const token& tokenizer::next_token()
|
|||
token_.type = token::STRING;
|
||||
token_.value += current_;
|
||||
while(is_alnum(peek_char())) {
|
||||
next_char();
|
||||
this->next_char_fast();
|
||||
token_.value += current_;
|
||||
}
|
||||
} else {
|
||||
|
@ -170,30 +237,32 @@ tokenizer_string::tokenizer_string(std::string& in) :
|
|||
in_(in),
|
||||
offset_(0)
|
||||
{
|
||||
next_char();
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
|
||||
tokenizer_stream::tokenizer_stream(std::istream& in) :
|
||||
in_(in)
|
||||
{
|
||||
if(in_.good()) {
|
||||
current_ = in_.get();
|
||||
}
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
void tokenizer_stream::next_char()
|
||||
void tokenizer_stream::next_char_fast()
|
||||
{
|
||||
if (current_ == '\n')
|
||||
lineno_++;
|
||||
|
||||
do {
|
||||
if(in_.good()) {
|
||||
current_ = in_.get();
|
||||
} else {
|
||||
current_ = EOF;
|
||||
if(LIKELY(in_.good())) {
|
||||
current_ = in_.get();
|
||||
if (UNLIKELY(current_ == '\r'))
|
||||
{
|
||||
// we assume that there is only one '\r'
|
||||
if(LIKELY(in_.good())) {
|
||||
current_ = in_.get();
|
||||
} else {
|
||||
current_ = EOF;
|
||||
}
|
||||
}
|
||||
} while(current_ == '\r');
|
||||
} else {
|
||||
current_ = EOF;
|
||||
}
|
||||
}
|
||||
|
||||
int tokenizer_stream::peek_char() const
|
||||
|
@ -202,19 +271,22 @@ int tokenizer_stream::peek_char() const
|
|||
}
|
||||
|
||||
|
||||
void tokenizer_string::next_char()
|
||||
void tokenizer_string::next_char_fast()
|
||||
{
|
||||
|
||||
if (current_ == '\n')
|
||||
lineno_++;
|
||||
|
||||
do {
|
||||
if(offset_ < in_.size()) {
|
||||
current_ = in_[offset_++];
|
||||
} else {
|
||||
current_ = EOF;
|
||||
if(LIKELY(offset_ < in_.size())) {
|
||||
current_ = in_[offset_++];
|
||||
if (UNLIKELY(current_ == '\r'))
|
||||
{
|
||||
if(LIKELY(offset_ < in_.size())) {
|
||||
current_ = in_[offset_++];
|
||||
} else {
|
||||
current_ = EOF;
|
||||
}
|
||||
}
|
||||
} while(current_ == '\r');
|
||||
} else {
|
||||
current_ = EOF;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#ifndef TOKENIZER_H_INCLUDED
|
||||
#define TOKENIZER_H_INCLUDED
|
||||
|
||||
#include "util.hpp"
|
||||
|
||||
#include <istream>
|
||||
#include <string>
|
||||
|
||||
|
@ -68,7 +70,14 @@ protected:
|
|||
int current_;
|
||||
size_t lineno_;
|
||||
|
||||
virtual void next_char() = 0;
|
||||
void next_char()
|
||||
{
|
||||
if (UNLIKELY(current_ == '\n'))
|
||||
lineno_++;
|
||||
this->next_char_fast();
|
||||
}
|
||||
|
||||
virtual void next_char_fast() = 0;
|
||||
virtual int peek_char() const = 0;
|
||||
private:
|
||||
bool is_space(const int c) const;
|
||||
|
@ -88,7 +97,7 @@ public:
|
|||
tokenizer_stream(std::istream& in);
|
||||
|
||||
protected:
|
||||
void next_char();
|
||||
void next_char_fast();
|
||||
int peek_char() const;
|
||||
|
||||
private:
|
||||
|
@ -102,7 +111,7 @@ public:
|
|||
tokenizer_string(std::string& in);
|
||||
|
||||
protected:
|
||||
void next_char();
|
||||
void next_char_fast();
|
||||
int peek_char() const;
|
||||
|
||||
private:
|
||||
|
|
|
@ -145,6 +145,15 @@ void push_back(T& str, C c)
|
|||
str[str.size()-1] = c;
|
||||
}
|
||||
|
||||
// Branch-prediction hints.
// LIKELY(a):   the condition `a` is expected to be true most of the time.
// UNLIKELY(a): the condition `a` is expected to be false most of the time.
// Both evaluate to the value of `a`; only the optimizer hint differs.
// HAVE_BUILTIN_EXPECT selects the __builtin_expect implementation; without
// it the macros degrade to a plain (parenthesised) expression.
#ifdef HAVE_BUILTIN_EXPECT
#define LIKELY(a)    __builtin_expect((a),1)
// Expected value must be 0 here: hinting 1 would mark the rare branch as hot.
#define UNLIKELY(a)  __builtin_expect((a),0)
#else
// Parenthesised so that e.g. !LIKELY(x || y) keeps its meaning.
#define LIKELY(a)    (a)
#define UNLIKELY(a)  (a)
#endif
|
||||
|
||||
|
||||
#if 1
|
||||
# include <SDL_types.h>
|
||||
typedef Sint32 fixed_t;
|
||||
|
|
Loading…
Add table
Reference in a new issue