
#include <vector>
#include <fstream>

using namespace std;

#include "Tokenizer.h"
#include <string.h>
#include <stdio.h>

#include "../cutils.h"

#define SPACE_CHAR	' '

// #define debug_tok_lang(x) cerr << x << endl
#define debug_tok_lang(x)

// The GNU index() function is not defined in MinGW, ...
// It is replaced by calls to strcontains().
// Returns 1 if the character ch occurs in the NUL-terminated string str,
// and 0 otherwise. The terminator itself is never matched.
int strcontains(const char* str, char ch) {
	for (const char* cursor = str; *cursor != 0; ++cursor) {
		if (*cursor == ch) return 1;
	}
	return 0;
}

// Returns the zero-based index of the first occurrence of ch in the
// NUL-terminated string str, or -1 if ch does not occur before the terminator.
int strposition(const char* str, char ch) {
	for (int idx = 0; str[idx] != 0; ++idx) {
		if (str[idx] == ch) return idx;
	}
	return -1;
}

// Returns true if strg is non-empty and consists of an optional leading
// '+' or '-' followed only by decimal digits. Note that a lone sign
// character is accepted as well.
bool is_integer(const string& strg) {
	if (strg.length() == 0) return false;
	string::size_type idx = 0;
	// A sign is only allowed in the very first position.
	if (strg[0] == '+' || strg[0] == '-') idx = 1;
	for (; idx < strg.length(); ++idx) {
		const char ch = strg[idx];
		if (ch < '0' || ch > '9') return false;
	}
	return true;
}

// Returns true if strg has at least two characters, ends in 'e' or 'E', and
// everything before that final character is an optional leading sign followed
// only by decimal digits (e.g. "12e", the start of a number such as "12e-9").
bool is_integer_e(const string& strg) {
	const int len = strg.length();
	if (len <= 1) return false;
	const char last = strg[len-1];
	if (last != 'e' && last != 'E') return false;
	// A sign is only allowed in the very first position.
	int idx = (strg[0] == '+' || strg[0] == '-') ? 1 : 0;
	while (idx < len-1) {
		const char ch = strg[idx++];
		if (ch < '0' || ch > '9') return false;
	}
	return true;
}

// Advances pos past a run of decimal digits in strg (of length len) and
// returns the number of digits skipped.
static int is_float_skip_digits(const string& strg, int len, int& pos) {
	int count = 0;
	while (pos < len && strg[pos] >= '0' && strg[pos] <= '9') {
		pos++; count++;
	}
	return count;
}

// Returns true if the whole of strg is a decimal floating point literal:
//   [+|-] ( digits [ '.' [digits] ] | '.' digits ) [ (e|E) [+|-] digits ]
// At least one mantissa digit is required, and an exponent marker must be
// followed by at least one digit. Trailing characters make the string
// invalid, both after the mantissa and after the exponent.
bool is_float(const string& strg) {
	const int len = strg.length();
	if (len == 0) return false;
	int pos = 0;
	if (strg[pos] == '+' || strg[pos] == '-') pos++;
	int mantissa_digits = 0;
	if (pos < len && strg[pos] == '.') {
		// Form ".ddd": digits are only allowed after the dot.
		pos++;
		mantissa_digits = is_float_skip_digits(strg, len, pos);
	} else {
		// Form "ddd" or "ddd.ddd" (the fraction digits may be missing).
		mantissa_digits = is_float_skip_digits(strg, len, pos);
		if (pos < len && strg[pos] == '.') {
			pos++;
			mantissa_digits += is_float_skip_digits(strg, len, pos);
		}
	}
	if (mantissa_digits == 0) return false;
	if (pos < len && (strg[pos] == 'e' || strg[pos] == 'E')) {
		pos++;
		if (pos < len && (strg[pos] == '+' || strg[pos] == '-')) pos++;
		if (is_float_skip_digits(strg, len, pos) == 0) return false;
	}
	// Everything must have been consumed; this also rejects garbage after
	// the exponent (e.g. "1e5x").
	return pos == len;
}

// If strg has at least two characters and starts with a double or single
// quote, removes the first and the last character in place. The last
// character is dropped without checking that it is a matching quote.
void strip_string_markers(string& strg) {
	const int len = strg.length();
	if (len < 2) return;
	const char first = strg[0];
	if (first != '"' && first != '\'') return;
	strg = strg.substr(1, len - 2);
}

// Writes nb tab characters to os (nothing when nb <= 0) and returns os so
// that further output can be chained.
ostream& mtab(ostream &os, int nb) {
	int remaining = nb;
	while (remaining > 0) {
		os << '\t';
		--remaining;
	}
	return os;
}

// Default position: both coordinates start at the negative sentinel -10.
TokenizerPos::TokenizerPos() : m_col(-10), m_line(-10) {
}

// Copies line and column from pos.
TokenizerPos::TokenizerPos(const TokenizerPos& pos) : m_col(pos.m_col), m_line(pos.m_line) {
}

// Sets both coordinates of this position.
void TokenizerPos::set(int line, int column) {
	m_col = column;
	m_line = line;
}

// Moves to the start of the next line: line + 1, column reset to 0.
void TokenizerPos::incRow() {
	m_col = 0;
	++m_line;
}

// Writes the position as "line:column-1", or "?:?" when both the line and
// the column are negative. Returns os.
ostream& TokenizerPos::write(ostream &os) const {
	const bool unknown = (m_col < 0 && m_line < 0);
	if (unknown) {
		os << "?:?";
		return os;
	}
	os << m_line << ":" << (m_col-1);
	return os;
}

// Returns 1 if pos has the same line and column as this position, else 0.
int TokenizerPos::equals(TokenizerPos& pos) const {
	if (m_col != pos.m_col) return 0;
	return m_line == pos.m_line ? 1 : 0;
}

// Formats the position as "<line>:<column-1>" for tabular output.
// The line number is right-aligned in a field of tab1 characters and the
// column number is left-aligned in a field of tab2 characters; fields that
// are too narrow are not truncated. A negative line or column prints as "?".
// The result is built in a std::string, so arbitrarily large tab1/tab2
// values cannot overflow a fixed-size buffer.
string TokenizerPos::getString(int tab1, int tab2) const {
	char lines[15], cols[15];
	if (m_line < 0) strcpy(lines, "?");
	else sprintf(lines, "%d", m_line);
	if (m_col < 0) strcpy(cols, "?");
	else sprintf(cols, "%d", m_col-1);
	string res;
	// Left padding so that the line number is right-aligned.
	const int pad_left = tab1 - (int)strlen(lines);
	if (pad_left > 0) res.append(pad_left, ' ');
	res += lines;
	res += ':';
	res += cols;
	// Right padding so that the column number fills its field.
	const int pad_right = tab2 - (int)strlen(cols);
	if (pad_right > 0) res.append(pad_right, ' ');
	return res;
}

void g_throw_parser_error(const string& err) throw(ParserError) {
	TokenizerPos pos;
	pos.setColumn(-1);
	ParserError err_exp(err, pos, NULL);
	throw err_exp;
}

void g_throw_parser_error(const char* err, int idx) throw(ParserError) {
	char str[30];
	sprintf(str, "%d", idx);
	TokenizerPos pos;
	pos.setColumn(-1);
	ParserError err_exp(string(err)+str, pos, NULL);
	throw err_exp;
}

// Creates an error with message txt at position pos. fname is the name of
// the input being parsed; NULL is stored as an empty name. Flags start at 0.
ParserError::ParserError(const string& txt, const TokenizerPos& pos, const char* fname) {
	m_flag = 0;
	m_pos = pos;
	m_txt = txt;
	m_fname = (fname != NULL) ? fname : "";
}

// Copy constructor: duplicates message, file name, parse string, position
// and flags.
ParserError::ParserError(const ParserError& err) {
	m_txt = err.m_txt;
	m_fname = err.m_fname;
	m_parsestr = err.m_parsestr;
	m_pos = err.m_pos;
	m_flag = err.m_flag;
}

// Stores str as the parse string of this error and sets the
// TOK_PARSER_ERROR_PSTRING flag to record that it is present.
void ParserError::setParserString(const char* str) {
	m_flag |= TOK_PARSER_ERROR_PSTRING;
	m_parsestr = str;
}

// Writes "<message> at <position>" to os, followed by
// ", while reading '<file>'" when a file name is set. Returns os.
ostream& ParserError::write(ostream& os) const {
	os << m_txt << " at " << m_pos;
	if (m_fname == "") return os;
	os << ", while reading '" << m_fname << "'";
	return os;
}

// Returns non-zero if err has the same message, file name and position as
// this error. Flags and the parse string are not compared.
int ParserError::equals(ParserError* err) const {
	if (!(m_txt == err->m_txt)) return 0;
	if (!(m_fname == err->m_fname)) return 0;
	return m_pos.equals(err->m_pos);
}

// Creates an empty language hash; the body does no work of its own.
TokenizerLangHash::TokenizerLangHash() {
}

// Creates an empty language hash. The name argument is not used by this
// constructor body.
TokenizerLangHash::TokenizerLangHash(const string& name) {
}

// Destructor; the body does no work of its own.
TokenizerLangHash::~TokenizerLangHash() {
}

ostream& TokenizerLangHash::write(ostream &os, int depth) const {
	if (!m_default.isNull()) {
		mtab(os, depth);
		os << m_default->getName() << endl;
	}
	for (const_iterator i = begin(); i != end(); i++ ) {
		const name_hash_key& key = i->first;
		const TokenizerLangHash* hash = i->second.get();
		mtab(os, depth);
		os << key << endl;
		hash->write(os, depth + 1);
	}
	return os;
}

// Default constructor; the body assigns no members.
TokenAndPos::TokenAndPos() {
}

// Destructor; nothing to release explicitly.
TokenAndPos::~TokenAndPos() {
}

// Copy constructor: duplicates token text, position and space flag.
TokenAndPos::TokenAndPos(const TokenAndPos& copy) {
	m_space = copy.m_space;
	m_pos = copy.m_pos;
	m_token = copy.m_token;
}

// Creates a record for the given token text, the position where it starts,
// and the flag telling whether it was preceded by white space.
TokenAndPos::TokenAndPos(const string& token, const TokenizerPos& pos, char space) {
	m_token = token;
	m_pos = pos;
	m_space = space;
}

// Tries to extend the current token into a multi-token language element.
// `hash` is the sub-hash that was found for the current token. The next raw
// token is read; if it follows without white space, findLangElem2() is asked
// for a longer match. If that fails, the default element of `hash` is
// returned, and if there is none either, the current token and its start
// position are restored and NULL is returned.
TokenizerLangElem* Tokenizer::findLangElem(const TokenizerLangHash* hash) {
	// Remember the current token so it can be restored when nothing matches.
	string backup_token = m_token;
	TokenizerPos backup_pos = m_token_start;
	get_token_2();
	if (m_token.length() != 0) {
		debug_tok_lang("Tokenizer::first part of token '" << m_token << "'");
		if (m_space_before != 0) {
			// White space before the next token: it cannot belong to this
			// element, so hand it back.
			pushback_token();
		} else {
			TokenizerLangElem* result = findLangElem2(hash);
			if (result != NULL) {
				debug_tok_lang("Tokenizer::recursive call returns NULL");
				// The element starts where the first token started.
				m_token_start = backup_pos;
				return result;
			}
		}
	}
	// No longer match: fall back to the element registered for the prefix.
	TokenizerLangElem* elem = hash->getDefault();
	debug_tok_lang("Tokenizer::default elem " << elem);
	if (elem != NULL) {
		return elem;
	} else {
		m_token = backup_token;
		m_token_start = backup_pos;
		return NULL;
	}
}

// Recursive helper of findLangElem(). Looks up the current token in `hash`.
// If it is found, the next raw token is read and, when it follows without
// white space, the search continues in the child hash; otherwise the child's
// default element is used. Whenever no element is found, the token that was
// current on entry is pushed back and NULL is returned.
TokenizerLangElem* Tokenizer::findLangElem2(const TokenizerLangHash* hash) {
	// Snapshot of the current token, used for the pushback on failure.
	TokenAndPos my_tkpos(m_token, m_token_start, m_space_before);
	TokenizerLangHash::const_iterator i = hash->find(my_tkpos.getToken());
	if (i != hash->end()) {
		get_token_2();
		const TokenizerLangHash* child = i->second.get();
		if (m_token.length() != 0) {
			if (m_space_before != 0) {
				// Separated by white space: not part of this element.
				pushback_token();
			} else {
				TokenizerLangElem* result = findLangElem2(child);
				if (result != NULL) return result;
			}
		}
		TokenizerLangElem* elem = child->getDefault();
		if (elem != NULL) return elem;
		pushback_token(my_tkpos);
		return NULL;
	} else {
		pushback_token(my_tkpos);
		return NULL;
	}
}

void TokenizerLangHash::addLangElem(Tokenizer* tokens, TokenizerLangElem* elem) {
	string& token = tokens->try_next_token();
	if (token.length() == 0) {
		m_default = elem;
	} else {
		elem->addName(token);
		TokenizerLangHashPtr hash = try_add(token);
		hash.addLangElem(tokens, elem);
	}
}

void TokenizerLangHash::addLangElem(const vector<string>& toks, TokenizerLangElem* elem, unsigned int pos) {
	if (pos >= toks.size()) {
		m_default = elem;
	} else {
		TokenizerLangHashPtr hash = try_add(toks[pos]);
		hash.addLangElem(toks, elem, pos+1);
	}
}

// Creates a language with no element index, C and C++ style comments
// disabled, string parsing enabled, and no decimal-dot character.
TokenizerLanguage::TokenizerLanguage() {
	m_index = NULL;
	m_parse_strings = 1;
	m_decimal_dot = 0;
	m_enable_c_comm = 0;
	m_enable_cpp_comm = 0;
}

// Releases the element index array, if one was allocated.
TokenizerLanguage::~TokenizerLanguage() {
	// delete[] on a null pointer is a no-op, so no explicit check is needed.
	delete[] m_index;
}

void TokenizerLanguage::addElementIndex(int size) {
	if (m_index != NULL) delete[] m_index;
	m_index = new TokLangElemPtr[size];
}

void TokenizerLanguage::setElement(int i, TokenizerLangElem* elem) {
	m_index[i] = elem;
}

// Appends nb new, empty sub-language hashes to this language.
void TokenizerLanguage::addSubLanguages(int nb) {
	for (int remaining = nb; remaining > 0; remaining--) {
		m_sublanguage.push_back(new TokenizerLangHash());
	}
}

void TokenizerLanguage::addLanguageElem(int sublang, const char* elem) {
	StringTokenizer parser(elem, this);
	TokenizerLangElem* le = new TokenizerLangElem();
	m_sublanguage[sublang].addLangElem(&parser, le);
}

void TokenizerLanguage::addLanguageElem(int sublang, const vector<string>& toks, TokenizerLangElem* elem) {
	m_sublanguage[sublang].addLangElem(toks, elem, 0);
}

// Clears the three character sets of this language: line-comment starters,
// white-space characters and single-character tokens.
void TokenizerLanguage::resetCharMaps() {
	m_line_comment_tokens.clear();
	m_space_tokens.clear();
	m_one_char_tokens.clear();
}

// Installs the default single-character tokens and the default white-space
// characters.
void TokenizerLanguage::initDefault() {
	initDefaultSingleCharTokens();
	initDefaultSpaceTokens();
}

// Makes the usual punctuation and operator characters single-char tokens.
void TokenizerLanguage::initDefaultSingleCharTokens() {
	const char* const punctuation = ",.:;[]{}()+-*/=#<>|^@";
	setSingleCharTokens(punctuation);
}

// Treats blank, tab, newline and carriage return as white space.
void TokenizerLanguage::initDefaultSpaceTokens() {
	const char* const blanks = " \t\n\r";
	setSpaceTokens(blanks);
}

// Clears the table that maps an opening character to its closing character.
// NOTE(review): only entries 0..254 are cleared; confirm this matches the
// declared size of m_open_tokens.
TokenizerLanguageMultiLevel::TokenizerLanguageMultiLevel() {
	int idx = 0;
	while (idx < 255) {
		m_open_tokens[idx] = 0;
		idx++;
	}
}

// Registers `open` as an opening bracket whose matching closing character
// is `close`, and records `close` in the set of closing characters.
// NOTE(review): `open` is used directly as an array index; a negative char
// value would index out of bounds - confirm callers only pass ASCII.
void TokenizerLanguageMultiLevel::setOpenClose(char open, char close) {
	m_close_tokens.set(close);
	m_open_tokens[open] = close;
}

// Creates a tokenizer with a freshly allocated default TokenizerLanguage
// and resets all tokenizer state via init().
Tokenizer::Tokenizer() {
	m_language = new TokenizerLanguage();
	init();
}

// Creates a tokenizer that uses the given language and resets all tokenizer
// state via init().
Tokenizer::Tokenizer(TokenizerLanguage* language) {
	m_language = language;
	init();
}

// Destructor; the body releases nothing explicitly.
Tokenizer::~Tokenizer() {
}

// Selects sub-language i of the tokenizer's language as the active language
// hash used by get_token(). Passing TOKENIZER_LANG_NONE deselects it.
void Tokenizer::select_language(int i) {
	if (i != TOKENIZER_LANG_NONE) {
		m_langhash = m_language->getLanguage(i);
		return;
	}
	m_langhash = NULL;
}

// Initial state: no active language hash, no file name, and all counters
// and pushback buffers reset.
void Tokenizer::init() {
	m_langhash = NULL;
	m_fname = NULL;
	reset_all();
}

// Resets the complete tokenizer state: end-of-input flag, token and
// character pushback, white-space bookkeeping, and the positions (stream
// position back to line 1, column 0; token start to 0:0).
void Tokenizer::reset_all() {
	// Positions.
	m_token_count.set(1, 0);
	m_token_start.set(0, 0);
	// End-of-input and pushback state.
	m_token_at_end = 0;
	m_token_has_pushback = 0;
	m_token_has_pushback_ch = 0;
	m_pushback_tokens.clear();
	// White-space bookkeeping.
	m_space_before = 0;
	m_space_kind = 0;
}

// Returns the language used by this tokenizer.
TokenizerLanguage* Tokenizer::get_language() {
	TokenizerLanguage* language = m_language;
	return language;
}

// Resets only the positions: stream position to line 1, column 0 and token
// start to 0:0. Pushback and end-of-input state are left untouched.
void Tokenizer::reset_position() {
	m_token_start.set(0, 0);
	m_token_count.set(1, 0);
}


// Returns 1 if another significant character can be read from the input,
// 0 if the end of the input has been reached. The character that was read
// to find out is pushed back, so no input is consumed beyond white space
// and comments.
int Tokenizer::has_more_tokens() throw(ParserError) {
	if (m_token_at_end == 1) return 0;
	const char first_ch = token_read_sig_char();
	if (m_token_at_end == 1) return 0;
	token_pushback_ch(first_ch);
	return 1;
}

// Reads the next token and returns a reference to the tokenizer's internal
// token buffer. Uses get_check_token(), which throws at end of input.
string& Tokenizer::next_token() throw(ParserError) {
	get_check_token();
	string& current = m_token;
	return current;
}

// Like next_token(), but at end of input the returned token is empty
// instead of an exception being thrown.
string& Tokenizer::try_next_token() throw(ParserError) {
	get_token();
	string& current = m_token;
	return current;
}

// Reads the next token and returns its value as converted by strtod().
// The conversion result is not validated: text that strtod() cannot parse
// yields whatever strtod() returns for it.
double Tokenizer::next_double() throw(ParserError) {
	get_check_token();
	const char* text = m_token.c_str();
	return strtod(text, NULL);
}

int Tokenizer::next_integer() throw(ParserError) {
	char* ptr;
	get_check_token();
	int result = strtol(m_token.c_str(), &ptr, 10);
	if (*ptr != 0) throw error("expected integer, not '" + m_token + "'");
	return result;
}

// Reads the next token and stores its base-10 integer value in *i.
// Returns 1 if the whole token was consumed by strtol(), 0 otherwise
// (*i is written in both cases).
int Tokenizer::try_next_integer(int *i) {
	get_check_token();
	char* rest = NULL;
	*i = strtol(m_token.c_str(), &rest, 10);
	if (*rest != 0) return 0;
	return 1;
}

void Tokenizer::get_token() throw(ParserError) {
	get_token_2();
	if ((!m_langhash.isNull()) && m_token.length() > 0) {
		TokenizerLangHash::const_iterator i = m_langhash->find(m_token);
		if (i != m_langhash->end()) {
			const TokenizerLangElem* elem = findLangElem(i->second.get());
			if (elem != NULL) m_token = elem->getName();
		}
	}
}

// Tries to read a language element of sub-language i from the input.
// Returns the element on success. Otherwise returns NULL; a non-empty token
// that was read is pushed back, an empty token (end of input) is not.
TokenizerLangElem* Tokenizer::try_find_lang_elem(int i) {
	get_token_2();
	if (m_token.length() == 0) return NULL;
	const TokenizerLangHash* hash = m_language->getLanguage(i).get();
	TokenizerLangHash::const_iterator found = hash->find(m_token);
	if (found != hash->end()) {
		debug_tok_lang("Tokenizer::find first part " << m_token);
		TokenizerLangElem* elem = findLangElem(found->second.get());
		if (elem != NULL) {
			debug_tok_lang("Tokenizer::found elem " << elem->getName());
			return elem;
		}
		debug_tok_lang("Tokenizer::call returns NULL, pushback " << m_token);
	}
	// Either the first token is unknown or no complete element matched.
	pushback_token();
	return NULL;
}

// Reads the next raw token into m_token, without any language-element
// lookup. A pushed-back token, if present, is returned first. Otherwise
// white space and comments are skipped and one of the following is read:
// a quoted string (when string parsing is enabled), a single-character
// token, a number containing a decimal dot or an exponent, or a run of
// characters up to the next space or single-character token.
// At end of input m_token is set to the empty string.
// Throws ParserError for an unterminated string constant.
void Tokenizer::get_token_2() throw(ParserError) {
	if (m_token_has_pushback > 0) {
		// Serve the most recently pushed-back token.
		const TokenAndPos& tkpos = m_pushback_tokens.back();
		m_token = tkpos.getToken();
		m_token_start = tkpos.getPos();
		m_space_before = tkpos.getSpace();
		m_pushback_tokens.pop_back();
		m_token_has_pushback--;
		return;
	}
	m_space_before = 0;
	char token_ch = token_read_sig_char();
	m_token_start = m_token_count;
	if (m_token_at_end == 1) {
		 m_token = "";
		 return;
	}
	// String with first type of marks \'
	if ((token_ch == '\'')&&m_language->getParseStrings()) {
		// p1_ch / p2_ch: the previous and the one-before-previous character.
		char p1_ch = 0, p2_ch = 0;
		m_token = token_ch;
		do {
			token_ch = token_read_char_no_comment();
			m_token += token_ch;
			// Support ''', '\'' and '\\' allowed by some prologs
			if (token_ch == '\'' && (p1_ch != '\\' || p2_ch == '\\')) {
				// A quote that is not backslash-escaped: either a doubled
				// quote (kept in the token) or the end of the string.
				token_ch = token_read_char_no_comment();
				if (token_ch == '\'') {
					m_token += token_ch;
				} else {
					token_pushback_ch(token_ch);
					return;
				}
			}
			p2_ch = p1_ch;
			p1_ch = token_ch;
		} while (m_token_at_end == 0);
		throw error("unterminated string constant");
	}
	// String with second type of marks \"
	if ((token_ch  == '\"')&&(m_language->getParseStrings())) {
		char prev_ch = 0;
		m_token = token_ch;
		do {
			token_ch = token_read_char_no_comment();
			m_token += token_ch;
			// The string ends at a double quote not preceded by a backslash.
			if (token_ch == '\"' && prev_ch != '\\') return;
			prev_ch = token_ch;
		} while (m_token_at_end == 0);
		throw error("unterminated string constant");
	}
	// No string found
	if (m_language->isSingleCharToken(token_ch)) {
		if (m_language->isDecimalDot(token_ch)) {
			// Number starting with decimal dot
			m_token = "";
			read_number_term(token_ch, false, false);
		} else {
			m_token = token_ch;
		}
	} else {
		// Multi-character token: accumulate until a delimiter is seen.
		m_token = token_ch;
		do {
			token_ch = token_read_char();
			if (m_language->isDecimalDot(token_ch)) {
				if (is_integer(m_token)) {
					// Number starting with "123."
					read_number_term(token_ch, false, true);
					return;
				} else {
					token_pushback_ch(token_ch);
					return;
				}
			} else {
				if (m_language->isSingleCharToken(token_ch)) {
					// Support numbers of the form 1e-9
					if ((token_ch == '+' || token_ch == '-') && is_integer_e(m_token)) {
						read_number_term(token_ch, true, true);
						return;
					}
					token_pushback_ch(token_ch);
					return;
				}
			}
			if (token_ch == SPACE_CHAR) {
				// The space ends the token and is consumed.
				on_trailing_space();
				return;
			}
			m_token += token_ch;
		} while (m_token_at_end == 0);
	}
}

void Tokenizer::copy_string() throw(ParserError) {
	char prev_ch = 0;
	TokenizerPos pos = token_stream_pos();
	while (m_token_at_end == 0) {
		char token_ch = token_read_char_no_comment();
		m_token += token_ch;
		if (token_ch == '\"' && prev_ch != '\\') return;
		prev_ch = token_ch;
	}
	throw error(pos, "unterminated string constant");
}

// Continues reading a multi-level token after the opening bracket `open`
// has been read and appended to m_token. Characters are appended to m_token
// while a stack of currently open brackets is maintained; the token ends at
// the first end-token character seen while no bracket is open (that
// character is pushed back unless it is a space).
// Throws ParserError on a closing bracket that does not match the innermost
// open bracket, or when the input ends while brackets are still open.
void Tokenizer::multi_level_do_multi(char open) throw(ParserError) {
	// Stack of opening brackets that still wait for their closing bracket.
	vector<char> open_stack;
	open_stack.push_back(open);
	TokenizerLanguageMultiLevel* multi = m_language->getMulti();
	char token_ch = token_read_char();
	while (m_token_at_end == 0) {
		if (open_stack.size() == 0 && multi->isEndToken(token_ch)) {
			if (token_ch != SPACE_CHAR) {
				token_pushback_ch(token_ch);
			}
			return;
		} else {
			m_token += token_ch;
			if (token_ch == '\"' && m_language->getParseStrings()) {
				// Brackets inside string constants do not count.
				copy_string();
			} else if (multi->isOpenToken(token_ch)) {
				// Push the bracket that was actually read, so that the
				// matching closing character is derived from it (and not
				// from the outermost bracket).
				open_stack.push_back(token_ch);
			} else if (multi->isCloseToken(token_ch)) {
				if (open_stack.size() == 0) {
					throw error(token_stream_pos(), string("illegal closing '")+token_ch+"'");
				} else {
					char expected = multi->getCloseToken(open_stack.back());
					if (expected == token_ch) {
						open_stack.pop_back();
					} else {
						throw error(token_stream_pos(), string("illegal closing '")+token_ch+"', expected a closing '"+expected+"' first");
					}
				}
			}
			token_ch = token_read_char();
		}
	}
	if (open_stack.size() != 0) {
		char expected = multi->getCloseToken(open_stack.back());
		throw error(token_stream_pos(), string("expected closing '")+expected+"'");
	}
}

// Reads the next "multi-level" token: a run of characters up to the next
// end-token character of the multi-level language, where bracketed groups
// and double-quoted strings are copied as a whole. Returns a reference to
// m_token, which is empty at end of input.
// Throws ParserError on a closing bracket without a matching opening one.
string& Tokenizer::next_multilevel_token() throw(ParserError) {
	if (m_token_has_pushback > 0) {
		// Pushed-back tokens were split with the normal rules; rewind the
		// input to the position of the most recently pushed-back token and
		// discard all of them.
		TokenAndPos& tkpos = m_pushback_tokens.back();
		// cout << "goto position = " << tkpos.getPos() << endl;
		goto_position(tkpos.getPos());
		m_pushback_tokens.clear();
		m_token_has_pushback = 0;
	}
	m_token = "";
	char token_ch = token_read_sig_char();
	m_token_start = m_token_count;
	if (m_token_at_end == 1) {
		return m_token;
	}
	TokenizerLanguageMultiLevel* multi = m_language->getMulti();
	do {
		if (multi->isEndToken(token_ch)) {
			// End of token; the delimiter is kept for the next read unless
			// it is a space.
			if (token_ch != SPACE_CHAR) {
				token_pushback_ch(token_ch);
			}
			break;
		} else {
			m_token += token_ch;
			if (token_ch == '\"' && m_language->getParseStrings()) {
				copy_string();
			} else if (multi->isOpenToken(token_ch)) {
				/* Use subroutine for efficiency: */
				/* no vector constructed if not multi-level */
				multi_level_do_multi(token_ch);
				break;
			} else if (multi->isCloseToken(token_ch)) {
				throw error(token_stream_pos(), string("illegal closing '")+token_ch+"'");
			}
			token_ch = token_read_char();
		}
	} while (m_token_at_end == 0);
	return m_token;
}

// Called when a token was ended by a white-space character. Does nothing
// in this class.
void Tokenizer::on_trailing_space() {
}

// Moves the stream position bookkeeping to one column before pos and
// drops any pushed-back characters. This implementation does not
// reposition the underlying input itself.
void Tokenizer::goto_position(const TokenizerPos& pos) {
	m_token_has_pushback_ch = 0;
	m_token_count = pos;
	m_token_count.incCol(-1);
}

// Reads the rest of the current line into m_token and returns a reference
// to it. Pushed-back tokens (concatenated without separators) and
// pushed-back characters come first, followed by the raw input characters
// up to, but not including, the next newline or the end of the input.
string& Tokenizer::read_line() throw(ParserError) {
	m_token = "";
	// Pushed-back tokens, most recently pushed first.
	for (; m_token_has_pushback > 0; m_token_has_pushback--) {
		m_token += m_pushback_tokens.back().getToken();
		m_pushback_tokens.pop_back();
	}
	// Pushed-back characters, most recently pushed first.
	while (m_token_has_pushback_ch > 0) {
		m_token_has_pushback_ch--;
		m_token += m_token_pushback_ch[m_token_has_pushback_ch];
	}
	for (;;) {
		const char ch = stream_get();
		if (!stream_ok() || ch == '\n') break;
		m_token += ch;
	}
	return m_token;
}

// Reads the remainder of a floating point number and appends it to m_token.
//   token_ch - the character that triggered the call: the decimal dot, or
//              the '+'/'-' that follows "<integer>e"; it is always appended.
//   has_e    - true when the exponent marker has already been read, so only
//              the exponent digits remain.
//   sure_num - true when the text read so far is known to be a number; a
//              malformed exponent is then an error instead of being pushed
//              back.
// Throws ParserError on an illegal character inside the number.
void Tokenizer::read_number_term(char token_ch, bool has_e, bool sure_num) {
	// token_ch is "decimal dot" or integer+e+"+/-", always add!
	m_token += token_ch;
	// Number of exponent digits / fraction digits read so far.
	int count_exp = 0;
	int count_num = 0;
	if (!has_e) {
		// token_ch is "decimal dot"
		bool busy_main = true;
		while (busy_main) {
			token_ch = token_read_char();
			// keep on reading numbers or "e/E"
			if (token_ch == 'e' || token_ch == 'E') {
				// should be number or "+/-"
				char next_ch = token_read_char();
				if ((next_ch < '0' || next_ch > '9') && next_ch != '+' && next_ch != '-') {
					if (sure_num) {
						throw error(token_stream_pos(), string("illegal character '")+next_ch+"' while reading exponent of floating point number");
					} else {
						// Not an exponent after all: push both characters
						// back so that they are read again in input order.
						token_pushback_ch(next_ch);
						token_pushback_ch(token_ch);
						return;
					}
				} else {
					m_token += token_ch;
					busy_main = false;
					m_token += next_ch;
					if (next_ch >= '0' && next_ch <= '9') {
						count_exp = 1;
					}
				}
			} else {
				if (token_ch < '0' || token_ch > '9') {
					if (token_ch == SPACE_CHAR) {
						on_trailing_space();
						return;
					}
					if (m_language->isSingleCharToken(token_ch)) {
						token_pushback_ch(token_ch);
						return;
					}
					if (count_num > 0) {
						throw error(token_stream_pos(), string("illegal character '")+token_ch+"' while reading floating point number");
					} else {
						token_pushback_ch(token_ch);
						return;
					}
				}
				m_token += token_ch;
				count_num++;
			}
		}
	}
	// read regular number, after the e+"+/-"
	while (1) {
		token_ch = token_read_char();
		if (token_ch < '0' || token_ch > '9') {
			// The exponent needs at least one digit.
			if (count_exp == 0) {
				throw error(token_stream_pos(), string("illegal character '")+token_ch+"' while reading exponent of floating point number");
			}
			if (token_ch == SPACE_CHAR) {
				on_trailing_space();
				return;
			}
			if (m_language->isSingleCharToken(token_ch)) {
				token_pushback_ch(token_ch);
				return;
			}
			throw error(token_stream_pos(), string("illegal character '")+token_ch+"' while in exponent of floating point number");
		}
		count_exp++;
		m_token += token_ch;
	}
}

// Reads the next token (throwing at end of input) and stores its text,
// start position and space-before flag in tkpos.
void Tokenizer::next_token_and_pos(TokenAndPos& tkpos) throw(ParserError) {
	get_check_token();
	tkpos.setSpace(m_space_before);
	tkpos.setPos(m_token_start);
	tkpos.setToken(m_token);
}

void Tokenizer::pushback_token() {
	m_pushback_tokens.push_back(TokenAndPos(m_token, m_token_start, m_space_before));
	m_token_has_pushback++;
}

void Tokenizer::pushback_token(const TokenAndPos& tkpos) {
	m_pushback_tokens.push_back(tkpos);
	m_token_has_pushback++;
}

void Tokenizer::pushback_token(const string& token, const TokenizerPos& pos) {
	m_pushback_tokens.push_back(TokenAndPos(token, pos, 0));
	m_token_has_pushback++;
}

void Tokenizer::pushback_token(const string& token) {
	pushback_token(token, m_token_start);
}

void Tokenizer::pushback_token(const char* token) {
	pushback_token(string(token), m_token_start);
}

// Reads the next token and throws the end-of-file ParserError when no
// token is left (the token read is empty).
void Tokenizer::get_check_token() throw(ParserError) {
	get_token();
	const bool at_end = (m_token.length() == 0);
	if (at_end) throw eof_error();
}

// Copies the next token into *token without consuming it: the token is
// read (throwing at end of input) and immediately pushed back.
void Tokenizer::peek_token(string* token) throw(ParserError) {
	get_check_token();
	pushback_token();
	string& result = *token;
	result = m_token;
}

// Line on which the current token starts.
int Tokenizer::token_line() const {
	const int line = m_token_start.getLine();
	return line;
}

// Column at which the current token starts.
int Tokenizer::token_column() const {
	const int column = m_token_start.getColumn();
	return column;
}

// Returns 1 and consumes the next token if it equals `token`. Otherwise
// returns 0 and pushes the token back. At end of input nothing is pushed
// back, and the result is 1 only if `token` is the empty string.
int Tokenizer::is_next_token(const char* token) throw(ParserError) {
	get_token();
	if (m_token == token) return 1;
	// Different token: hand it back, unless the input is exhausted.
	if (m_token.length() != 0) pushback_token();
	return 0;
}

// If the next token is a single character that occurs in charlist, the
// token is consumed and that character is returned. Otherwise the token is
// pushed back and -1 is returned. Throws at end of input.
int Tokenizer::is_next_token_in(const char* charlist) throw(ParserError) {
	get_check_token();
	const bool single = (m_token.length() == 1);
	if (single && strcontains(charlist, m_token[0])) {
		return m_token[0];
	}
	pushback_token();
	return -1;
}

// Reads the next token and returns it if it is a single character that
// occurs in charlist. Throws ParserError otherwise (and at end of input).
int Tokenizer::ensure_next_token_in(const char* charlist) throw(ParserError) {
	get_check_token();
	const bool single = (m_token.length() == 1);
	if (!single || !strcontains(charlist, m_token[0])) {
		throw error(string("expected one of '") + charlist + "', found '" + m_token + "'");
	}
	return m_token[0];
}

// Reads the next token and throws ParserError unless it is exactly `token`.
void Tokenizer::ensure_next_token(const char* token) throw(ParserError) {
	get_check_token();
	if (m_token == token) return;
	throw error(string("expected '") + token + "', found '" + m_token + "'");
}

// Like ensure_next_token(), but the comparison is done by str_i_equals().
void Tokenizer::ensure_next_token_i(const char* token) throw(ParserError) {
	get_check_token();
	if (str_i_equals(m_token.c_str(), token)) return;
	throw error(string("expected '") + token + "', found '" + m_token + "'");
}

// Reads one token per character of charlist and checks that each token is
// exactly that single character, in order. On the first mismatch a
// ParserError is thrown at the token start position recorded on entry.
void Tokenizer::ensure_next_token_list(const char* charlist) throw(ParserError) {
	const int len = strlen(charlist);
	TokenizerPos start = m_token_start;
	for (int idx = 0; idx < len; idx++) {
		get_check_token();
		if (m_token.length() != 1 || m_token[0] != charlist[idx]) {
			throw error(start, string("expected ") + charlist);
		}
	}
}

// Returns the next significant character: white space, line comments and,
// when enabled in the language, "//" and "/* */" comments are skipped.
// m_space_before is set to 1 when anything was skipped. At end of input
// SPACE_CHAR is returned and m_token_at_end is 1.
// An unterminated "/*" comment makes read_till_close_comment() throw.
char Tokenizer::token_read_sig_char() throw(ParserError) {
	char token_ch;
	while (1) {
		// Skip white space.
		do {
			token_ch = token_read_char();
			if (m_token_at_end == 1) {
				return SPACE_CHAR;
			}
			if (token_ch == SPACE_CHAR) {
				m_space_before = 1;
			}
		} while (token_ch == SPACE_CHAR);
		if (m_language->isLineCommentToken(token_ch)) {
			m_space_before = 1;
			token_skip_to_end();
		} else if (token_ch == '/') {
			// Possible start of a C or C++ style comment: look one ahead.
			char next_token_ch = token_read_char();
			if (next_token_ch == '/' && m_language->isEnableCPPComment()) {
				m_space_before = 1;
				token_skip_to_end();
			} else if (next_token_ch == '*' && m_language->isEnableCComment()) {
				m_space_before = 1;
				read_till_close_comment();
			} else {
				// Just a '/': give the look-ahead character back.
				token_pushback_ch(next_token_ch);
				return token_ch;
			}
		} else {
			return token_ch;
		}
	}
}

// Returns the next input character, serving pushed-back characters first.
// The stream position is updated for every character read. White-space
// characters and whole line comments are returned as SPACE_CHAR, with the
// original character remembered in m_space_kind. At end of input
// m_token_at_end is set to 1 and SPACE_CHAR is returned.
char Tokenizer::token_read_char() {
	if (m_token_has_pushback_ch > 0) {
		m_token_has_pushback_ch--;
		return m_token_pushback_ch[m_token_has_pushback_ch];
	}
	const char ch = stream_get();
	if (!stream_ok()) {
		// Advance the column only the first time the end is reached.
		if (m_token_at_end == 0) {
			m_token_count.incCol();
		}
		m_token_at_end = 1;
		return SPACE_CHAR;
	}
	if (ch == '\t') {
		m_token_count.incTab();
	} else {
		m_token_count.incCol();
	}
	if (ch == '\n') m_token_count.incRow();
	if (m_language->isLineCommentToken(ch)) {
		token_skip_to_end();
		m_space_kind = ch;
		return SPACE_CHAR;
	}
	if (!m_language->isSpaceToken(ch)) return ch;
	m_space_kind = ch;
	return SPACE_CHAR;
}

// Same as token_read_char(), except that line-comment characters are not
// treated specially; used while reading string constants.
char Tokenizer::token_read_char_no_comment() {
	if (m_token_has_pushback_ch > 0) {
		m_token_has_pushback_ch--;
		return m_token_pushback_ch[m_token_has_pushback_ch];
	}
	const char ch = stream_get();
	if (!stream_ok()) {
		// Advance the column only the first time the end is reached.
		if (m_token_at_end == 0) {
			m_token_count.incCol();
		}
		m_token_at_end = 1;
		return SPACE_CHAR;
	}
	if (ch == '\t') {
		m_token_count.incTab();
	} else {
		m_token_count.incCol();
	}
	if (ch == '\n') m_token_count.incRow();
	if (!m_language->isSpaceToken(ch)) return ch;
	m_space_kind = ch;
	return SPACE_CHAR;
}

// Discards input up to and including the next newline (used to skip line
// comments). If newline is not a white-space character in the language, it
// is pushed back so that it is still seen as a token. Sets m_token_at_end
// when the input ends first.
void Tokenizer::token_skip_to_end() {
	for (;;) {
		const char ch = stream_get();
		if (!stream_ok()) {
			m_token_at_end = 1;
			return;
		}
		m_token_count.incCol();
		if (ch != '\n') continue;
		m_token_count.incRow();
		if (!m_language->isSpaceToken('\n')) token_pushback_ch('\n');
		return;
	}
}

// Skips input up to and including the "*/" that closes a C style comment.
// Throws ParserError, positioned one column before the stream position on
// entry, when the input ends before the comment is closed.
void Tokenizer::read_till_close_comment() throw(ParserError) {
	TokenizerPos start = m_token_count;
	for (int last_ch = 0; ; ) {
		const int ch = token_read_char();
		if (last_ch == '*' && ch == '/') return;
		if (m_token_at_end == 1) {
			start.incCol(-1);
			throw error(start, "comment block '/*' not terminated");
		}
		last_ch = ch;
	}
}

// Builds a ParserError with message src at the current token position.
// The parse string is attached when parse_string_in_error() provides one.
ParserError Tokenizer::error(const string& src) const {
	ParserError result(src, token_pos(), m_fname);
	if (const char* parse_str = parse_string_in_error()) {
		result.setParserString(parse_str);
	}
	return result;
}

// Same as above, but at the explicitly given position pos.
ParserError Tokenizer::error(const TokenizerPos& pos, const string& src) const {
	ParserError result(src, pos, m_fname);
	if (const char* parse_str = parse_string_in_error()) {
		result.setParserString(parse_str);
	}
	return result;
}

// Builds the "unexpected end of file" error at the current token position,
// flagged with TOK_PARSER_ERROR_ATEND.
ParserError Tokenizer::eof_error() const {
	ParserError result("unexpected end of file", token_pos(), m_fname);
	result.setFlag(TOK_PARSER_ERROR_ATEND);
	if (const char* parse_str = parse_string_in_error()) {
		result.setParserString(parse_str);
	}
	return result;
}

// Text being parsed, to be attached to errors; this class has none.
const char* Tokenizer::parse_string_in_error() const {
	return NULL;
}

// Builds a ParserError with message src at line -1 and the given column.
// No parse string is attached by this overload.
ParserError Tokenizer::error(int column, const string& src) const {
	TokenizerPos where;
	where.set(-1, column);
	return ParserError(src, where, m_fname);
}

// Creates a stream tokenizer with a default language and no input stream.
StreamTokenizer::StreamTokenizer() : Tokenizer(), m_is(NULL) {
	m_fb = NULL;
}

// Creates a stream tokenizer for the given language, without input stream.
StreamTokenizer::StreamTokenizer(TokenizerLanguage* lang) : Tokenizer(lang), m_is(NULL) {
	m_fb = NULL;
}

// Creates a stream tokenizer with a default language that reads from _is.
// No file buffer is created, so close_tokens() will not delete the stream.
StreamTokenizer::StreamTokenizer(istream* _is) : Tokenizer(), m_is(_is) {
	m_fb = NULL;
}

// Creates a stream tokenizer for the given language that reads from _is.
// No file buffer is created, so close_tokens() will not delete the stream.
StreamTokenizer::StreamTokenizer(istream* _is, TokenizerLanguage* lang) : Tokenizer(lang), m_is(_is) {
	m_fb = NULL;
}

// Closes the input file, if one was opened by this tokenizer.
StreamTokenizer::~StreamTokenizer() {
	close_tokens();
}

void StreamTokenizer::open_tokens(const char* fname) throw(ParserError) {
	m_fname = fname;
	m_fb = new filebuf();
	m_fb->open(m_fname, ios::in);
	if (!m_fb->is_open()) {
		perror ("StreamTokenizer::open_tokens");
		throw error(string("can't open: '") + fname + "'");
	}
	m_is = new istream(m_fb);
}

void StreamTokenizer::open_tokens(istream* strm, const char* fname) {
	m_fb = NULL;
	m_is = strm;
	m_fname = fname;
}

// Closes and releases the file buffer and the stream built on top of it.
// Does nothing when this tokenizer holds no file buffer, so a stream
// supplied by the caller is never deleted here.
void StreamTokenizer::close_tokens() {
	if (m_fb == NULL) return;
	m_fb->close();
	delete m_fb;
	m_fb = NULL;
	delete m_is;
	m_is = NULL;
}

// Non-zero while the input stream is in a good state.
int StreamTokenizer::stream_ok() {
	const bool good = m_is->good();
	return good;
}

// Reads and returns the next character from the input stream.
int StreamTokenizer::stream_get() {
	const int ch = m_is->get();
	return ch;
}

// The four constructors forward their arguments to the matching
// StreamTokenizer constructor and clear the fake end-of-file flag.
StreamEOFTokenizer::StreamEOFTokenizer() : StreamTokenizer() {
	setFakeEOF(0);
}

StreamEOFTokenizer::StreamEOFTokenizer(TokenizerLanguage* lang) : StreamTokenizer(lang) {
	setFakeEOF(0);
}

StreamEOFTokenizer::StreamEOFTokenizer(istream* _is) : StreamTokenizer(_is) {
	setFakeEOF(0);
}

StreamEOFTokenizer::StreamEOFTokenizer(istream* _is, TokenizerLanguage* lang) : StreamTokenizer(_is, lang) {
	setFakeEOF(0);
}

// Destructor; the body does no work of its own.
StreamEOFTokenizer::~StreamEOFTokenizer() {
}

// Non-zero while no fake end-of-file has been seen and the input stream is
// in a good state.
int StreamEOFTokenizer::stream_ok() {
	if (m_fakeeof != 0) return 0;
	return m_is->good();
}

// Reads the next character from the input stream. A NUL character is
// treated as a fake end-of-file: the flag is set and a blank is returned.
int StreamEOFTokenizer::stream_get() {
	const int ch = m_is->get();
	if (ch != 0) return ch;
	m_fakeeof = 1;
	return ' ';
}

// Creates a string tokenizer with the default language and no input text.
StringTokenizer::StringTokenizer() {
	init();
}

// Creates a string tokenizer for the given language, without input text.
StringTokenizer::StringTokenizer(TokenizerLanguage* lang) : Tokenizer(lang) {
	init();
}

// Creates a string tokenizer with the default language that reads the
// NUL-terminated string tokens. Only the pointer is stored (see init()).
StringTokenizer::StringTokenizer(const char* tokens) {
	init(tokens);
}

// Creates a string tokenizer for the given language that reads the
// NUL-terminated string tokens. Only the pointer is stored (see init()).
StringTokenizer::StringTokenizer(const char* tokens, TokenizerLanguage* lang) : Tokenizer(lang) {
	init(tokens);
}

// No input text: empty length, read index before the first character.
void StringTokenizer::init() {
	m_pos = -1;
	m_len = 0;
	m_tokens = NULL;
}

// Uses the NUL-terminated string tokens as input. The pointer is stored,
// the text is not copied. The read index starts before the first character.
void StringTokenizer::init(const char* tokens) {
	m_pos = -1;
	m_tokens = tokens;
	m_len = strlen(tokens);
}

// Switches to a new input string and resets the complete tokenizer state.
void StringTokenizer::set_string(const char* tokens) {
	init(tokens);
	reset_all();
}

// 1 while the read index is still inside the input string, 0 otherwise.
int StringTokenizer::stream_ok() {
	return m_pos < m_len ? 1 : 0;
}

// Advances the read index and returns the character at the new index, or a
// blank when the index is past the end of the input string.
int StringTokenizer::stream_get() {
	m_pos++;
	if (m_pos >= m_len) return ' ';
	return m_tokens[m_pos];
}

// Repositions the tokenizer at pos. In addition to the base class
// bookkeeping, the read index into the string is recomputed from the column
// of pos: the string is scanned from its start, counting a tab as an
// advance to the next multiple of 8 and every other character as one
// column, until the count equals column - 1. If such a character exists,
// the read index is set to it and the end-of-input flag is cleared;
// otherwise the read index is left unchanged.
void StringTokenizer::goto_position(const TokenizerPos& pos) {
	Tokenizer::goto_position(pos);
	const int target = pos.getColumn() - 1;
	int column = 0;
	for (int idx = 0; idx < m_len; idx++) {
		if (m_tokens[idx] == '\t') {
			column = (column/8 + 1)*8;
		} else {
			column++;
		}
		if (column != target) continue;
		m_pos = idx;
		if (m_pos < m_len) {
			m_token_at_end = 0;
		}
		break;
	}
	// Older variant without tab handling, kept for reference:
	//   m_pos = pos.getColumn() - 2;
	//   if (m_pos < m_len) m_token_at_end = 0;
}

// Shared language used by SpaceStringTokenizer; created on first use.
TokenizerLanguagePtr g_SpaceLang;

// Returns the shared language in which blank, comma, tab, carriage return
// and newline are white space. The language object is created by the first
// call and reused by later calls.
TokenizerLanguage* createSpaceLanguage() {
	if (!g_SpaceLang.isNull()) {
		return g_SpaceLang.get();
	}
	g_SpaceLang = new TokenizerLanguage();
	g_SpaceLang->setSpaceTokens(" ,\t\r\n");
	return g_SpaceLang.get();
}

// Creates a tokenizer over the string tokens that uses the shared language
// returned by createSpaceLanguage().
SpaceStringTokenizer::SpaceStringTokenizer(const char* tokens) : StringTokenizer(tokens, createSpaceLanguage()) {
}

// Destructor; the body does no work of its own.
SpaceStringTokenizer::~SpaceStringTokenizer() {
}

// Returns the string being tokenized, so that errors created by this
// tokenizer carry it as their parse string.
const char* SpaceStringTokenizer::parse_string_in_error() const {
	return m_tokens;
}

// All four constructors forward their arguments to the matching
// StreamEOFTokenizer constructor and start without an output file.
StreamEOFCopyTokenizer::StreamEOFCopyTokenizer() : StreamEOFTokenizer() {
	m_os = NULL;
	m_out_fb = NULL;
}

StreamEOFCopyTokenizer::StreamEOFCopyTokenizer(TokenizerLanguage* lang) : StreamEOFTokenizer(lang) {
	m_os = NULL;
	m_out_fb = NULL;
}

StreamEOFCopyTokenizer::StreamEOFCopyTokenizer(istream* _is) : StreamEOFTokenizer(_is) {
	m_os = NULL;
	m_out_fb = NULL;
}

StreamEOFCopyTokenizer::StreamEOFCopyTokenizer(istream* _is, TokenizerLanguage* lang) : StreamEOFTokenizer(_is, lang) {
	m_os = NULL;
	m_out_fb = NULL;
}

// Closes the output file, if one is open.
StreamEOFCopyTokenizer::~StreamEOFCopyTokenizer() {
	close_output();
}

void StreamEOFCopyTokenizer::open_output(const char* fname) throw(ParserError) {
	m_out_fb = new filebuf();
	if (m_out_fb->open(fname, ios::out) == NULL) {
		TokenizerPos pos;
		throw ParserError(string("can't open: ") + fname, pos, fname);
	}
	m_os = new ostream(m_out_fb);
}

// Writes the remembered terminating space to the output, unless it holds
// the "none" marker -1.
void StreamEOFCopyTokenizer::output_term_space() {
	if (m_termspace == -1) return;
	*m_os << m_termspace;
}

// Writes ch to the output, unless it is the "none" marker -1.
void StreamEOFCopyTokenizer::output_term_space(char ch) {
	if (ch == -1) return;
	*m_os << ch;
}

// Closes and releases the output file buffer and the output stream.
// Does nothing when no output file buffer exists.
void StreamEOFCopyTokenizer::close_output() {
	if (m_out_fb == NULL) return;
	m_out_fb->close();
	delete m_out_fb;
	m_out_fb = NULL;
	delete m_os;
	m_os = NULL;
}

// Reads the next token with try_next_token(), copies it and its
// terminating space to the output, and returns the token.
string& StreamEOFCopyTokenizer::try_next_token_output() {
	string& result = try_next_token();
	out() << result;
	output_term_space();
	return result;
}

// Reads the next token with next_token(), copies it and its terminating
// space to the output, and returns the token.
string& StreamEOFCopyTokenizer::next_token_output() {
	string& result = next_token();
	out() << result;
	output_term_space();
	return result;
}

// Copies the current token and its terminating space to the output.
void StreamEOFCopyTokenizer::output_token_and_space() {
	ostream& dest = out();
	dest << m_token;
	output_term_space();
}

// Remembers which white-space character ended the token, so that it can be
// copied to the output after the token.
void StreamEOFCopyTokenizer::on_trailing_space() {
	m_termspace = m_space_kind;
}

// Reads the next significant character. While the white space before the
// token is being skipped, m_intoken is 0, which lets stream_get() copy that
// white space to the output; afterwards m_intoken is 1. The remembered
// terminating space is reset to the "none" marker -1.
char StreamEOFCopyTokenizer::token_read_sig_char() throw(ParserError) {
	m_termspace = -1;
	m_intoken = 0;
	const char first_ch = StreamTokenizer::token_read_sig_char();
	m_intoken = 1;
	return first_ch;
}

// Reads the next character from the input stream. A NUL character is
// treated as a fake end-of-file (flag set, blank returned). White-space
// characters read while not inside a token are copied to the output.
int StreamEOFCopyTokenizer::stream_get() {
	const int ch = m_is->get();
	if (ch == 0) {
		m_fakeeof = 1;
		return ' ';
	}
	const bool between_tokens = (m_intoken == 0);
	if (between_tokens && m_language->isSpaceToken(ch)) {
		*m_os << (char)ch;
	}
	return ch;
}

// Starts without an open output stream.
MyOutputFile::MyOutputFile() {
	m_OS = NULL;
}

// Closes the file if it is still open.
MyOutputFile::~MyOutputFile() {
	this->close();
}

void MyOutputFile::open(const char* fname) {
	close();
	m_FB.open(fname, ios::out);
	m_OS = new ostream(&m_FB);
}

void MyOutputFile::open(const string& fname) {
	close();
	m_FB.open(fname.c_str(), ios::out);
	m_OS = new ostream(&m_FB);
}

// Closes the file buffer and releases the output stream. Does nothing when
// no output stream exists.
void MyOutputFile::close() {
	if (m_OS == NULL) return;
	m_FB.close();
	delete m_OS;
	m_OS = NULL;
}

