namespace SL {
Tokenizer::Tokenizer():
- allow_preprocess(true)
+ allow_preprocess(true),
+ suppress_line_advance(false) // set by set_location(); tested before location.line is incremented on a newline
{
static string empty;
iter = empty.begin();
source_end = empty.end();
- location.line = 0;
}
-void Tokenizer::begin(const string &name, const string &src)
+void Tokenizer::begin(const string &src, const string &name)
{
iter = src.begin();
source_end = src.end();
{
while(next_tokens.size()<=index)
next_tokens.push_back(parse_token_());
- return (last_token = next_tokens[index]);
+ return next_tokens[index];
}
const string &Tokenizer::parse_token()
{
+ progress_mark = true;
+
if(!next_tokens.empty())
{
last_token = next_tokens.front();
throw parse_error(location, parsed, format("'%s'", token));
}
+// Replaces the tokenizer's current source location with loc.
+void Tokenizer::set_location(const Location &loc)
+{
+ // Raise the flag that the newline handling elsewhere in this patch tests
+ // before incrementing location.line, so the value assigned here is not
+ // immediately advanced.
+ suppress_line_advance = true;
+ location = loc;
+}
+
string Tokenizer::parse_token_()
{
while(1)
{
skip_comment_and_whitespace();
+ bool allow_preproc = allow_preprocess; // snapshot the flag as the skip above left it
+ allow_preprocess = false; // now cleared for every token, not only when a directive is found (compare the removed line below)
if(iter==source_end)
return string();
- else if(allow_preprocess && *iter=='#')
+ else if(allow_preproc && *iter=='#')
{
- allow_preprocess = false;
+ ++iter; // step past the '#' before handing over to preprocess()
preprocess();
}
else if(isalpha(*iter) || *iter=='_')
return parse_identifier();
else if(isdigit(*iter))
return parse_number();
+ else if(*iter=='"')
+ return parse_string();
+ else if(*iter=='#' || *iter=='$' || *iter=='\'' || *iter=='@' || *iter=='\\' || *iter=='`') // a '#' reaching this branch was not accepted as a directive above
+ throw syntax_error(location, string(1, *iter), "Invalid character in source");
else
return parse_other();
}
+// Reads a numeric literal starting at iter: digits with at most one '.',
+// optionally followed by an exponent ('e' or 'E', an optional sign and at
+// least one digit).  Returns the literal's text.  Throws syntax_error if the
+// exponent has no digits or if the literal is directly followed by a letter,
+// digit or underscore.
string Tokenizer::parse_number()
{
- bool accept_sign = false;
+ bool got_fract = false;
string number;
while(iter!=source_end)
{
- if(isdigit(*iter) || *iter=='.')
+ if(isdigit(*iter))
number += *iter++;
- else if(*iter=='e' || *iter=='E')
+ else if(!got_fract && *iter=='.')
{
number += *iter++;
- accept_sign = true;
+ got_fract = true;
}
- else if(accept_sign && (*iter=='+' || *iter=='-'))
- number += *iter++;
else
break;
}
+ // The exponent is parsed separately so that a sign is accepted only
+ // directly after the 'e' and so that a missing digit can be reported.
+ bool require_digit = false;
+ if(iter!=source_end && (*iter=='e' || *iter=='E'))
+ {
+ number += *iter++;
+ if(iter!=source_end && (*iter=='-' || *iter=='+'))
+ number += *iter++;
+ require_digit = true;
+ while(iter!=source_end)
+ {
+ if(isdigit(*iter))
+ {
+ number += *iter++;
+ require_digit = false;
+ }
+ else
+ break;
+ }
+ }
+
+ // A literal may end exactly at the end of the source, so test for the end
+ // before looking at the following character; the end iterator must not be
+ // dereferenced.
+ if(require_digit)
+ throw syntax_error(location, number, "Incomplete numeric literal");
+ else if(iter!=source_end && (isalnum(*iter) || *iter=='_'))
+ throw syntax_error(location, number, "Garbage at end of numeric literal");
+
return number;
}
+// Reads a double-quoted string literal whose opening quote is at iter.  The
+// returned text includes both quotes and keeps escape sequences verbatim.  A
+// backslash escapes exactly one following character, so a quote that comes
+// after an escaped backslash (as in "a\\") ends the literal.  If the source
+// ends before a closing quote is found, the text read so far is returned.
+string Tokenizer::parse_string()
+{
+ string str(1, *iter++);
+ bool escape = false;
+ while(iter!=source_end)
+ {
+ char c = *iter++;
+ str += c;
+ // The escaped character is handled first so that it can never start a
+ // new escape or close the literal, whatever it is.
+ if(escape)
+ escape = false;
+ else if(c=='\\')
+ escape = true;
+ else if(c=='"')
+ break;
+ }
+
+ return str;
+}
+
string Tokenizer::parse_other()
{
if(iter==source_end)
if(*iter=='\n')
{
- ++location.line;
+ if(!suppress_line_advance)
+ ++location.line;
allow_preprocess = (comment<3);
}
++iter;
}
+
+ suppress_line_advance = false;
}
} // namespace SL