]> git.tdb.fi Git - libs/gl.git/blob - source/glsl/tokenizer.cpp
Make the GLSL parser resilient against common errors
[libs/gl.git] / source / glsl / tokenizer.cpp
1 #include <msp/core/raii.h>
2 #include "glsl_error.h"
3 #include "preprocessor.h"
4 #include "syntax.h"
5 #include "tokenizer.h"
6
7 using namespace std;
8
9 namespace Msp {
10 namespace GL {
11 namespace SL {
12
13 Tokenizer::Tokenizer():
14         allow_preprocess(true),
15         suppress_line_advance(false)
16 {
17         static string empty;
18         iter = empty.begin();
19         source_end = empty.end();
20 }
21
22 void Tokenizer::begin(const string &name, const string &src)
23 {
24         iter = src.begin();
25         source_end = src.end();
26         location.name = name;
27         location.line = 1;
28         allow_preprocess = true;
29         last_token.clear();
30         next_tokens.clear();
31 }
32
33 const string &Tokenizer::peek_token(unsigned index)
34 {
35         while(next_tokens.size()<=index)
36                 next_tokens.push_back(parse_token_());
37         return next_tokens[index];
38 }
39
40 const string &Tokenizer::parse_token()
41 {
42         progress_mark = true;
43
44         if(!next_tokens.empty())
45         {
46                 last_token = next_tokens.front();
47                 next_tokens.pop_front();
48                 return last_token;
49         }
50
51         return (last_token = parse_token_());
52 }
53
54 void Tokenizer::expect(const string &token)
55 {
56         string parsed = parse_token();
57         if(parsed!=token)
58                 throw parse_error(location, parsed, format("'%s'", token));
59 }
60
61 void Tokenizer::set_location(const Location &loc)
62 {
63         location = loc;
64 }
65
66 string Tokenizer::parse_token_()
67 {
68         while(1)
69         {
70                 skip_comment_and_whitespace();
71                 bool allow_preproc = allow_preprocess;
72                 allow_preprocess = false;
73                 if(iter==source_end)
74                         return string();
75                 else if(allow_preproc && *iter=='#')
76                 {
77                         ++iter;
78                         preprocess();
79                 }
80                 else if(isalpha(*iter) || *iter=='_')
81                         return parse_identifier();
82                 else if(isdigit(*iter))
83                         return parse_number();
84                 else if(*iter=='"')
85                         return parse_string();
86                 else if(*iter=='#' || *iter=='$' || *iter=='\'' || *iter=='@' || *iter=='\\' || *iter=='`')
87                         throw syntax_error(location, string(1, *iter), "Invalid character in source");
88                 else
89                         return parse_other();
90         }
91 }
92
93 void Tokenizer::preprocess()
94 {
95         SetForScope<deque<string> > clear_tokens(next_tokens, deque<string>());
96
97         string::const_iterator line_end = iter;
98         for(; (line_end!=source_end && *line_end!='\n'); ++line_end) ;
99         SetForScope<string::const_iterator> stop_at_line_end(source_end, line_end);
100
101         signal_preprocess.emit();
102
103         iter = line_end;
104 }
105
106 string Tokenizer::parse_identifier()
107 {
108         string ident;
109         while(iter!=source_end)
110         {
111                 if(isalnum(*iter) || *iter=='_')
112                         ident += *iter++;
113                 else
114                         break;
115         }
116
117         return ident;
118 }
119
120 string Tokenizer::parse_number()
121 {
122         bool got_fract = false;
123         string number;
124         while(iter!=source_end)
125         {
126                 if(isdigit(*iter))
127                         number += *iter++;
128                 else if(!got_fract && *iter=='.')
129                 {
130                         number += *iter++;
131                         got_fract = true;
132                 }
133                 else
134                         break;
135         }
136
137         bool require_digit = false;
138         if(iter!=source_end && (*iter=='e' || *iter=='E'))
139         {
140                 number += *iter++;
141                 if(iter!=source_end && (*iter=='-' || *iter=='+'))
142                         number += *iter++;
143                 require_digit = true;
144                 while(iter!=source_end)
145                 {
146                         if(isdigit(*iter))
147                         {
148                                 number += *iter++;
149                                 require_digit = false;
150                         }
151                         else
152                                 break;
153                 }
154         }
155
156         if(require_digit)
157                 throw syntax_error(location, number, "Incomplete numeric literal");
158         else if(isalnum(*iter) || *iter=='_')
159                 throw syntax_error(location, number, "Garbage at end of numeric literal");
160
161         return number;
162 }
163
164 string Tokenizer::parse_string()
165 {
166         string str(1, *iter++);
167         bool escape = false;
168         while(iter!=source_end)
169         {
170                 char c = *iter++;
171                 str += c;
172                 if(c=='\\')
173                         escape = true;
174                 else if(c=='"' && !escape)
175                         break;
176                 else
177                         escape = false;
178         }
179
180         return str;
181 }
182
183 string Tokenizer::parse_other()
184 {
185         if(iter==source_end)
186                 return string();
187
188         string token(1, *iter++);
189         for(unsigned i=1; (i<3 && iter!=source_end); ++i)
190         {
191                 bool matched = false;
192                 for(const Operator *j=Operator::operators; (!matched && j->type); ++j)
193                 {
194                         matched = (j->token[i]==*iter);
195                         for(unsigned k=0; (matched && k<i && j->token[k]); ++k)
196                                 matched = (j->token[k]==token[k]);
197                 }
198
199                 if(!matched)
200                         break;
201
202                 token += *iter++;
203         }
204
205         return token;
206 }
207
208 void Tokenizer::skip_comment_and_whitespace()
209 {
210         unsigned comment = 0;
211         while(iter!=source_end)
212         {
213                 if(comment==0)
214                 {
215                         if(*iter=='/')
216                                 comment = 1;
217                         else if(!isspace(*iter))
218                                 break;
219                 }
220                 else if(comment==1)
221                 {
222                         if(*iter=='/')
223                                 comment = 2;
224                         else if(*iter=='*')
225                                 comment = 3;
226                         else
227                         {
228                                 comment = 0;
229                                 --iter;
230                                 break;
231                         }
232                 }
233                 else if(comment==2)
234                 {
235                         if(*iter=='\n')
236                                 comment = 0;
237                 }
238                 else if(comment==3 && *iter=='*')
239                         comment = 4;
240                 else if(comment==4)
241                 {
242                         if(*iter=='/')
243                                 comment = 0;
244                         else if(*iter!='*')
245                                 comment = 3;
246                 }
247
248                 if(*iter=='\n')
249                 {
250                         if(!suppress_line_advance)
251                                 ++location.line;
252                         allow_preprocess = (comment<3);
253                 }
254
255                 ++iter;
256         }
257
258         suppress_line_advance = false;
259 }
260
261 } // namespace SL
262 } // namespace GL
263 } // namespace Msp