]> git.tdb.fi Git - libs/gl.git/blob - source/glsl/tokenizer.cpp
Recognize the #line directive in the GLSL parser
[libs/gl.git] / source / glsl / tokenizer.cpp
1 #include <msp/core/raii.h>
2 #include "glsl_error.h"
3 #include "preprocessor.h"
4 #include "syntax.h"
5 #include "tokenizer.h"
6
7 using namespace std;
8
9 namespace Msp {
10 namespace GL {
11 namespace SL {
12
13 Tokenizer::Tokenizer():
14         allow_preprocess(true),
15         suppress_line_advance(false)
16 {
17         static string empty;
18         iter = empty.begin();
19         source_end = empty.end();
20 }
21
22 void Tokenizer::begin(const string &name, const string &src)
23 {
24         iter = src.begin();
25         source_end = src.end();
26         location.name = name;
27         location.line = 1;
28         allow_preprocess = true;
29         last_token.clear();
30         next_tokens.clear();
31 }
32
33 const string &Tokenizer::peek_token(unsigned index)
34 {
35         while(next_tokens.size()<=index)
36                 next_tokens.push_back(parse_token_());
37         return (last_token = next_tokens[index]);
38 }
39
40 const string &Tokenizer::parse_token()
41 {
42         if(!next_tokens.empty())
43         {
44                 last_token = next_tokens.front();
45                 next_tokens.pop_front();
46                 return last_token;
47         }
48
49         return (last_token = parse_token_());
50 }
51
52 void Tokenizer::expect(const string &token)
53 {
54         string parsed = parse_token();
55         if(parsed!=token)
56                 throw parse_error(location, parsed, format("'%s'", token));
57 }
58
// Replaces the current source location (the one attached to errors thrown by
// this tokenizer) with `loc`.
void Tokenizer::set_location(const Location &loc)
{
        location = loc;
}
63
64 string Tokenizer::parse_token_()
65 {
66         while(1)
67         {
68                 skip_comment_and_whitespace();
69                 bool allow_preproc = allow_preprocess;
70                 allow_preprocess = false;
71                 if(iter==source_end)
72                         return string();
73                 else if(allow_preproc && *iter=='#')
74                 {
75                         ++iter;
76                         preprocess();
77                 }
78                 else if(isalpha(*iter) || *iter=='_')
79                         return parse_identifier();
80                 else if(isdigit(*iter))
81                         return parse_number();
82                 else if(*iter=='"')
83                         return parse_string();
84                 else if(*iter=='#' || *iter=='$' || *iter=='\'' || *iter=='@' || *iter=='\\' || *iter=='`')
85                         throw syntax_error(location, string(1, *iter), "Invalid character in source");
86                 else
87                         return parse_other();
88         }
89 }
90
91 void Tokenizer::preprocess()
92 {
93         SetForScope<deque<string> > clear_tokens(next_tokens, deque<string>());
94
95         string::const_iterator line_end = iter;
96         for(; (line_end!=source_end && *line_end!='\n'); ++line_end) ;
97         SetForScope<string::const_iterator> stop_at_line_end(source_end, line_end);
98
99         signal_preprocess.emit();
100
101         iter = line_end;
102 }
103
104 string Tokenizer::parse_identifier()
105 {
106         string ident;
107         while(iter!=source_end)
108         {
109                 if(isalnum(*iter) || *iter=='_')
110                         ident += *iter++;
111                 else
112                         break;
113         }
114
115         return ident;
116 }
117
118 string Tokenizer::parse_number()
119 {
120         bool got_fract = false;
121         string number;
122         while(iter!=source_end)
123         {
124                 if(isdigit(*iter))
125                         number += *iter++;
126                 else if(!got_fract && *iter=='.')
127                 {
128                         number += *iter++;
129                         got_fract = true;
130                 }
131                 else
132                         break;
133         }
134
135         bool require_digit = false;
136         if(iter!=source_end && (*iter=='e' || *iter=='E'))
137         {
138                 number += *iter++;
139                 if(iter!=source_end && (*iter=='-' || *iter=='+'))
140                         number += *iter++;
141                 require_digit = true;
142                 while(iter!=source_end)
143                 {
144                         if(isdigit(*iter))
145                         {
146                                 number += *iter++;
147                                 require_digit = false;
148                         }
149                         else
150                                 break;
151                 }
152         }
153
154         if(require_digit)
155                 throw syntax_error(location, number, "Incomplete numeric literal");
156         else if(isalnum(*iter) || *iter=='_')
157                 throw syntax_error(location, number, "Garbage at end of numeric literal");
158
159         return number;
160 }
161
162 string Tokenizer::parse_string()
163 {
164         string str(1, *iter++);
165         bool escape = false;
166         while(iter!=source_end)
167         {
168                 char c = *iter++;
169                 str += c;
170                 if(c=='\\')
171                         escape = true;
172                 else if(c=='"' && !escape)
173                         break;
174                 else
175                         escape = false;
176         }
177
178         return str;
179 }
180
181 string Tokenizer::parse_other()
182 {
183         if(iter==source_end)
184                 return string();
185
186         string token(1, *iter++);
187         for(unsigned i=1; (i<3 && iter!=source_end); ++i)
188         {
189                 bool matched = false;
190                 for(const Operator *j=Operator::operators; (!matched && j->type); ++j)
191                 {
192                         matched = (j->token[i]==*iter);
193                         for(unsigned k=0; (matched && k<i && j->token[k]); ++k)
194                                 matched = (j->token[k]==token[k]);
195                 }
196
197                 if(!matched)
198                         break;
199
200                 token += *iter++;
201         }
202
203         return token;
204 }
205
// Advances iter past whitespace, "//" line comments and "/* */" block
// comments, stopping at the first character that can start a token or at
// source_end.  Along the way it counts newlines into location.line (unless
// suppress_line_advance is set) and updates allow_preprocess.
// suppress_line_advance is always cleared before returning.
// NOTE(review): isspace() is given a plain char here; a byte >= 0x80 is a
// negative argument where char is signed, which <cctype> leaves undefined.
void Tokenizer::skip_comment_and_whitespace()
{
        // Scanner state:
        //   0  not inside a comment
        //   1  a '/' was just seen; it may start a comment
        //   2  inside a "//" comment, which runs to the end of the line
        //   3  inside a "/*" comment
        //   4  inside a "/*" comment and a '*' was just seen
        unsigned comment = 0;
        while(iter!=source_end)
        {
                if(comment==0)
                {
                        if(*iter=='/')
                                comment = 1;
                        else if(!isspace(*iter))
                                break;
                }
                else if(comment==1)
                {
                        if(*iter=='/')
                                comment = 2;
                        else if(*iter=='*')
                                comment = 3;
                        else
                        {
                                // The '/' did not start a comment.  Step back onto it so
                                // that it gets tokenized, and stop skipping.
                                comment = 0;
                                --iter;
                                break;
                        }
                }
                else if(comment==2)
                {
                        if(*iter=='\n')
                                comment = 0;
                }
                else if(comment==3 && *iter=='*')
                        comment = 4;
                else if(comment==4)
                {
                        // '/' closes the comment; another '*' keeps waiting for the
                        // '/'; anything else goes back to plain comment text.
                        if(*iter=='/')
                                comment = 0;
                        else if(*iter!='*')
                                comment = 3;
                }

                if(*iter=='\n')
                {
                        if(!suppress_line_advance)
                                ++location.line;
                        // A newline outside a block comment (states below 3) allows a
                        // directive on the next line; one inside a block comment
                        // disallows it.
                        allow_preprocess = (comment<3);
                }

                ++iter;
        }

        suppress_line_advance = false;
}
258
259 } // namespace SL
260 } // namespace GL
261 } // namespace Msp