]> git.tdb.fi Git - libs/gl.git/blob - source/glsl/tokenizer.cpp
Further refactor the resolving process in SL::Compiler
[libs/gl.git] / source / glsl / tokenizer.cpp
1 #include <msp/core/raii.h>
2 #include "glsl_error.h"
3 #include "preprocessor.h"
4 #include "syntax.h"
5 #include "tokenizer.h"
6
7 using namespace std;
8
9 namespace Msp {
10 namespace GL {
11 namespace SL {
12
13 Tokenizer::Tokenizer():
14         allow_preprocess(true),
15         suppress_line_advance(false)
16 {
17         static string empty;
18         iter = empty.begin();
19         source_end = empty.end();
20 }
21
22 void Tokenizer::begin(const string &src, const string &name)
23 {
24         iter = src.begin();
25         source_end = src.end();
26         location.name = name;
27         location.line = 1;
28         allow_preprocess = true;
29         last_token.clear();
30         next_tokens.clear();
31 }
32
33 const string &Tokenizer::peek_token(unsigned index)
34 {
35         while(next_tokens.size()<=index)
36                 next_tokens.push_back(parse_token_());
37         return next_tokens[index];
38 }
39
40 const string &Tokenizer::parse_token()
41 {
42         progress_mark = true;
43
44         if(!next_tokens.empty())
45         {
46                 last_token = next_tokens.front();
47                 next_tokens.pop_front();
48                 return last_token;
49         }
50
51         return (last_token = parse_token_());
52 }
53
54 void Tokenizer::expect(const string &token)
55 {
56         string parsed = parse_token();
57         if(parsed!=token)
58                 throw parse_error(location, parsed, format("'%s'", token));
59 }
60
61 void Tokenizer::set_location(const Location &loc)
62 {
63         location = loc;
64         suppress_line_advance = true;
65 }
66
67 string Tokenizer::parse_token_()
68 {
69         while(1)
70         {
71                 skip_comment_and_whitespace();
72                 bool allow_preproc = allow_preprocess;
73                 allow_preprocess = false;
74                 if(iter==source_end)
75                         return string();
76                 else if(allow_preproc && *iter=='#')
77                 {
78                         ++iter;
79                         preprocess();
80                 }
81                 else if(isalpha(*iter) || *iter=='_')
82                         return parse_identifier();
83                 else if(isdigit(*iter))
84                         return parse_number();
85                 else if(*iter=='"')
86                         return parse_string();
87                 else if(*iter=='#' || *iter=='$' || *iter=='\'' || *iter=='@' || *iter=='\\' || *iter=='`')
88                         throw syntax_error(location, string(1, *iter), "Invalid character in source");
89                 else
90                         return parse_other();
91         }
92 }
93
94 void Tokenizer::preprocess()
95 {
96         SetForScope<deque<string> > clear_tokens(next_tokens, deque<string>());
97
98         string::const_iterator line_end = iter;
99         for(; (line_end!=source_end && *line_end!='\n'); ++line_end) ;
100         SetForScope<string::const_iterator> stop_at_line_end(source_end, line_end);
101
102         signal_preprocess.emit();
103
104         iter = line_end;
105 }
106
107 string Tokenizer::parse_identifier()
108 {
109         string ident;
110         while(iter!=source_end)
111         {
112                 if(isalnum(*iter) || *iter=='_')
113                         ident += *iter++;
114                 else
115                         break;
116         }
117
118         return ident;
119 }
120
121 string Tokenizer::parse_number()
122 {
123         bool got_fract = false;
124         string number;
125         while(iter!=source_end)
126         {
127                 if(isdigit(*iter))
128                         number += *iter++;
129                 else if(!got_fract && *iter=='.')
130                 {
131                         number += *iter++;
132                         got_fract = true;
133                 }
134                 else
135                         break;
136         }
137
138         bool require_digit = false;
139         if(iter!=source_end && (*iter=='e' || *iter=='E'))
140         {
141                 number += *iter++;
142                 if(iter!=source_end && (*iter=='-' || *iter=='+'))
143                         number += *iter++;
144                 require_digit = true;
145                 while(iter!=source_end)
146                 {
147                         if(isdigit(*iter))
148                         {
149                                 number += *iter++;
150                                 require_digit = false;
151                         }
152                         else
153                                 break;
154                 }
155         }
156
157         if(require_digit)
158                 throw syntax_error(location, number, "Incomplete numeric literal");
159         else if(isalnum(*iter) || *iter=='_')
160                 throw syntax_error(location, number, "Garbage at end of numeric literal");
161
162         return number;
163 }
164
165 string Tokenizer::parse_string()
166 {
167         string str(1, *iter++);
168         bool escape = false;
169         while(iter!=source_end)
170         {
171                 char c = *iter++;
172                 str += c;
173                 if(c=='\\')
174                         escape = true;
175                 else if(c=='"' && !escape)
176                         break;
177                 else
178                         escape = false;
179         }
180
181         return str;
182 }
183
184 string Tokenizer::parse_other()
185 {
186         if(iter==source_end)
187                 return string();
188
189         string token(1, *iter++);
190         for(unsigned i=1; (i<3 && iter!=source_end); ++i)
191         {
192                 bool matched = false;
193                 for(const Operator *j=Operator::operators; (!matched && j->type); ++j)
194                 {
195                         matched = (j->token[i]==*iter);
196                         for(unsigned k=0; (matched && k<i && j->token[k]); ++k)
197                                 matched = (j->token[k]==token[k]);
198                 }
199
200                 if(!matched)
201                         break;
202
203                 token += *iter++;
204         }
205
206         return token;
207 }
208
209 void Tokenizer::skip_comment_and_whitespace()
210 {
211         unsigned comment = 0;
212         while(iter!=source_end)
213         {
214                 if(comment==0)
215                 {
216                         if(*iter=='/')
217                                 comment = 1;
218                         else if(!isspace(*iter))
219                                 break;
220                 }
221                 else if(comment==1)
222                 {
223                         if(*iter=='/')
224                                 comment = 2;
225                         else if(*iter=='*')
226                                 comment = 3;
227                         else
228                         {
229                                 comment = 0;
230                                 --iter;
231                                 break;
232                         }
233                 }
234                 else if(comment==2)
235                 {
236                         if(*iter=='\n')
237                                 comment = 0;
238                 }
239                 else if(comment==3 && *iter=='*')
240                         comment = 4;
241                 else if(comment==4)
242                 {
243                         if(*iter=='/')
244                                 comment = 0;
245                         else if(*iter!='*')
246                                 comment = 3;
247                 }
248
249                 if(*iter=='\n')
250                 {
251                         if(!suppress_line_advance)
252                                 ++location.line;
253                         allow_preprocess = (comment<3);
254                 }
255
256                 ++iter;
257         }
258
259         suppress_line_advance = false;
260 }
261
262 } // namespace SL
263 } // namespace GL
264 } // namespace Msp