// git.tdb.fi Git - libs/datafile.git / blob - source/jsonparser.cpp
// Commit: Use default member initializers for constant initial values
// Path: [libs/datafile.git] / source / jsonparser.cpp
1 #include <msp/stringcodec/utf8.h>
2 #include "except.h"
3 #include "input.h"
4 #include "jsonparser.h"
5
6 using namespace std;
7
8 namespace Msp {
9 namespace DataFile {
10
11 JsonParser::JsonParser(Input &i, const string &s):
12         ParserMode(i, s)
13 { }
14
15 Statement JsonParser::parse()
16 {
17         if(toplevel_state==STATE_END)
18                 return Statement();
19
20         bool was_init = (toplevel_state==STATE_INIT);
21         Token token = parse_token();
22         if(toplevel_state==STATE_INIT)
23         {
24                 if(token.str=="[")
25                         toplevel_state = STATE_ARRAY;
26                 else if(token.str=="{")
27                         toplevel_state = STATE_OBJECT;
28                 else
29                 {
30                         // TODO Standalone simple values; does anyone use them?
31                         toplevel_state = STATE_END;
32                         throw syntax_error(token.str);
33                 }
34
35                 token = parse_token();
36         }
37
38         if((toplevel_state==STATE_ARRAY && token.str=="]") || (toplevel_state==STATE_OBJECT && token.str=="}"))
39         {
40                 toplevel_state = STATE_END;
41                 return Statement();
42         }
43         else if(!was_init)
44         {
45                 if(token.str!=",")
46                         throw syntax_error(token.str);
47
48                 token = parse_token();
49         }
50
51         return parse_statement(&token, toplevel_state, string());
52 }
53
54 Statement JsonParser::parse_statement(const Token *t, State outer_state, const string &outer_kw)
55 {
56         enum ParseState
57         {
58                 INIT,
59                 NAME,
60                 VALUE,
61                 ARRAY_INIT,
62                 ARRAY,
63                 ARRAY_ELEMENT,
64                 OBJECT_INIT,
65                 OBJECT,
66                 OBJECT_MEMBER
67         };
68
69         Statement result;
70         ParseState state = INIT;
71
72         if(outer_state==STATE_ARRAY)
73         {
74                 result.keyword = outer_kw+"[]";
75                 state = VALUE;
76         }
77
78         while(in)
79         {
80                 Token token;
81                 if(t)
82                 {
83                         token = *t;
84                         t = nullptr;
85                 }
86                 else
87                         token = parse_token();
88
89                 if(!result.valid)
90                 {
91                         result.valid = true;
92                         result.source = src;
93                         result.line = in.get_line_number();
94                 }
95
96                 if(state==INIT)
97                 {
98                         if(token.type!=Token::STRING)
99                                 throw syntax_error(token.str);
100
101                         result.keyword = token.str;
102                         state = NAME;
103                 }
104                 else if((state==ARRAY_INIT || state==ARRAY_ELEMENT) && token.str=="]")
105                         break;
106                 else if((state==ARRAY_INIT || state==ARRAY))
107                 {
108                         Statement ss = parse_statement(&token, STATE_ARRAY, result.keyword);
109                         result.sub.push_back(ss);
110                         state = ARRAY_ELEMENT;
111                 }
112                 else if(state==ARRAY_ELEMENT && token.str==",")
113                         state = ARRAY;
114                 else if((state==OBJECT_INIT || state==OBJECT_MEMBER) && token.str=="}")
115                         break;
116                 else if((state==OBJECT_INIT || state==OBJECT))
117                 {
118                         Statement ss = parse_statement(&token, STATE_OBJECT, result.keyword);
119                         result.sub.push_back(ss);
120                         state = OBJECT_MEMBER;
121                 }
122                 else if(state==OBJECT_MEMBER && token.str==",")
123                         state = OBJECT;
124                 else if(state==NAME && token.str==":")
125                         state = VALUE;
126                 else if(state==VALUE)
127                 {
128                         if(token.str=="[")
129                                 state = ARRAY_INIT;
130                         else if(token.str=="{")
131                                 state = OBJECT_INIT;
132                         else if(token.type!=Token::SPECIAL)
133                         {
134                                 result.append_from_token(token);
135                                 break;
136                         }
137                         else
138                                 throw syntax_error(token.str);
139                 }
140                 else
141                         throw syntax_error(token.str);
142         }
143
144         return result;
145 }
146
147 Token JsonParser::parse_token()
148 {
149         int c = 0;
150
151         while(in)
152         {
153                 c = in.get();
154                 if(!isspace(c))
155                         break;
156         }
157
158         if(!in)
159                 return Token(Token::SPECIAL, "");
160
161         enum ParseState
162         {
163                 INIT,
164                 SIGN,
165                 FLOATEXPINIT,
166                 FLOATEXPSIGN,
167                 STRING,
168                 STRING_ESCAPE,
169                 ACCEPT,
170                 DECIMAL,
171                 FLOAT,
172                 FLOATEXP,
173                 STRING_END,
174                 IDENTIFIER
175         };
176
177         static Token::Type token_type[]=
178         {
179                 Token::SPECIAL,
180                 Token::SPECIAL,
181                 Token::SPECIAL,
182                 Token::SPECIAL,
183                 Token::SPECIAL,
184                 Token::SPECIAL,
185                 Token::SPECIAL,
186                 Token::INTEGER,
187                 Token::FLOAT,
188                 Token::FLOAT,
189                 Token::STRING,
190                 Token::IDENTIFIER
191         };
192
193         ParseState state = INIT;
194         string buf;
195
196         while(1)
197         {
198                 if(state!=INIT)
199                         c = in.get();
200                 int next = in.peek();
201
202                 buf += c;
203
204                 switch(state)
205                 {
206                 case INIT:
207                         if(c=='-' || c=='+')
208                                 state = SIGN;
209                         else if(c=='.')
210                                 state = FLOAT;
211                         else if(c=='"')
212                                 state = STRING;
213                         else if(c=='{' || c=='}' || c=='[' || c==']' || c==':' || c==',')
214                                 return Token(Token::SPECIAL, string(1, c));
215                         else if(isdigit(c))
216                                 state = DECIMAL;
217                         else if(isalpha(c))
218                                 state = IDENTIFIER;
219                         else
220                                 throw parse_error(buf);
221                         break;
222
223                 case SIGN:
224                         if(isdigit(c))
225                                 state = DECIMAL;
226                         else if(c=='.')
227                                 state = FLOAT;
228                         else
229                                 throw parse_error(buf);
230                         break;
231
232                 case DECIMAL:
233                         if(c=='.')
234                                 state = FLOAT;
235                         else if(c=='e' || c=='E')
236                                 state = FLOATEXPINIT;
237                         else if(!isdigit(c))
238                                 throw parse_error(buf);
239                         break;
240
241                 case FLOAT:
242                         if(c=='e' || c=='E')
243                                 state = FLOATEXPINIT;
244                         else if(!isdigit(c))
245                                 throw parse_error(buf);
246                         break;
247
248                 case FLOATEXPINIT:
249                         if(c=='+' || c=='-')
250                                 state = FLOATEXPSIGN;
251                         else if(isdigit(c))
252                                 state = FLOATEXP;
253                         else
254                                 throw parse_error(buf);
255                         break;
256
257                 case FLOATEXPSIGN:
258                         if(isdigit(c))
259                                 state = FLOATEXP;
260                         else
261                                 throw parse_error(buf);
262                         break;
263
264                 case FLOATEXP:
265                         if(!isdigit(c))
266                                 throw parse_error(buf);
267                         break;
268
269                 case STRING:
270                         if(c=='\\')
271                                 state = STRING_ESCAPE;
272                         else if(c=='"')
273                                 state = STRING_END;
274                         break;
275
276                 case STRING_ESCAPE:
277                         state = STRING;
278                         break;
279
280                 case IDENTIFIER:
281                         if(!isalpha(c))
282                                 throw parse_error(buf);
283                         break;
284
285                 case STRING_END:
286                         throw parse_error(buf);
287
288                 default:
289                         throw logic_error("bad parser state");
290                 }
291
292                 if(is_delimiter(next) && state>=ACCEPT)
293                 {
294                         if(state==STRING_END)
295                                 return Token(Token::STRING, unescape(buf.substr(1, buf.size()-2)));
296                         else
297                                 return Token(token_type[state], buf);
298                 }
299         }
300 }
301
302 bool JsonParser::is_delimiter(int c)
303 {
304         return (isspace(c) || c=='{' || c=='}' || c=='[' || c==']' || c==':' || c==',');
305 }
306
307 string JsonParser::unescape(const string &str)
308 {
309         string result;
310         StringCodec::Utf8::Decoder dec;
311         StringCodec::Utf8::Encoder enc;
312         bool escape = false;
313
314         for(auto i=str.begin(); i!=str.end(); )
315         {
316                 StringCodec::unichar c = dec.decode_char(str, i);
317
318                 if(escape)
319                 {
320                         if(c=='\"')
321                                 enc.encode_char('\"', result);
322                         else if(c=='\\')
323                                 enc.encode_char('\\', result);
324                         else if(c=='/')
325                                 enc.encode_char('/', result);
326                         else if(c=='b')
327                                 enc.encode_char('\b', result);
328                         else if(c=='f')
329                                 enc.encode_char('\f', result);
330                         else if(c=='n')
331                                 enc.encode_char('\n', result);
332                         else if(c=='r')
333                                 enc.encode_char('\r', result);
334                         else if(c=='t')
335                                 enc.encode_char('\t', result);
336                         else if(c=='u')
337                         {
338                                 unsigned code = 0;
339                                 for(unsigned n=0; n<4; ++n)
340                                 {
341                                         if(i==str.end())
342                                                 throw invalid_argument("JsonParser::unescape");
343
344                                         c = dec.decode_char(str, i);
345
346                                         unsigned digit = 0;
347                                         if(c>='0' && c<='9')
348                                                 digit = c-'0';
349                                         else if(c>='a' && c<='f')
350                                                 digit = c-'a'+10;
351                                         else if(c>='A' && c<='F')
352                                                 digit = c-'A'+10;
353                                         else
354                                                 throw invalid_argument("JsonParser::unescape");
355
356                                         code = (code<<4)+digit;
357                                 }
358
359                                 enc.encode_char(code, result);
360                         }
361                         else
362                                 throw invalid_argument("JsonParser::unescape");
363
364                         escape = false;
365                 }
366                 else if(c=='\\')
367                         escape = true;
368                 else
369                         enc.encode_char(c, result);
370         }
371
372         return result;
373 }
374
375 } // namespace DataFile
376 } // namespace Msp