--- /dev/null
+#include <msp/stringcodec/utf8.h>
+#include "except.h"
+#include "input.h"
+#include "jsonparser.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+// Creates a JSON parser.  Both arguments are passed on unchanged to the
+// ParserMode base class, and the top-level state starts out as STATE_INIT so
+// that the first parse() call looks for the opening bracket or brace.
+JsonParser::JsonParser(Input &i, const string &s):
+	ParserMode(i, s),
+	toplevel_state(STATE_INIT)
+{
+}
+
+// Returns the next statement of the top-level array or object.
+//
+// The first call consumes the opening "[" or "{" of the document.  Every
+// later call expects either the matching closing token or a "," followed by
+// another element.  Once the closing token has been seen, this and all
+// further calls return a default-constructed Statement.
+//
+// Throws syntax_error if the document does not start with "[" or "{", or if
+// consecutive elements are not separated by ",".
+Statement JsonParser::parse()
+{
+	if(toplevel_state==STATE_END)
+		return Statement();
+
+	// Remember whether this call opens the document; the first element is not
+	// preceded by a comma, all others are.
+	const bool first_call = (toplevel_state==STATE_INIT);
+
+	Token current = parse_token();
+	if(first_call)
+	{
+		if(current.str=="[")
+			toplevel_state = STATE_ARRAY;
+		else if(current.str=="{")
+			toplevel_state = STATE_OBJECT;
+		else
+		{
+			// TODO Standalone simple values; does anyone use them?
+			toplevel_state = STATE_END;
+			throw syntax_error(current.str);
+		}
+
+		// Move past the opening token to the first element or the closer.
+		current = parse_token();
+	}
+
+	const bool array_closed = (toplevel_state==STATE_ARRAY && current.str=="]");
+	const bool object_closed = (toplevel_state==STATE_OBJECT && current.str=="}");
+	if(array_closed || object_closed)
+	{
+		toplevel_state = STATE_END;
+		return Statement();
+	}
+
+	if(!first_call)
+	{
+		if(current.str!=",")
+			throw syntax_error(current.str);
+
+		// Skip the separator; the element itself starts at the next token.
+		current = parse_token();
+	}
+
+	return parse_statement(&current, toplevel_state, string());
+}
+
+// Parses a single statement: one array element or one object member.  Nested
+// arrays and objects are parsed recursively and stored as sub-statements.
+//
+// t            First token of the statement, already read by the caller.  May
+//              be null, in which case all tokens are read from the input.
+// outer_state  STATE_ARRAY when the statement is an array element; no name is
+//              expected and the keyword becomes outer_kw+"[]".  For any other
+//              value a "name": value member is expected and the name becomes
+//              the keyword.
+// outer_kw     Keyword of the enclosing statement.
+//
+// Throws syntax_error on any unexpected token, including the empty token that
+// parse_token() produces when the input ends in the middle of a statement.
+Statement JsonParser::parse_statement(const Token *t, State outer_state, const string &outer_kw)
+{
+	enum ParseState
+	{
+		INIT,
+		NAME,
+		VALUE,
+		ARRAY_INIT,
+		ARRAY,
+		ARRAY_ELEMENT,
+		OBJECT_INIT,
+		OBJECT,
+		OBJECT_MEMBER
+	};
+
+	Statement result;
+	ParseState state = INIT;
+
+	if(outer_state==STATE_ARRAY)
+	{
+		// Array elements have no name of their own.
+		result.keyword = outer_kw+"[]";
+		state = VALUE;
+	}
+
+	/* Every complete statement leaves this loop through a break.  The loop
+	must not be conditioned on the input stream: stopping because the stream is
+	exhausted would return a half-built (or entirely invalid) statement without
+	any error, and would skip a caller-supplied first token altogether.  At end
+	of input parse_token() returns an empty SPECIAL token, which no state below
+	accepts, so truncated input always ends in a syntax_error. */
+	while(true)
+	{
+		Token token;
+		if(t)
+		{
+			// Consume the token handed in by the caller exactly once.
+			token = *t;
+			t = 0;
+		}
+		else
+			token = parse_token();
+
+		if(!result.valid)
+		{
+			result.valid = true;
+			result.source = src;
+			result.line = in.get_line_number();
+		}
+
+		if(state==INIT)
+		{
+			// An object member must begin with its name.
+			if(token.type!=Token::STRING)
+				throw syntax_error(token.str);
+
+			result.keyword = token.str;
+			state = NAME;
+		}
+		else if((state==ARRAY_INIT || state==ARRAY_ELEMENT) && token.str=="]")
+			break;
+		else if(state==ARRAY_INIT || state==ARRAY)
+		{
+			result.sub.push_back(parse_statement(&token, STATE_ARRAY, result.keyword));
+			state = ARRAY_ELEMENT;
+		}
+		else if(state==ARRAY_ELEMENT && token.str==",")
+			state = ARRAY;
+		else if((state==OBJECT_INIT || state==OBJECT_MEMBER) && token.str=="}")
+			break;
+		else if(state==OBJECT_INIT || state==OBJECT)
+		{
+			result.sub.push_back(parse_statement(&token, STATE_OBJECT, result.keyword));
+			state = OBJECT_MEMBER;
+		}
+		else if(state==OBJECT_MEMBER && token.str==",")
+			state = OBJECT;
+		else if(state==NAME && token.str==":")
+			state = VALUE;
+		else if(state==VALUE)
+		{
+			if(token.str=="[")
+				state = ARRAY_INIT;
+			else if(token.str=="{")
+				state = OBJECT_INIT;
+			else if(token.type!=Token::SPECIAL)
+			{
+				// A simple value completes the statement.
+				result.append_from_token(token);
+				break;
+			}
+			else
+				throw syntax_error(token.str);
+		}
+		else
+			throw syntax_error(token.str);
+	}
+
+	return result;
+}
+
+// Reads the next token from the input.
+//
+// Leading whitespace is skipped.  If the input is exhausted before a token
+// starts, an empty SPECIAL token is returned.  The structural characters
+// { } [ ] : , are returned as single-character SPECIAL tokens.  Numbers,
+// strings and alphabetic identifiers are accumulated until the following
+// character is a delimiter (see is_delimiter); strings are returned with the
+// quotes removed and escape sequences resolved by unescape().
+//
+// Throws parse_error if the characters do not form a valid token or if the
+// input ends in the middle of one.  Exceptions from unescape() propagate.
+Token JsonParser::parse_token()
+{
+	int c = 0;
+
+	while(in)
+	{
+		c = in.get();
+		if(!isspace(c))
+			break;
+	}
+
+	if(!in)
+		return Token(Token::SPECIAL, "");
+
+	// States listed after ACCEPT are ones in which the token may end; the
+	// ordering is relied upon by the state>=ACCEPT test below.
+	enum ParseState
+	{
+		INIT,
+		SIGN,
+		FLOATEXPINIT,
+		FLOATEXPSIGN,
+		STRING,
+		STRING_ESCAPE,
+		ACCEPT,
+		DECIMAL,
+		FLOAT,
+		FLOATEXP,
+		STRING_END,
+		IDENTIFIER
+	};
+
+	// Token type for each state, indexed by ParseState.  Only the entries for
+	// accepting states are ever used.
+	static const Token::Type token_type[]=
+	{
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::INTEGER,
+		Token::FLOAT,
+		Token::FLOAT,
+		Token::STRING,
+		Token::IDENTIFIER
+	};
+
+	ParseState state = INIT;
+	string buf;
+
+	while(1)
+	{
+		// The first character was already fetched while skipping whitespace.
+		if(state!=INIT)
+		{
+			c = in.get();
+
+			/* Running out of input inside a token is always an error.  Without
+			this check the STRING and STRING_ESCAPE states, which accept any
+			character, would loop forever on an unterminated string literal.
+			This uses the same exhaustion test as the whitespace loop above. */
+			if(!in)
+				throw parse_error(buf);
+		}
+		int next = in.peek();
+
+		buf += c;
+
+		switch(state)
+		{
+		case INIT:
+			if(c=='-' || c=='+')
+				state = SIGN;
+			else if(c=='.')
+				state = FLOAT;
+			else if(c=='"')
+				state = STRING;
+			else if(c=='{' || c=='}' || c=='[' || c==']' || c==':' || c==',')
+				return Token(Token::SPECIAL, string(1, c));
+			else if(isdigit(c))
+				state = DECIMAL;
+			else if(isalpha(c))
+				state = IDENTIFIER;
+			else
+				throw parse_error(buf);
+			break;
+
+		case SIGN:
+			if(isdigit(c))
+				state = DECIMAL;
+			else if(c=='.')
+				state = FLOAT;
+			else
+				throw parse_error(buf);
+			break;
+
+		case DECIMAL:
+			if(c=='.')
+				state = FLOAT;
+			else if(c=='e' || c=='E')
+				state = FLOATEXPINIT;
+			else if(!isdigit(c))
+				throw parse_error(buf);
+			break;
+
+		case FLOAT:
+			if(c=='e' || c=='E')
+				state = FLOATEXPINIT;
+			else if(!isdigit(c))
+				throw parse_error(buf);
+			break;
+
+		case FLOATEXPINIT:
+			if(c=='+' || c=='-')
+				state = FLOATEXPSIGN;
+			else if(isdigit(c))
+				state = FLOATEXP;
+			else
+				throw parse_error(buf);
+			break;
+
+		case FLOATEXPSIGN:
+			if(isdigit(c))
+				state = FLOATEXP;
+			else
+				throw parse_error(buf);
+			break;
+
+		case FLOATEXP:
+			if(!isdigit(c))
+				throw parse_error(buf);
+			break;
+
+		case STRING:
+			if(c=='\\')
+				state = STRING_ESCAPE;
+			else if(c=='"')
+				state = STRING_END;
+			break;
+
+		case STRING_ESCAPE:
+			// Whatever follows a backslash is taken as-is; unescape()
+			// validates it later.
+			state = STRING;
+			break;
+
+		case IDENTIFIER:
+			if(!isalpha(c))
+				throw parse_error(buf);
+			break;
+
+		case STRING_END:
+			// Reached only if the closing quote was not followed by a
+			// delimiter.
+			throw parse_error(buf);
+
+		default:
+			throw logic_error("bad parser state");
+		}
+
+		// The token is complete once an accepting state is followed by a
+		// delimiter, which is left in the input for the next call.
+		if(is_delimiter(next) && state>=ACCEPT)
+		{
+			if(state==STRING_END)
+				return Token(Token::STRING, unescape(buf.substr(1, buf.size()-2)));
+			else
+				return Token(token_type[state], buf);
+		}
+	}
+}
+
+// Tells whether c terminates a token: true for whitespace and for the
+// structural characters { } [ ] : and ,
+bool JsonParser::is_delimiter(int c)
+{
+	switch(c)
+	{
+	case '{':
+	case '}':
+	case '[':
+	case ']':
+	case ':':
+	case ',':
+		return true;
+	default:
+		return isspace(c)!=0;
+	}
+}
+
+// Reads exactly four hexadecimal digits starting at i and returns their
+// value, advancing i past them.  Throws invalid_argument if the string ends
+// early or a character is not a hexadecimal digit.
+static unsigned parse_hex_quad(StringCodec::Utf8::Decoder &dec, const string &str, string::const_iterator &i)
+{
+	unsigned value = 0;
+	for(unsigned n=0; n<4; ++n)
+	{
+		if(i==str.end())
+			throw invalid_argument("JsonParser::unescape");
+
+		StringCodec::unichar c = dec.decode_char(str, i);
+
+		unsigned digit = 0;
+		if(c>='0' && c<='9')
+			digit = c-'0';
+		else if(c>='a' && c<='f')
+			digit = c-'a'+10;
+		else if(c>='A' && c<='F')
+			digit = c-'A'+10;
+		else
+			throw invalid_argument("JsonParser::unescape");
+
+		value = (value<<4)+digit;
+	}
+
+	return value;
+}
+
+// Resolves the backslash escape sequences of a JSON string.
+//
+// str is the contents of a string literal without the surrounding quotes.
+// The recognized escapes are \" \\ \/ \b \f \n \r \t and \uXXXX with four
+// hexadecimal digits.  A \uXXXX high surrogate immediately followed by a
+// \uXXXX low surrogate is combined into the single code point the pair
+// represents.  Characters are decoded and re-encoded with the UTF-8 codec.
+//
+// Throws invalid_argument for an unknown escape, a malformed \u escape or a
+// backslash at the very end of the string.  Errors raised by the codec
+// propagate unchanged.
+string JsonParser::unescape(const string &str)
+{
+	string result;
+	StringCodec::Utf8::Decoder dec;
+	StringCodec::Utf8::Encoder enc;
+	bool escape = false;
+
+	for(string::const_iterator i=str.begin(); i!=str.end(); )
+	{
+		StringCodec::unichar c = dec.decode_char(str, i);
+
+		if(escape)
+		{
+			if(c=='\"')
+				enc.encode_char('\"', result);
+			else if(c=='\\')
+				enc.encode_char('\\', result);
+			else if(c=='/')
+				enc.encode_char('/', result);
+			else if(c=='b')
+				enc.encode_char('\b', result);
+			else if(c=='f')
+				enc.encode_char('\f', result);
+			else if(c=='n')
+				enc.encode_char('\n', result);
+			else if(c=='r')
+				enc.encode_char('\r', result);
+			else if(c=='t')
+				enc.encode_char('\t', result);
+			else if(c=='u')
+			{
+				unsigned code = parse_hex_quad(dec, str, i);
+
+				/* JSON writes characters beyond U+FFFF as two consecutive \u
+				escapes holding a UTF-16 surrogate pair.  Look ahead for the
+				second half and merge the two.  If no low surrogate follows,
+				the position is left untouched and the value is passed to the
+				encoder as it is. */
+				if(code>=0xD800 && code<=0xDBFF)
+				{
+					string::const_iterator j = i;
+					if(j!=str.end() && *j=='\\' && (j+1)!=str.end() && *(j+1)=='u')
+					{
+						j += 2;
+						unsigned low = parse_hex_quad(dec, str, j);
+						if(low>=0xDC00 && low<=0xDFFF)
+						{
+							code = 0x10000+((code-0xD800)<<10)+(low-0xDC00);
+							i = j;
+						}
+					}
+				}
+
+				enc.encode_char(code, result);
+			}
+			else
+				throw invalid_argument("JsonParser::unescape");
+
+			escape = false;
+		}
+		else if(c=='\\')
+			escape = true;
+		else
+			enc.encode_char(c, result);
+	}
+
+	// A backslash with nothing after it is malformed, not something to drop
+	// silently.
+	if(escape)
+		throw invalid_argument("JsonParser::unescape");
+
+	return result;
+}
+
+} // namespace DataFile
+} // namespace Msp