// JSON5 + SJSON parser module
//
// License:
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
// No warranty is implied, use at your own risk.
//
// Credits:
// r-lyeh (fork)
// Dominik Madarasz (@zaklaus) (original code)

#ifndef JSON5_H
#define JSON5_H

// Error hook invoked on most parse/write errors. It expects a `char *p` (the
// current read position) to be in scope at the point of use. Override it by
// defining JSON5_ASSERT before including this file.
#ifndef JSON5_ASSERT
#define JSON5_ASSERT do { printf("JSON5: Error L%d while parsing '%c' in '%.16s'\n", __LINE__, p[0], p); assert(0); } while(0)
#endif

#include <stdint.h>
#include <stdio.h>

typedef enum json5_type {
    JSON5_UNDEFINED, // 0
    JSON5_NULL,      // 1
    JSON5_BOOL,      // 2
    JSON5_OBJECT,    // 3
    JSON5_STRING,    // 4
    JSON5_ARRAY,     // 5
    JSON5_INTEGER,   // 6
    JSON5_REAL,      // 7
} json5_type;

// A parsed value. `name` and `string` point into the source buffer handed to
// json5_parse(); only `array`/`nodes` are owned by the node (see json5_free).
typedef struct json5 {
    char*      name;
#ifdef NDEBUG
    unsigned   type : 3;
#else
    json5_type type;
#endif
    unsigned   count : 29;
    union {
        struct json5* array;
        struct json5* nodes;
        int64_t   integer;
        double    real;
        char*     string;
        int       boolean;
    };
} json5;

// Parses `source` IN PLACE (the buffer is modified: terminators are written
// into it) and fills `root`. Returns 0 on success or a static error string.
char* json5_parse(json5 *root, char *source, int flags);
// Writes `root` as text to `fp`.
void  json5_write(FILE *fp, const json5 *root);
// Releases the arrays owned by `root` (recursively) and zeroes it.
void  json5_free(json5 *root);

#endif // JSON5_H

// json5 ----------------------------------------------------------------------

#ifdef JSON5_C
//#pragma once
#include <assert.h>
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Skips whitespace, /* C */ comments and // C++ comments.
// Returns the first position that is none of those (possibly the terminator).
char *json5__trim(char *p) {
    while (*p) {
        /**/ if( isspace((unsigned char)*p) ) ++p;
        else if( p[0] == '/' && p[1] == '*' ) { // skip C comment
            for( p += 2; *p && !(p[0] == '*' && p[1] == '/'); ++p) {}
            if( *p ) p += 2;
        }
        else if( p[0] == '/' && p[1] == '/' ) { // skip C++ comment
            for( p += 2; *p && p[0] != '\n'; ++p) {}
            if( *p ) ++p;
        }
        else break;
    }
    return p;
}

char *json5__parse_value(json5 *obj, char *p, char **err_code);

// Parses a string delimited by ", ' or ` starting at `p`.
// The closing delimiter is overwritten with '\0' and `obj->string` points at
// the first character of the contents (escape sequences are NOT decoded; a
// backslash before a line break is replaced by a space).
// Returns the position right after the closing delimiter.
// Returns NULL *without* setting `*err_code` when `p` is not at a string, so
// callers can use this function as a probe. Returns NULL and sets `*err_code`
// when the string is not terminated.
char *json5__parse_string(json5 *obj, char *p, char **err_code) {
    assert(obj && p);

    if( *p != '"' && *p != '\'' && *p != '`' ) {
        //JSON5_ASSERT; *err_code = "json5_error_invalid_value";
        return NULL;
    }

    char eos_char = *p;
    char *b = p + 1, *e = b;
    for( ; *e && *e != eos_char; ++e ) {
        if( *e != '\\' ) continue;
        // Skip the escaped character so an escaped delimiter does not end the
        // string and an escaped backslash does not hide the real delimiter.
        /**/ if( e[1] == eos_char || e[1] == '\\' ) ++e;
        else if( e[1] == '\r' || e[1] == '\n' ) *e = ' ';
    }

    if( *e != eos_char ) {
        // Ran into the end of the input: returning e + 1 would step past the
        // terminator of the source buffer.
        JSON5_ASSERT; *err_code = "json5_error_invalid_value";
        return NULL;
    }

    obj->type = JSON5_STRING;
    obj->string = b;
    *e = '\0';
    return e + 1;
}

// True for characters allowed in an unquoted object key.
static int json5__is_name_char(char c) {
    return isalnum((unsigned char)c) || c == '_' || c == '$' || c == '.'; // also || is_unicode(p)
}

// Parses an object into `obj`. The opening '{' is optional so that brace-less
// SJSON documents are accepted; both ':' and '=' separate keys from values.
// Parsing stops at the matching '}' or at the end of the input.
// Returns the position after the object, or NULL with `*err_code` set.
char *json5__parse_object(json5 *obj, char *p, char **err_code) {
    assert(obj && p);

    int skip = *p == '{'; /* <-- for SJSON */

    obj->type = JSON5_OBJECT;
    obj->nodes = 0;
    obj->count = 0;

    while (*p) {
        json5 node = { 0 };

        // Step over the brace / previous delimiter, then any run of commas.
        do { p = json5__trim(p + skip); skip = 1; } while( *p == ',' );

        if( *p == '}' ) {
            ++p;
            break;
        }
        if( *p == '\0' ) {
            break; // end of input: brace-less SJSON (or unterminated object)
        }

        char sep; // character separating the key from its value

        // @todo: is_unicode() (s[0] == '\\' && isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]))) {
        if( json5__is_name_char(*p) ) {
            node.name = p;

            do {
                ++p;
            } while (*p && json5__is_name_char(*p));

            char *e = p;
            p = json5__trim(p);
            // Read the separator BEFORE terminating the name: when nothing
            // sits between them, `e` and `p` are the same byte.
            sep = *p;
            *e = '\0';
        }
        else { //if( *p == '"' || *p == '\'' || *p == '`' ) {
            char *ps = json5__parse_string(&node, p, err_code);
            if( !ps ) {
                if( !(*err_code)[0] ) {
                    // Neither an identifier nor a quoted string.
                    JSON5_ASSERT; *err_code = "json5_error_invalid_name";
                }
                return NULL;
            }
            node.name = node.string;
            p = json5__trim(ps);
            sep = *p;
        }

        // @todo: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
        if( !(node.name && node.name[0]) ) { // !json5__validate_name(node.name) ) {
            JSON5_ASSERT; *err_code = "json5_error_invalid_name";
            return NULL;
        }

        if( sep != ':' && sep != '=' /* <-- for SJSON */ ) {
            JSON5_ASSERT; *err_code = "json5_error_invalid_name";
            return NULL;
        }

        // A quoted key leaves JSON5_STRING behind; start the value from a
        // clean state so an unparsed value is never mistaken for a string.
        node.type = JSON5_UNDEFINED;

        p = json5__parse_value(&node, json5__trim(p + 1), err_code);
        if( !p || (*err_code)[0] ) {
            if( !(*err_code)[0] ) *err_code = "json5_error_invalid_value";
            json5_free(&node); // children parsed so far are not attached anywhere
            return NULL;
        }

        if( node.type != JSON5_UNDEFINED ) {
            array_push(obj->nodes, node);
            ++obj->count;
        }

        if( *p == '}') { ++p; break; }
    }

    return p;
}

// Parses any value (string, object, array, keyword or number) into `obj`.
// Returns the position after the value, or NULL with `*err_code` set.
char *json5__parse_value(json5 *obj, char *p, char **err_code) {
    assert(obj && p);

    p = json5__trim(p);

    char *after_string = json5__parse_string(obj, p, err_code);

    if( after_string ) {
        p = after_string;
    }
    else if( (*err_code)[0] ) {
        return NULL; // it was a string, but a malformed one
    }
    else if( *p == '{' ) {
        p = json5__parse_object( obj, p, err_code );
        if( !p || (*err_code)[0] ) { return NULL; }
    }
    else if( *p == '[' ) {
        obj->type = JSON5_ARRAY;
        obj->array = 0;
        obj->count = 0;

        while (*p) {
            json5 elem = { 0 };

            // Step over the bracket / previous delimiter, then any run of commas.
            do { p = json5__trim(p + 1); } while( *p == ',' );
            if( *p == ']') { ++p; break; }

            p = json5__parse_value(&elem, p, err_code);
            if( !p || (*err_code)[0] ) {
                if( !(*err_code)[0] ) *err_code = "json5_error_invalid_value";
                json5_free(&elem); // not attached to the array yet
                return NULL;
            }

            if( elem.type != JSON5_UNDEFINED ) {
                array_push(obj->array, elem);
                ++obj->count;
            }
            if (*p == ']') { ++p; break; }
        }
    }
    else if( isalpha((unsigned char)*p) || (*p == '-' && isalpha((unsigned char)p[1])) ) {
        // Index layout matters: 0 null, 1-2 true, 3-4 false, 5-6 nan, 7-8 -nan,
        // 9-10 inf, 11-12 -inf.
        static const char *const labels[] = { "null", "on","true", "off","false", "nan","NaN", "-nan","-NaN", "inf","Infinity", "-inf","-Infinity", 0 };
        static const int lengths[] = { 4, 2,4, 3,5, 3,3, 4,4, 3,8, 4,9 };
#ifdef _MSC_VER // somehow, NaN is apparently signed in MSC
        const double plain_nan = -NAN, minus_nan =  NAN;
#else
        const double plain_nan =  NAN, minus_nan = -NAN;
#endif
        int matched = 0;
        for( int i = 0; labels[i]; ++i ) {
            if( strncmp(p, labels[i], (size_t)lengths[i]) != 0 ) continue;
            p += lengths[i];
            /**/ if( i >= 5 ) obj->type = JSON5_REAL, obj->real = i >= 11 ? -INFINITY : i >= 9 ? INFINITY : i >= 7 ? minus_nan : plain_nan;
            else if( i >= 1 ) obj->type = JSON5_BOOL, obj->boolean = i <= 2;
            else              obj->type = JSON5_NULL;
            matched = 1;
            break;
        }
        if( !matched ) {
            JSON5_ASSERT; *err_code = "json5_error_invalid_value";
            return NULL;
        }
    }
    else if( isdigit((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.' ) {
        char *token = p;
        int is_hex = 0, has_dot = 0, has_exp = 0;
        while( *p && strchr("+-.xX0123456789aAbBcCdDeEfF", *p) ) {
            is_hex  |= (*p | 32) == 'x';
            has_dot |= *p == '.';
            has_exp |= (*p | 32) == 'e';
            ++p;
        }
        // 'e' is an exponent only in decimal literals; in hex it is a digit.
        int is_dbl = has_dot || (has_exp && !is_hex);

        // Parse the token in place (no fixed-size copy): terminate it
        // temporarily so the conversion cannot run past the token.
        char saved = *p, *end = token;
        *p = '\0';
        if( is_dbl ) {
            obj->type = JSON5_REAL;
            obj->real = strtod(token, &end);
        } else {
            obj->type = JSON5_INTEGER;
            obj->integer = is_hex ? (int64_t)strtoull(token, &end, 16)
                                  : (int64_t)strtoll(token, &end, 10);
        }
        *p = saved;

        if( end == token ) { // e.g. a lone "+" or ".": nothing was converted
            p = token;
            obj->type = JSON5_UNDEFINED;
            JSON5_ASSERT; *err_code = "json5_error_invalid_value";
            return NULL;
        }
    }
    else {
        // Unknown token or end of input. Report it: callers dereference the
        // returned position when no error is set.
        JSON5_ASSERT; *err_code = "json5_error_invalid_value";
        return NULL;
    }

    return p;
}

// Parses `p` in place into `root`. A document starting with '[' is parsed as
// an array, anything else as a (possibly brace-less) object; an empty or NULL
// document yields an empty object. `flags` is currently unused.
// Returns 0 on success or a static error string. On error `root` may hold a
// partial tree; calling json5_free() on it is valid in both cases.
char *json5_parse(json5 *root, char *p, int flags) {
    char *err_code = "";
    *root = (json5) {0};
    (void)flags;

    if( p && p[0] ) {
        p = json5__trim(p);
        if( *p == '[' ) { /* <-- for SJSON */
            json5__parse_value(root, p, &err_code);
        } else {
            json5__parse_object(root, p, &err_code); /* <-- for SJSON */
        }
    } else {
        root->type = JSON5_OBJECT;
    }

    return err_code[0] ? err_code : 0;
}

// Recursively releases the child arrays of `root` and zeroes the node.
// Names and strings are not freed: they point into the parsed source buffer.
void json5_free(json5 *root) {
    if( root->type == JSON5_ARRAY && root->array ) {
        for( int i = 0, cnt = array_count(root->array); i < cnt; ++i ) {
            json5_free(root->array + i);
        }
        array_free(root->array);
    }

    if( root->type == JSON5_OBJECT && root->nodes ) {
        for( int i = 0, cnt = array_count(root->nodes); i < cnt; ++i ) {
            json5_free(root->nodes + i);
        }
        array_free(root->nodes);
    }

    *root = (json5) {0}; // needed?
}

// Writes `o` (and its children) to `fp`. Strings are emitted exactly as
// stored, i.e. with whatever escape sequences the source text contained.
void json5_write(FILE *fp, const json5 *o) {
    static __thread int indent = 0;
    int tabs = 1; // 0,1,2,4,8
    if( o->name ) {
        fprintf(fp, "%*.s\"%s\"%s", indent * tabs, "", o->name, tabs ? ": " : ":");
    }
    /**/ if( o->type == JSON5_NULL ) fprintf(fp, "%s", "null");
    else if( o->type == JSON5_BOOL ) fprintf(fp, "%s", o->boolean ? "true" : "false");
    else if( o->type == JSON5_INTEGER ) fprintf(fp, "%lld", (long long)o->integer); // int64_t is not always long long
    else if( o->type == JSON5_REAL ) {
        /**/ if( isnan(o->real) ) fprintf(fp, "%s", signbit(o->real) ? "-nan" : "nan" );
        else if( isinf(o->real) ) fprintf(fp, "%s", signbit(o->real) ? "-inf" : "inf" );
        else fprintf(fp, "%1.8e", o->real); // %1.8e from google:"randomascii 100 digits" ; %.4llf for compactness
    }
    #if 0
    else if( o->type == JSON5_STRING ) { // write (escaped) string
        char chars[] = "\\\"\n\r\b\f\v", remap[] = "\\\"nrbfv", esc[256];
        for( int i = 0; chars[i]; ++i ) esc[ chars[i] ] = remap[i];

        const char *b = o->string, *e = strpbrk(b, chars), *sep = "\"";
        while( e ) {
            fprintf(fp, "%s%.*s\\%c", sep, (int)(e - b), b, esc[(unsigned char)*e] );
            e = strpbrk( b = e + 1, chars);
            sep = "";
        }
        fprintf(fp, "%s%s\"", sep, b);
    }
    #else
    else if( o->type == JSON5_STRING ) { // write string
        fprintf(fp, "\"%s\"", o->string);
    }
    #endif
    else if( o->type == JSON5_ARRAY ) {
        const char *sep = "";
        fprintf(fp, "%s", tabs ? "[ " : "[");
        for( int i = 0, cnt = o->count; i < cnt; ++i ) {
            fprintf(fp, "%s", sep); sep = tabs ? ", " : ",";
            json5_write(fp, o->array + i);
        }
        fprintf(fp, "%s", tabs ? " ]" : "]");
    }
    else if( o->type == JSON5_OBJECT ) {
        const char *sep = "";
        fprintf(fp, "%*.s{%s", 0 * (++indent) * tabs, "", tabs ? "\n":"");
        for( int i = 0, cnt = o->count; i < cnt; ++i ) {
            fprintf(fp, "%s", sep); sep = tabs ? ",\n" : ",";
            json5_write(fp, o->nodes + i);
        }
        fprintf(fp, "%s%*.s}", tabs ? "\n":"", (--indent) * tabs, "");
    } else {
        char p[16] = {0}; // JSON5_ASSERT expects a `p` in scope
        JSON5_ASSERT; /* "json5_error_invalid_value"; */
    }
}

#ifdef JSON5_BENCH
#include <time.h>
int main() {
    // https://www.reddit.com/r/datasets/comments/1uyd0t/200000_jeopardy_questions_in_a_json_file/
    char *content = 0;
    // Single-pass loop: `continue` bails out early while still closing the file.
    for( FILE *fp = fopen("jeopardy.json", "rb"); fp; fclose(fp), fp = 0 ) {
        if( fseek(fp, 0L, SEEK_END) != 0 ) continue;
        long size = ftell(fp);
        if( size < 0 || fseek(fp, 0L, SEEK_SET) != 0 ) continue;
        content = (char*)malloc( (size_t)size + 1 );
        if( !content ) continue;
        size_t got = fread(content, 1, (size_t)size, fp);
        content[got] = 0; // terminate at what was actually read
    }

    if( content ) {
        clock_t start = clock();
        json5 root = {0};
        char *error = json5_parse(&root, content, 0);
        clock_t end = clock();
        double delta = ( end - start ) / (double)CLOCKS_PER_SEC;

        if( !error ) {
            printf("Parsing time: %.3fms\n", delta*1000);
            printf("Total nodes: %d\n", (int)array_count(root.array));
            printf("Category: %s, air date: %s\nQuestion: %s\n", root.array[0].nodes[0].string,
                                                                root.array[0].nodes[1].string,
                                                                root.array[0].nodes[2].string);
        } else {
            printf("Error: %s\n", error);
        }

        json5_free(&root);
        free(content);
    }
}
#define main main__
#endif

#ifdef JSON5_DEMO
int main() {
    char source5[] =
    " // comments\n" /* json5 sample */
    " unquoted: 'and you can quote me on that',\n"
    " singleQuotes: 'I can use \"double quotes\" here',\n"
    " lineBreaks : \"Look, Mom! \\\n"
    "No \\n's!\",\n"
    " hexadecimal: 0x100,\n"
    " leadingDecimalPoint: .8675309, andTrailing: 8675309.,\n"
    " positiveSign: +1,\n"
    " trailingComma: 'in objects', andIn: ['arrays', ],\n"
    " \"backwardsCompatible\": \"with JSON\",\n"
    ""
    " ip = \"127.0.0.1\"\n" /* sjson sample */
    " port = 8888\n"
    ""
    " /* comment //nested comment*/\n" /* tests */
    " // comment /*nested comment*/\n"
    " nil: null,"
    " \"+lšctžýáíé=:\": true,,,,"
    " huge: 2.2239333e5, "
    " array: [+1,2,-3,4,5], "
    " hello: 'world /*comment in string*/ //again', "
    " abc: 42.67, def: false, "
    " children : { a: 1, b: 2, },"
    " invalids : [ nan, NaN, -nan, -NaN, inf, Infinity, -inf, -Infinity ],"
    ""
    " multiline: `this is\n"
    "a multiline string\n"
    "yeah`"
    "}\n";

    json5 root = { 0 };
    char *error = json5_parse(&root, source5, 0);
    if( error ) {
        printf("Error: %s\n", error);
    } else {
        json5_write(stdout, &root);
    }
    json5_free(&root);
}
#define main main__
#endif

#endif // JSON5_C