v4k-git-backup/engine/split/3rd_json5.h

439 lines
13 KiB
C

// JSON5 + SJSON parser module
//
// License:
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
// No warranty is implied, use at your own risk.
//
// Credits:
// r-lyeh (fork)
// Dominik Madarasz (@zaklaus) (original code)
#ifndef JSON5_H
#define JSON5_H
#ifndef JSON5_ASSERT
#define JSON5_ASSERT do { printf("JSON5: Error L%d while parsing '%c' in '%.16s'\n", __LINE__, p[0], p); assert(0); } while(0)
#endif
#include <stdint.h>
#include <stdio.h>
typedef enum json5_type {
JSON5_UNDEFINED, // 0
JSON5_NULL, // 1
JSON5_BOOL, // 2
JSON5_OBJECT, // 3
JSON5_STRING, // 4
JSON5_ARRAY, // 5
JSON5_INTEGER, // 6
JSON5_REAL, // 7
} json5_type;
typedef struct json5 {
char* name;
#ifdef NDEBUG
unsigned type : 3;
#else
json5_type type;
#endif
unsigned count : 29;
union {
struct json5* array;
struct json5* nodes;
int64_t integer;
double real;
char* string;
int boolean;
};
} json5;
char* json5_parse(json5 *root, char *source, int flags);
void json5_write(FILE *fp, const json5 *root);
void json5_free(json5 *root);
#endif // JSON5_H
// json5 ----------------------------------------------------------------------
#ifdef JSON5_C
//#pragma once
#include <assert.h>
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
char *json5__trim(char *p) {
while (*p) {
/**/ if( isspace(*p) ) ++p;
else if( p[0] == '/' && p[1] == '*' ) { // skip C comment
for( p += 2; *p && !(p[0] == '*' && p[1] == '/'); ++p) {}
if( *p ) p += 2;
}
else if( p[0] == '/' && p[1] == '/' ) { // skip C++ comment
for( p += 2; *p && p[0] != '\n'; ++p) {}
if( *p ) ++p;
}
else break;
}
return p;
}
char *json5__parse_value(json5 *obj, char *p, char **err_code);
char *json5__parse_string(json5 *obj, char *p, char **err_code) {
assert(obj && p);
if( *p == '"' || *p == '\'' || *p == '`' ) {
obj->type = JSON5_STRING;
obj->string = p + 1;
char eos_char = *p, *b = obj->string, *e = b;
while (*e) {
/**/ if( *e == '\\' && (e[1] == eos_char) ) ++e;
else if( *e == '\\' && (e[1] == '\r' || e[1] == '\n') ) *e = ' ';
else if( *e == eos_char ) break;
++e;
}
*e = '\0';
return p = e + 1;
}
//JSON5_ASSERT; *err_code = "json5_error_invalid_value";
return NULL;
}
char *json5__parse_object(json5 *obj, char *p, char **err_code) {
assert(obj && p);
if( 1 /* *p == '{' */ ) { /* <-- for SJSON */
int skip = *p == '{'; /* <-- for SJSON */
obj->type = JSON5_OBJECT;
obj->nodes = 0;
obj->count = 0;
while (*p) {
json5 node = { 0 };
do { p = json5__trim(p + skip); skip = 1; } while( *p == ',' );
if( *p == '}' ) {
++p;
break;
}
// @todo: is_unicode() (s[0] == '\\' && isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]))) {
else if( isalnum(*p) || *p == '_' || *p == '$' || *p == '.' ) { // also || is_unicode(p)
node.name = p;
do {
++p;
} while (*p && (isalnum(*p) || *p == '_' || *p == '$' || *p == '.') ); // also || is_unicode(p)
char *e = p;
p = json5__trim(p);
*e = '\0';
}
else { //if( *p == '"' || *p == '\'' || *p == '`' ) {
char *ps = json5__parse_string(&node, p, err_code);
if( !ps ) {
return NULL;
}
p = ps;
node.name = node.string;
p = json5__trim(p);
}
// @todo: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
if( !(node.name && node.name[0]) ) { // !json5__validate_name(node.name) ) {
JSON5_ASSERT; *err_code = "json5_error_invalid_name";
return NULL;
}
if( !p || (*p && (*p != ':' && *p != '=' /* <-- for SJSON */)) ) {
JSON5_ASSERT; *err_code = "json5_error_invalid_name";
return NULL;
}
p = json5__trim(p + 1);
p = json5__parse_value(&node, p, err_code);
if( *err_code[0] ) {
return NULL;
}
if( node.type != JSON5_UNDEFINED ) {
array_push(obj->nodes, node);
++obj->count;
}
if( *p == '}') { ++p; break; }
}
return p;
}
JSON5_ASSERT; *err_code = "json5_error_invalid_value";
return NULL;
}
char *json5__parse_value(json5 *obj, char *p, char **err_code) {
assert(obj && p);
p = json5__trim(p);
char *is_string = json5__parse_string(obj, p, err_code);
if( is_string ) {
p = is_string;
if( *err_code[0] ) {
return NULL;
}
}
else if( *p == '{' ) {
p = json5__parse_object( obj, p, err_code );
if( *err_code[0] ) {
return NULL;
}
}
else if( *p == '[' ) {
obj->type = JSON5_ARRAY;
obj->array = 0;
obj->count = 0;
while (*p) {
json5 elem = { 0 };
do { p = json5__trim(p + 1); } while( *p == ',' );
if( *p == ']') { ++p; break; }
p = json5__parse_value(&elem, p, err_code);
if( *err_code[0] ) {
return NULL;
}
if( elem.type != JSON5_UNDEFINED ) {
array_push(obj->array, elem);
++obj->count;
}
if (*p == ']') { ++p; break; }
}
}
else if( isalpha(*p) || (*p == '-' && !isdigit(p[1])) ) {
const char *labels[] = { "null", "on","true", "off","false", "nan","NaN", "-nan","-NaN", "inf","Infinity", "-inf","-Infinity", 0 };
const int lenghts[] = { 4, 2,4, 3,5, 3,3, 4,4, 3,8, 4,9 };
for( int i = 0; labels[i]; ++i ) {
if( !strncmp(p, labels[i], lenghts[i] ) ) {
p += lenghts[i];
#ifdef _MSC_VER // somehow, NaN is apparently signed in MSC
/**/ if( i >= 5 ) obj->type = JSON5_REAL, obj->real = i >= 11 ? -INFINITY : i >= 9 ? INFINITY : i >= 7 ? NAN :-NAN;
#else
/**/ if( i >= 5 ) obj->type = JSON5_REAL, obj->real = i >= 11 ? -INFINITY : i >= 9 ? INFINITY : i >= 7 ? -NAN : NAN;
#endif
else if( i >= 1 ) obj->type = JSON5_BOOL, obj->boolean = i <= 2;
else obj->type = JSON5_NULL;
break;
}
}
if( obj->type == JSON5_UNDEFINED ) {
JSON5_ASSERT; *err_code = "json5_error_invalid_value";
return NULL;
}
}
else if( isdigit(*p) || *p == '+' || *p == '-' || *p == '.' ) {
char buffer[32] = {0}, *buf = buffer, is_hex = 0, is_dbl = 0;
while( *p && strchr("+-.xX0123456789aAbBcCdDeEfF", *p)) {
is_hex |= (*p | 32) == 'x';
is_dbl |= *p == '.';
*buf++ = *p++;
}
obj->type = is_dbl ? JSON5_REAL : JSON5_INTEGER;
/**/ if( is_dbl ) sscanf( buffer, "%lf", &obj->real );
else if( is_hex ) sscanf( buffer, "%llx", &obj->integer ); // SCNx64 -> inttypes.h
else sscanf( buffer, "%lld", &obj->integer ); // SCNd64 -> inttypes.h
}
else {
return NULL;
}
return p;
}
char *json5_parse(json5 *root, char *p, int flags) {
char *err_code = "";
*root = (json5) {0};
if( p && p[0] ) {
p = json5__trim(p);
if( *p == '[' ) { /* <-- for SJSON */
json5__parse_value(root, p, &err_code);
} else {
json5__parse_object(root, p, &err_code); /* <-- for SJSON */
}
} else {
root->type = JSON5_OBJECT;
}
return err_code[0] ? err_code : 0;
}
void json5_free(json5 *root) {
if( root->type == JSON5_ARRAY && root->array ) {
for( int i = 0, cnt = array_count(root->array); i < cnt; ++i ) {
json5_free(root->array + i);
}
array_free(root->array);
}
if( root->type == JSON5_OBJECT && root->nodes ) {
for( int i = 0, cnt = array_count(root->nodes); i < cnt; ++i ) {
json5_free(root->nodes + i);
}
array_free(root->nodes);
}
*root = (json5) {0}; // needed?
}
void json5_write(FILE *fp, const json5 *o) {
static __thread int indent = 0;
int tabs = 1; // 0,1,2,4,8
if( o->name ) {
fprintf(fp, "%*.s\"%s\"%s", indent * tabs, "", o->name, tabs ? ": " : ":");
}
/**/ if( o->type == JSON5_NULL ) fprintf(fp, "%s", "null");
else if( o->type == JSON5_BOOL ) fprintf(fp, "%s", o->boolean ? "true" : "false");
else if( o->type == JSON5_INTEGER ) fprintf(fp, "%lld", o->integer);
else if( o->type == JSON5_REAL ) {
/**/ if( isnan(o->real) ) fprintf(fp, "%s", signbit(o->real) ? "-nan" : "nan" );
else if( isinf(o->real) ) fprintf(fp, "%s", signbit(o->real) ? "-inf" : "inf" );
else fprintf(fp, "%1.8e", o->real); // %1.8e from google:"randomascii 100 digits" ; %.4llf for compactness
}
#if 0
else if( o->type == JSON5_STRING ) { // write (escaped) string
char chars[] = "\\\"\n\r\b\f\v", remap[] = "\\\"nrbfv", esc[256];
for( int i = 0; chars[i]; ++i ) esc[ chars[i] ] = remap[i];
const char *b = o->string, *e = strpbrk(b, chars), *sep = "\"";
while( e ) {
fprintf(fp, "%s%.*s\\%c", sep, (int)(e - b), b, esc[(unsigned char)*e] );
e = strpbrk( b = e + 1, chars);
sep = "";
}
fprintf(fp, "%s%s\"", sep, b);
}
#else
else if( o->type == JSON5_STRING ) { // write string
fprintf(fp, "\"%s\"", o->string);
}
#endif
else if( o->type == JSON5_ARRAY ) {
const char *sep = "";
fprintf(fp, "%s", tabs ? "[ " : "[");
for( int i = 0, cnt = o->count; i < cnt; ++i ) {
fprintf(fp, "%s", sep); sep = tabs ? ", " : ",";
json5_write(fp, o->array + i);
}
fprintf(fp, "%s", tabs ? " ]" : "]");
}
else if( o->type == JSON5_OBJECT ) {
const char *sep = "";
fprintf(fp, "%*.s{%s", 0 * (++indent) * tabs, "", tabs ? "\n":"");
for( int i = 0, cnt = o->count; i < cnt; ++i ) {
fprintf(fp, "%s", sep); sep = tabs ? ",\n" : ",";
json5_write(fp, o->nodes + i);
}
fprintf(fp, "%s%*.s}", tabs ? "\n":"", (--indent) * tabs, "");
} else {
char p[16] = {0};
JSON5_ASSERT; /* "json5_error_invalid_value"; */
}
}
#ifdef JSON5_BENCH
#include <time.h>
int main() {
// https://www.reddit.com/r/datasets/comments/1uyd0t/200000_jeopardy_questions_in_a_json_file/
char *content = 0;
for( FILE *fp = fopen("jeopardy.json", "rb"); fp; fclose(fp), fp = 0 ) {
fseek(fp, 0L, SEEK_END);
size_t pos = ftell(fp);
fseek(fp, 0L, SEEK_SET);
content = (char*)malloc( pos + 1 );
fread(content, 1, pos, fp);
content[pos] = 0;
}
if( content ) {
clock_t start = clock();
json5 root = {0};
char *error = json5_parse(&root, content, 0);
clock_t end = clock();
double delta = ( end - start ) / (double)CLOCKS_PER_SEC;
if( !error ) {
printf("Parsing time: %.3fms\n", delta*1000);
printf("Total nodes: %d\n", array_count(root.array));
printf("Category: %s, air date: %s\nQuestion: %s\n", root.array[0].nodes[0].string,
root.array[0].nodes[1].string,
root.array[0].nodes[2].string);
} else {
printf("Error: %s\n", error);
}
json5_free(&root);
free(content);
}
}
#define main main__
#endif
#ifdef JSON5_DEMO
int main() {
char source5[] =
" // comments\n" /* json5 sample */
" unquoted: 'and you can quote me on that',\n"
" singleQuotes: 'I can use \"double quotes\" here',\n"
" lineBreaks : \"Look, Mom! \\\n"
"No \\n's!\",\n"
" hexadecimal: 0x100,\n"
" leadingDecimalPoint: .8675309, andTrailing: 8675309.,\n"
" positiveSign: +1,\n"
" trailingComma: 'in objects', andIn: ['arrays', ],\n"
" \"backwardsCompatible\": \"with JSON\",\n"
""
" ip = \"127.0.0.1\"\n" /* sjson sample */
" port = 8888\n"
""
" /* comment //nested comment*/\n" /* tests */
" // comment /*nested comment*/\n"
" nil: null,"
" \"+lšctžýáíé=:\": true,,,,"
" huge: 2.2239333e5, "
" array: [+1,2,-3,4,5], "
" hello: 'world /*comment in string*/ //again', "
" abc: 42.67, def: false, "
" children : { a: 1, b: 2, },"
" invalids : [ nan, NaN, -nan, -NaN, inf, Infinity, -inf, -Infinity ],"
""
" multiline: `this is\n"
"a multiline string\n"
"yeah`"
"}\n";
json5 root = { 0 };
char *error = json5_parse(&root, source5, 0);
if( error ) {
printf("Error: %s\n", error);
} else {
json5_write(stdout, &root);
}
json5_free(&root);
}
#define main main__
#endif
#endif // JSON5_C