+/*
+ * Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
-#include <unistd.h>
#include <assert.h>
+#include <stdint.h>
#include <jansson.h>
#include "jansson_private.h"
error->line = lex->line;
if(saved_text && saved_text[0])
{
- snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
- "%s near '%s'", text, saved_text);
+ if(lex->saved_text.length <= 20) {
+ snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
+ "%s near '%s'", text, saved_text);
+ }
+ else
+ snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", text);
}
else
{
c = stream->buffer[0];
- if(c == EOF && stream->eof(stream->data))
- return EOF;
-
- if(c < 0)
+ if((unsigned char)c >= 0x80 && c != (char)EOF)
{
/* multi-byte UTF-8 sequence */
int i, count;
for(i = 1; i < count; i++)
stream->buffer[i] = stream->get(stream->data);
- if(!utf8_check_full(stream->buffer, count))
+ if(!utf8_check_full(stream->buffer, count, NULL))
goto out;
stream->stream_pos += count;
}
}
- return (char)stream->buffer[stream->buffer_pos++];
+ return stream->buffer[stream->buffer_pos++];
out:
error_set(error, NULL, "unable to decode byte 0x%x at position %d",
(unsigned char)c, stream->stream_pos);
+
+ stream->buffer[0] = EOF;
+ stream->buffer[1] = '\0';
+ stream->buffer_pos = 1;
+
return EOF;
}
{
assert(stream->buffer_pos > 0);
stream->buffer_pos--;
- assert(stream->buffer[stream->buffer_pos] == (unsigned char)c);
+ assert(stream->buffer[stream->buffer_pos] == c);
}
/* Counterpart of lex_get_save(): hands c back to the input stream with
 * stream_unget() and pops one character from lex->saved_text.
 * The popped character is asserted to be equal to c. */
static void lex_unget_unsave(lex_t *lex, char c)
{
char d;
- if(c != EOF)
- stream_unget(&lex->stream, c);
+ stream_unget(&lex->stream, c); /* now unconditional: an EOF marker is ungot like any other character */
d = strbuffer_pop(&lex->saved_text);
assert(c == d); /* d is read only here, so it is unused when NDEBUG is defined */
}
/* Passes every byte still cached in lex->stream.buffer, from buffer_pos up
 * to (not including) the terminating '\0', to lex_save() and advances
 * buffer_pos past each of them.  Called from lex_scan() for an unrecognised
 * character so that the rest of its UTF-8 sequence is saved as well. */
+static void lex_save_cached(lex_t *lex)
+{
+ while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') /* '\0' marks the end of the cached bytes */
+ {
+ lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
+ lex->stream.buffer_pos++; /* consume the byte so it is not read again */
+ }
+}
+
+/* Decodes the four hex digits of a backslash-u escape into their numeric
+ * value.
+ *
+ * Assumes that str points to 'u' plus at least 4 valid hex digits.  The 'u'
+ * is checked with assert(); the digits str[1]..str[4] are folded in most
+ * significant digit first, so valid input yields a value in 0..0xFFFF.
+ *
+ * NOTE(review): the digits are classified with isdigit/islower/isupper on a
+ * plain char.  Input that is not an ASCII hex digit is therefore not
+ * rejected reliably: letters beyond 'f'/'F' are accepted, and the final
+ * assert(0) is compiled out when NDEBUG is defined.  Callers must validate
+ * the digits before calling.
+ */
+static int32_t decode_unicode_escape(const char *str)
+{
+ int i;
+ int32_t value = 0;
+
+ assert(str[0] == 'u');
+
+ for(i = 1; i <= 4; i++) { /* str[0] is the 'u'; the digits are str[1]..str[4] */
+ char c = str[i];
+ value <<= 4; /* make room for the next 4-bit digit */
+ if(isdigit(c))
+ value += c - '0';
+ else if(islower(c))
+ value += c - 'a' + 10;
+ else if(isupper(c))
+ value += c - 'A' + 10;
+ else
+ assert(0); /* precondition violated: not a hex digit */
+ }
+
+ return value;
+}
+
static void lex_scan_string(lex_t *lex, json_error_t *error)
{
char c;
const char *p;
char *t;
+ int i;
+ lex->value.string = NULL;
lex->token = TOKEN_INVALID;
- /* skip the " */
c = lex_get_save(lex, error);
while(c != '"') {
- if(c == EOF) {
+ if(c == (char)EOF) {
+ lex_unget_unsave(lex, c);
if(lex_eof(lex))
error_set(error, lex, "premature end of input");
goto out;
}
- else if(0 <= c && c <= 0x1F) {
+ else if((unsigned char)c <= 0x1F) {
/* control character */
lex_unget_unsave(lex, c);
if(c == '\n')
c = lex_get_save(lex, error);
if(c == 'u') {
c = lex_get_save(lex, error);
- for(int i = 0; i < 4; i++) {
+ for(i = 0; i < 4; i++) {
if(!isxdigit(c)) {
lex_unget_unsave(lex, c);
error_set(error, lex, "invalid escape");
if(*p == '\\') {
p++;
if(*p == 'u') {
- /* TODO */
- error_set(error, lex, "\\u escapes are not yet supported");
- free(lex->value.string);
- lex->value.string = NULL;
- goto out;
- } else {
+ char buffer[4];
+ int length;
+ int32_t value;
+
+ value = decode_unicode_escape(p);
+ p += 5;
+
+ if(0xD800 <= value && value <= 0xDBFF) {
+ /* surrogate pair */
+ if(*p == '\\' && *(p + 1) == 'u') {
+ int32_t value2 = decode_unicode_escape(++p);
+ p += 5;
+
+ if(0xDC00 <= value2 && value2 <= 0xDFFF) {
+ /* valid second surrogate */
+ value =
+ ((value - 0xD800) << 10) +
+ (value2 - 0xDC00) +
+ 0x10000;
+ }
+ else {
+ /* invalid second surrogate */
+ error_set(error, lex,
+ "invalid Unicode '\\u%04X\\u%04X'",
+ value, value2);
+ goto out;
+ }
+ }
+ else {
+ /* no second surrogate */
+ error_set(error, lex, "invalid Unicode '\\u%04X'",
+ value);
+ goto out;
+ }
+ }
+ else if(0xDC00 <= value && value <= 0xDFFF) {
+ error_set(error, lex, "invalid Unicode '\\u%04X'", value);
+ goto out;
+ }
+ else if(value == 0)
+ {
+ error_set(error, lex, "\\u0000 is not allowed");
+ goto out;
+ }
+
+ if(utf8_encode(value, buffer, &length))
+ assert(0);
+
+ memcpy(t, buffer, length);
+ t += length;
+ }
+ else {
switch(*p) {
case '"': case '\\': case '/':
*t = *p; break;
case 't': *t = '\t'; break;
default: assert(0);
}
+ t++;
+ p++;
}
}
else
- *t = *p;
-
- t++;
- p++;
+ *(t++) = *(p++);
}
*t = '\0';
lex->token = TOKEN_STRING;
+ return;
out:
- return;
+ free(lex->value.string);
}
-static void lex_scan_number(lex_t *lex, char c, json_error_t *error)
+static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
{
const char *saved_text;
char *end;
+ double value;
lex->token = TOKEN_INVALID;
goto out;
}
}
- else /* c != '0' */ {
+ else if(isdigit(c)) {
c = lex_get_save(lex, error);
while(isdigit(c))
c = lex_get_save(lex, error);
}
+ else {
+ lex_unget_unsave(lex, c);
+ goto out;
+ }
if(c != '.' && c != 'E' && c != 'e') {
+ long value;
+
lex_unget_unsave(lex, c);
- lex->token = TOKEN_INTEGER;
saved_text = strbuffer_value(&lex->saved_text);
- lex->value.integer = strtol(saved_text, &end, 10);
+ value = strtol(saved_text, &end, 10);
assert(end == saved_text + lex->saved_text.length);
- return;
+ if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) {
+ error_set(error, lex, "too big integer");
+ goto out;
+ }
+ else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) {
+ error_set(error, lex, "too big negative integer");
+ goto out;
+ }
+
+ lex->token = TOKEN_INTEGER;
+ lex->value.integer = (int)value;
+ return 0;
}
if(c == '.') {
}
lex_unget_unsave(lex, c);
- lex->token = TOKEN_REAL;
saved_text = strbuffer_value(&lex->saved_text);
- lex->value.real = strtod(saved_text, &end);
+ value = strtod(saved_text, &end);
assert(end == saved_text + lex->saved_text.length);
+ if(value == 0 && errno == ERANGE) {
+ error_set(error, lex, "real number underflow");
+ goto out;
+ }
+
+ /* Cannot test for +/-HUGE_VAL because the HUGE_VAL constant is
+ only defined in C99 mode, so rely on errno alone. */
+ else if(errno == ERANGE) {
+ error_set(error, lex, "real number overflow");
+ goto out;
+ }
+
+ lex->token = TOKEN_REAL;
+ lex->value.real = value;
+ return 0;
+
out:
- return;
+ return -1;
}
static int lex_scan(lex_t *lex, json_error_t *error)
strbuffer_clear(&lex->saved_text);
if(lex->token == TOKEN_STRING) {
- free(lex->value.string);
- lex->value.string = NULL;
+ free(lex->value.string);
+ lex->value.string = NULL;
}
c = lex_get(lex, error);
c = lex_get(lex, error);
}
- if(c == EOF) {
+ if(c == (char)EOF) {
if(lex_eof(lex))
lex->token = TOKEN_EOF;
else
else if(c == '"')
lex_scan_string(lex, error);
- else if(isdigit(c) || c == '-')
- lex_scan_number(lex, c, error);
+ else if(isdigit(c) || c == '-') {
+ if(lex_scan_number(lex, c, error))
+ goto out;
+ }
else if(isupper(c) || islower(c)) {
/* eat up the whole identifier for clearer error messages */
lex->token = TOKEN_INVALID;
}
- else
+ else {
+ /* save the rest of the input UTF-8 sequence so that the error
+ message contains valid UTF-8 */
+ lex_save_cached(lex);
lex->token = TOKEN_INVALID;
+ }
out:
return lex->token;
}
/* Detaches the string of the current token from the lexer.
 *
 * If the current token is TOKEN_STRING, returns lex->value.string and sets
 * that field to NULL, so a later free(lex->value.string) by the lexer is a
 * no-op.  For any other token, returns NULL.
 * NOTE(review): the caller presumably becomes responsible for freeing the
 * returned buffer -- confirm against the call sites. */
+static char *lex_steal_string(lex_t *lex)
+{
+ char *result = NULL;
+ if(lex->token == TOKEN_STRING)
+ {
+ result = lex->value.string;
+ lex->value.string = NULL; /* the lexer no longer refers to the buffer */
+ }
+ return result;
+}
+
static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data)
{
stream_init(&lex->stream, get, eof, data);
{
if(lex->token == TOKEN_STRING)
free(lex->value.string);
+ strbuffer_close(&lex->saved_text);
}
goto error;
}
- key = strdup(lex->value.string);
+ key = lex_steal_string(lex);
if(!key)
return NULL;
break;
case '{':
- json = parse_object(lex, error);
+ json = parse_object(lex, error);
break;
case '[':
return json;
}
/* Resets *error with error_init() and returns the result of
 * parse_value(lex, error).  The function now has internal linkage (static).
 * It does not itself look at what follows the parsed value. */
-json_t *parse_json(lex_t *lex, json_error_t *error)
+static json_t *parse_json(lex_t *lex, json_error_t *error)
{
error_init(error);
return parse_value(lex, error);
}
-json_t *json_load(const char *path, json_error_t *error)
-{
- json_t *result;
- FILE *fp;
-
- fp = fopen(path, "r");
- if(!fp)
- {
- error_set(error, NULL, "unable to open %s: %s",
- path, strerror(errno));
- return NULL;
- }
-
- result = json_loadf(fp, error);
-
- fclose(fp);
- return result;
-}
-
typedef struct
{
const char *data;
{
char c;
string_data_t *stream = (string_data_t *)data;
- c = stream->data[stream->pos++];
+ c = stream->data[stream->pos];
if(c == '\0')
return EOF;
else
+ {
+ stream->pos++;
return c;
+ }
}
static int string_eof(void *data)
return NULL;
result = parse_json(&lex, error);
+ if(!result)
+ goto out;
+
+ lex_scan(&lex, error);
+ if(lex.token != TOKEN_EOF) {
+ error_set(error, &lex, "end of file expected");
+ json_decref(result);
+ result = NULL;
+ }
+out:
lex_close(&lex);
return result;
}
+
/* Loads a JSON value from the file named by path.
 *
 * Resets *error, opens the file with fopen(path, "r"), passes the stream to
 * json_loadf() and closes it again.
 *
 * Returns whatever json_loadf() returns.  If the file cannot be opened,
 * returns NULL after storing "unable to open <path>: <strerror(errno)>" in
 * *error. */
+json_t *json_load_file(const char *path, json_error_t *error)
+{
+ json_t *result;
+ FILE *fp;
+
+ error_init(error);
+
+ fp = fopen(path, "r");
+ if(!fp)
+ {
+ error_set(error, NULL, "unable to open %s: %s",
+ path, strerror(errno));
+ return NULL;
+ }
+
+ result = json_loadf(fp, error);
+
+ fclose(fp); /* closed on both success and parse failure; its return value is not checked */
+ return result;
+}