Show the whole UTF-8 sequence in error message, not just the first byte
author Petri Lehtinen <petri@digip.org>
Tue, 14 Jul 2009 17:45:28 +0000 (20:45 +0300)
committer Petri Lehtinen <petri@digip.org>
Thu, 16 Jul 2009 06:57:08 +0000 (09:57 +0300)
src/load.c

index bad4be4..f9bcf7b 100644 (file)
@@ -199,6 +199,15 @@ static void lex_unget_unsave(lex_t *lex, char c)
     assert(c == d);
 }
 
+static void lex_save_cached(lex_t *lex) /* pass every byte still left in lex->stream.buffer to lex_save() */
+{
+    while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') /* loop ends at the first '\0' byte; assumes the buffer is NUL-terminated -- TODO confirm */
+    {
+        lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
+        lex->stream.buffer_pos++; /* step past the byte just handed to lex_save() */
+    }
+}
+
 static void lex_scan_string(lex_t *lex, json_error_t *error)
 {
     char c;
@@ -438,8 +447,12 @@ static int lex_scan(lex_t *lex, json_error_t *error)
             lex->token = TOKEN_INVALID;
     }
 
-    else
+    else {
+        /* save the rest of the input UTF-8 sequence to get an error
+           message of valid UTF-8 */
+        lex_save_cached(lex);
         lex->token = TOKEN_INVALID;
+    }
 
 out:
     return lex->token;