From 6b14df13cc1b453d0cc277d0ba50cbabc2745089 Mon Sep 17 00:00:00 2001 From: Petri Lehtinen Date: Sat, 4 Jul 2009 22:02:16 +0300 Subject: [PATCH 1/1] Fix dumping of Unicode control codes Inside strings, all UTF-8 characters except for \, " and Unicode control codes are dumped as-is. The control codes that have a special one-character escape use that escape, and other control codes are dumped using the \uXXXX escape. --- src/dump.c | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/dump.c b/src/dump.c index 02692f7..b8bd460 100644 --- a/src/dump.c +++ b/src/dump.c @@ -66,27 +66,47 @@ static int dump_string(const char *str, dump_func dump, void *data) return -1; end = str; - while(*end) + while(1) { - while(*end && *end != '\\' && *end != '"') + const char *text; + char seq[7]; + int length; + + while(*end && *end != '\\' && *end != '"' && (*end < 0 || *end > 0x1F)) end++; - if(end != str) + if(end != str) { if(dump(str, end - str, data)) return -1; - - if(*end == '\\') - { - if(dump("\\\\", 2, data)) - return -1; - end++; } - else if(*end == '"') + + if(!*end) + break; + + /* handle \, ", and control codes */ + length = 2; + switch(*end) { - if(dump("\\\"", 2, data)) - return -1; - end++; + case '\\': text = "\\\\"; break; + case '\"': text = "\\\""; break; + case '\b': text = "\\b"; break; + case '\f': text = "\\f"; break; + case '\n': text = "\\n"; break; + case '\r': text = "\\r"; break; + case '\t': text = "\\t"; break; + default: + { + sprintf(seq, "\\u00%02x", *end); + text = seq; + length = 6; + break; + } } + + if(dump(text, length, data)) + return -1; + + end++; str = end; } -- 2.1.4