Fix dumping of Unicode control codes
author Petri Lehtinen <petri@digip.org>
Sat, 4 Jul 2009 19:02:16 +0000 (22:02 +0300)
committer Petri Lehtinen <petri@digip.org>
Thu, 9 Jul 2009 17:59:52 +0000 (20:59 +0300)
Inside strings, all UTF-8 characters except for \, " and Unicode
control codes are dumped as-is. The control codes that have a special
one-character escape use that escape, and other control codes are
dumped using the \uXXXX escape.

src/dump.c

index 02692f7..b8bd460 100644 (file)
@@ -66,27 +66,47 @@ static int dump_string(const char *str, dump_func dump, void *data)
         return -1;
 
     end = str;
-    while(*end)
+    while(1)
     {
-        while(*end && *end != '\\' && *end != '"')
+        const char *text;
+        char seq[7];
+        int length;
+
+        while(*end && *end != '\\' && *end != '"' && (*end < 0 || *end > 0x1F))
             end++;
 
-        if(end != str)
+        if(end != str) {
             if(dump(str, end - str, data))
                 return -1;
-
-        if(*end == '\\')
-        {
-            if(dump("\\\\", 2, data))
-                return -1;
-            end++;
         }
-        else if(*end == '"')
+
+        if(!*end)
+            break;
+
+        /* handle \, ", and control codes */
+        length = 2;
+        switch(*end)
         {
-            if(dump("\\\"", 2, data))
-                return -1;
-            end++;
+            case '\\': text = "\\\\"; break;
+            case '\"': text = "\\\""; break;
+            case '\b': text = "\\b"; break;
+            case '\f': text = "\\f"; break;
+            case '\n': text = "\\n"; break;
+            case '\r': text = "\\r"; break;
+            case '\t': text = "\\t"; break;
+            default:
+            {
+                sprintf(seq, "\\u00%02x", *end);
+                text = seq;
+                length = 6;
+                break;
+            }
         }
+
+        if(dump(text, length, data))
+            return -1;
+
+        end++;
         str = end;
     }