Optimize UTF8 operations.
author cantor <cantor@de75baf8-a10c-0410-a50a-987c0e22f00f>
Fri, 3 Aug 2007 22:56:03 +0000 (22:56 +0000)
committer cantor <cantor@de75baf8-a10c-0410-a50a-987c0e22f00f>
Fri, 3 Aug 2007 22:56:03 +0000 (22:56 +0000)
git-svn-id: https://svn.middleware.georgetown.edu/cpp-xmltooling/trunk@370 de75baf8-a10c-0410-a50a-987c0e22f00f

xmltooling/unicode.cpp
xmltooling/unicode.h
xmltooling/xmltooling-lite.vcproj
xmltooling/xmltooling.vcproj

index c9a0831..b8deeb3 100644 (file)
 
 static const XMLCh UTF8[]={ chLatin_U, chLatin_T, chLatin_F, chDigit_8, chNull };
 
-char* xmltooling::toUTF8(const XMLCh* src)
+char* xmltooling::toUTF8(const XMLCh* src, bool use_malloc)
 {
-    unsigned int eaten;
+    unsigned int eaten,factor=1,bufsize;
     unsigned int srclen=XMLString::stringLen(src);
-    XMLUTF8Transcoder t(UTF8, srclen*4 + 1);
-    char* buf=new char[srclen*4 + 1];
-    memset(buf,0,srclen*4 + 1);
-    t.transcodeTo(
-        src,srclen,
-        reinterpret_cast<XMLByte*>(buf),srclen*4,
-        eaten,XMLTranscoder::UnRep_RepChar);
-    return buf;
+    XMLUTF8Transcoder t(UTF8, 4096);    // block size isn't used any more anyway
+    do {
+        bufsize = factor*srclen + 10;
+        char* buf = use_malloc ? reinterpret_cast<char*>(malloc(bufsize)) : new char[bufsize];
+        memset(buf,0,bufsize);
+        try {
+            t.transcodeTo(
+                src,srclen,
+                reinterpret_cast<XMLByte*>(buf),bufsize-1,
+                eaten,
+                XMLTranscoder::UnRep_Throw);
+        }
+        catch (XMLException&) {
+            if (use_malloc)
+                free(buf);
+            else
+                delete[] buf;
+            throw XMLToolingException("Source string contained an unrepresentable character.");
+        }
+        if (eaten >= srclen)
+            return buf;
+        if (use_malloc)
+            free(buf);
+        else
+            delete[] buf;
+        factor++;
+    } while (1);
 }
 
-XMLCh* xmltooling::fromUTF8(const char* src)
+XMLCh* xmltooling::fromUTF8(const char* src, bool use_malloc)
 {
     unsigned int eaten;
     unsigned int srclen=strlen(src);
-    XMLUTF8Transcoder t(UTF8, srclen + 1);
-    XMLCh* buf=new XMLCh[srclen + 1];
+    XMLUTF8Transcoder t(UTF8, 4096);    // block size isn't used any more anyway
+    XMLCh* buf = use_malloc ? reinterpret_cast<XMLCh*>(malloc((srclen+1)*sizeof(XMLCh))) : new XMLCh[srclen + 1];
     unsigned char* sizes=new unsigned char[srclen];
     memset(buf,0,(srclen+1)*sizeof(XMLCh));
     t.transcodeFrom(
index ee8fdba..6f6893e 100644 (file)
@@ -41,17 +41,21 @@ namespace xmltooling {
 
     /**
      * Transcodes a 16-bit Unicode string into UTF-8.
-     * @param src   the 16-bit string to transcode
+     * 
+     * @param src           the 16-bit string to transcode
+     * @param use_malloc    true iff the result should be allocated with malloc, false to use new
      * @return      a UTF-8 string allocated by the Xerces memory manager 
      */
-    extern XMLTOOL_API char* toUTF8(const XMLCh* src);
+    extern XMLTOOL_API char* toUTF8(const XMLCh* src, bool use_malloc=false);
 
     /**
      * Transcodes a UTF-8 string into 16-bit Unicode.
-     * @param src   the UTF-8 string to transcode
+     * 
+     * @param src           the UTF-8 string to transcode
+     * @param use_malloc    true iff the result should be allocated with malloc, false to use new
      * @return      a 16-bit Unicode string allocated by the Xerces memory manager 
      */
-    extern XMLTOOL_API XMLCh* fromUTF8(const char* src);
+    extern XMLTOOL_API XMLCh* fromUTF8(const char* src, bool use_malloc=false);
 
     /**
      * Writes a Unicode string to an ASCII stream by transcoding to UTF8.
index 9094fa2..3e5af8a 100644 (file)
                                >\r
                        </File>\r
                        <File\r
+                               RelativePath=".\logging.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
                                RelativePath=".\Namespace.h"\r
                                >\r
                        </File>\r
index fc8628e..41b221a 100644 (file)
                                >\r
                        </File>\r
                        <File\r
+                               RelativePath=".\logging.h"\r
+                               >\r
+                       </File>\r
+                       <File\r
                                RelativePath=".\Namespace.h"\r
                                >\r
                        </File>\r