Rework support for libcurl-based input to parser.
author Scott Cantor <cantor.2@osu.edu>
Wed, 4 Feb 2009 04:15:57 +0000 (04:15 +0000)
committer Scott Cantor <cantor.2@osu.edu>
Wed, 4 Feb 2009 04:15:57 +0000 (04:15 +0000)
xmltooling/Makefile.am
xmltooling/util/CurlNetAccessor.cpp [deleted file]
xmltooling/util/CurlNetAccessor.hpp [deleted file]
xmltooling/util/CurlURLInputStream.cpp
xmltooling/util/CurlURLInputStream.h [new file with mode: 0644]
xmltooling/util/CurlURLInputStream.hpp [deleted file]
xmltooling/util/ParserPool.cpp
xmltooling/util/ParserPool.h
xmltooling/util/ReloadableXMLFile.cpp
xmltooling/util/ReloadableXMLFile.h
xmltooling/xmltooling.vcproj

index 6096ce1..15e4378 100644 (file)
@@ -100,8 +100,7 @@ soapinclude_HEADERS = \
        soap/OpenSSLSOAPTransport.h
 
 utilinclude_HEADERS = \
-    util/CurlNetAccessor.hpp \
-    util/CurlURLInputStream.hpp \
+    util/CurlURLInputStream.h \
        util/DateTime.h \
        util/NDC.h \
        util/ParserPool.h \
@@ -151,7 +150,6 @@ xmlsec_sources = \
        signature/impl/SignatureValidator.cpp \
        signature/impl/XMLSecSignatureImpl.cpp \
        soap/impl/CURLSOAPTransport.cpp \
-       util/CurlNetAccessor.cpp \
        util/CurlURLInputStream.cpp \
        util/ReplayCache.cpp \
        util/StorageService.cpp
diff --git a/xmltooling/util/CurlNetAccessor.cpp b/xmltooling/util/CurlNetAccessor.cpp
deleted file mode 100644 (file)
index 1774259..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * $Id$
- */
-
-#include "internal.h"
-
-#include <xercesc/util/XMLUniDefs.hpp>
-#include <xercesc/util/XMLUni.hpp>
-#include <xercesc/util/XMLString.hpp>
-#include <xercesc/util/XMLExceptMsgs.hpp>
-#include <xmltooling/util/CurlURLInputStream.hpp>
-#include <xmltooling/util/CurlNetAccessor.hpp>
-
-using namespace xmltooling;
-using namespace xercesc;
-
-const XMLCh xmltooling::CurlNetAccessor::fgMyName[] =
-{
-    chLatin_C, chLatin_u, chLatin_r, chLatin_l, chLatin_N, chLatin_e,
-    chLatin_t, chLatin_A, chLatin_c, chLatin_c, chLatin_e, chLatin_s,
-    chLatin_s, chLatin_o, chLatin_r, chNull
-};
-
-
-CurlNetAccessor::CurlNetAccessor()
-{
-}
-
-
-CurlNetAccessor::~CurlNetAccessor()
-{
-}
-
-BinInputStream*
-CurlNetAccessor::makeNew(const XMLURL&  urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
-{
-       // Just create a CurlURLInputStream
-       // We defer any checking of the url type for curl in CurlURLInputStream
-       CurlURLInputStream* retStrm =
-               new (urlSource.getMemoryManager()) CurlURLInputStream(urlSource, httpInfo);
-       return retStrm;            
-}
diff --git a/xmltooling/util/CurlNetAccessor.hpp b/xmltooling/util/CurlNetAccessor.hpp
deleted file mode 100644 (file)
index 72ab0dd..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * $Id$
- */
-
-#if !defined(XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP) && !defined(XMLTOOLING_LITE)
-#define XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP
-
-#include <xmltooling/base.h>
-
-#include <xercesc/util/XercesDefs.hpp>
-#include <xercesc/util/XMLURL.hpp>
-#include <xercesc/util/BinInputStream.hpp>
-#include <xercesc/util/XMLNetAccessor.hpp>
-
-namespace xmltooling {
-
-//
-// This class is the wrapper for the socket based code which
-// provides the ability to fetch a resource specified using
-// a HTTP or FTP URL.
-//
-
-class XMLTOOL_API CurlNetAccessor : public xercesc::XMLNetAccessor
-{
-public :
-    CurlNetAccessor();
-    ~CurlNetAccessor();
-    
-    virtual xercesc::BinInputStream* makeNew(const xercesc::XMLURL&  urlSource, const xercesc::XMLNetHTTPInfo* httpInfo=0);
-    virtual const XMLCh* getId() const;
-
-private :
-    static const XMLCh fgMyName[];
-
-    CurlNetAccessor(const CurlNetAccessor&);
-    CurlNetAccessor& operator=(const CurlNetAccessor&);
-
-}; // CurlNetAccessor
-
-
-inline const XMLCh* CurlNetAccessor::getId() const
-{
-    return fgMyName;
-}
-
-
-};
-
-#endif // CURLNETACCESSOR_HPP
-
-
index fc716c1..59ad097 100644 (file)
  * limitations under the License.
  */
 
-/*
- * $Id$
+/**
+ * xmltooling/util/CurlURLInputStream.cpp
+ *
+ * Asynchronous use of curl to fetch data from a URL.
  */
 
 #include "internal.h"
 
-#include <curl/curl.h>
+#include <xmltooling/util/CurlURLInputStream.h>
+#include <xmltooling/util/XMLHelper.h>
 
 #include <xercesc/util/XercesDefs.hpp>
 #include <xercesc/util/XMLNetAccessor.hpp>
 #include <xercesc/util/TranscodingException.hpp>
 #include <xercesc/util/PlatformUtils.hpp>
 
-#include <xmltooling/util/CurlURLInputStream.hpp>
-
 using namespace xmltooling;
 using namespace xercesc;
 
-CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
-      : fMemoryManager(urlSource.getMemoryManager())
-      , fURLSource(urlSource)
-      , fURL(0)
-      , fInputStream(NULL)
-      , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor"))
+// File-local XMLCh constants naming the DOM attributes, elements and values
+// that the DOM-based constructor and init() look up in a configuration element.
+namespace {
+    // Value the "provider" attribute must carry for a TransportOption to be applied.
+    static const XMLCh  _CURL[] =           UNICODE_LITERAL_4(C,U,R,L);
+    // Attribute names read from each TransportOption element.
+    static const XMLCh _option[] =          UNICODE_LITERAL_6(o,p,t,i,o,n);
+    static const XMLCh _provider[] =        UNICODE_LITERAL_8(p,r,o,v,i,d,e,r);
+    // Name of the child elements scanned for libcurl options.
+    static const XMLCh TransportOption[] =  UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n);
+    // Attributes that may carry the resource location ("url" is tried before "uri").
+    static const XMLCh uri[] =              UNICODE_LITERAL_3(u,r,i);
+    static const XMLCh url[] =              UNICODE_LITERAL_3(u,r,l);
+    // Attribute consulted for the host-name verification setting.
+    static const XMLCh verifyHost[] =       UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t);
+}
+
+// Constructs a stream for a URL supplied as a narrow-character string.
+// All handles and byte counters start out zeroed and the overflow buffer
+// starts out empty (head and tail both at the start of fBuffer); init() is
+// then called without a DOM element.
+// NOTE(review): url is handed directly to the std::string member, so a NULL
+// pointer is not checked for here.
+CurlURLInputStream::CurlURLInputStream(const char* url)
+    : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+    , fURL(url)
+    , fMulti(0)
+    , fEasy(0)
+    , fTotalBytesRead(0)
+    , fWritePtr(0)
+    , fBytesRead(0)
+    , fBytesToRead(0)
+    , fDataAvailable(false)
+    , fBufferHeadPtr(fBuffer)
+    , fBufferTailPtr(fBuffer)
+    , fContentType(0)
 {
-       // Get the text of the URL we're going to use
-       fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);
+    init();
 }
 
+// Constructs a stream for a URL supplied as an XMLCh string.
+// The member state is zeroed exactly as in the const char* constructor; the
+// URL is stored afterwards, once it has been converted.
+CurlURLInputStream::CurlURLInputStream(const XMLCh* url)
+    : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+    , fMulti(0)
+    , fEasy(0)
+    , fTotalBytesRead(0)
+    , fWritePtr(0)
+    , fBytesRead(0)
+    , fBytesToRead(0)
+    , fDataAvailable(false)
+    , fBufferHeadPtr(fBuffer)
+    , fBufferTailPtr(fBuffer)
+    , fContentType(0)
+{
+    // auto_ptr_char presumably transcodes the XMLCh text to a narrow string
+    // and frees it on scope exit -- confirm against its definition.
+    auto_ptr_char temp(url);
+    fURL = temp.get();
+    init();
+}
+
+// Constructs a stream from a DOM configuration element.
+// The resource location is read from the "url" attribute, falling back to
+// "uri"; the element is then passed on to init() so that further settings
+// can be read from it.
+// Throws IOException when neither attribute holds a non-empty value.
+CurlURLInputStream::CurlURLInputStream(const DOMElement* e)
+    : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+    , fMulti(0)
+    , fEasy(0)
+    , fTotalBytesRead(0)
+    , fWritePtr(0)
+    , fBytesRead(0)
+    , fBytesToRead(0)
+    , fDataAvailable(false)
+    , fBufferHeadPtr(fBuffer)
+    , fBufferTailPtr(fBuffer)
+    , fContentType(0)
+{
+    // Prefer "url"; only consult "uri" when "url" is absent or empty.
+    const XMLCh* attr = e->getAttributeNS(NULL, url);
+    if (!attr || !*attr) {
+        attr = e->getAttributeNS(NULL, uri);
+        if (!attr || !*attr)
+            throw IOException("No URL supplied via DOM to CurlURLInputStream constructor.");
+    }
+
+    auto_ptr_char temp(attr);
+    fURL = temp.get();
+    init(e);
+}
 
+// Releases the libcurl handles and the transcoded content type.
+// Both handles are tested first because init() resets them to NULL after
+// cleaning them up on a failed initial fetch.
 CurlURLInputStream::~CurlURLInputStream()
 {
-    delete fInputStream;
+    if (fEasy) {
+        // Remove the easy handle from the multi stack
+        curl_multi_remove_handle(fMulti, fEasy);
+
+        // Cleanup the easy handle
+        curl_easy_cleanup(fEasy);
+    }
+
+    if (fMulti) {
+        // Cleanup the multi handle
+        curl_multi_cleanup(fMulti);
+    }
+
+    // Frees the string allocated by XMLString::transcode() in init(), if any.
+    XMLString::release(&fContentType);
 }
 
+// Second-stage construction shared by all three constructors.
+//
+// Allocates the libcurl multi and easy handles, applies the default transfer
+// options, layers any settings found on the optional DOM element on top
+// (verifyHost attribute and TransportOption children), queues the transfer and
+// pumps it until the first bytes land in the overflow buffer or the transfer
+// ends, then records the Content-Type reported by libcurl.
+//
+// @param e  optional configuration element; NULL keeps the defaults
+// @throws IOException if a handle cannot be allocated or the initial fetch
+//         raises an XMLException
+void CurlURLInputStream::init(const DOMElement* e)
+{
+    // Allocate the curl multi handle
+    fMulti = curl_multi_init();
+
+    // Allocate the curl easy handle
+    fEasy = curl_easy_init();
+
+    if (!fMulti || !fEasy) {
+        // This runs inside a constructor, so the destructor will not be invoked
+        // once we throw: release whichever handle did get allocated.
+        if (fEasy) {
+            curl_easy_cleanup(fEasy);
+            fEasy = NULL;
+        }
+        if (fMulti) {
+            curl_multi_cleanup(fMulti);
+            fMulti = NULL;
+        }
+        throw IOException("Failed to allocate libcurl handles.");
+    }
+
+    curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str());
+
+    // Set up a way to receive the data
+    curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                       // Pass this pointer to write function
+    curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
+
+    // Numeric options are passed as long: curl_easy_setopt is a varargs call
+    // and libcurl reads these values as long.
+
+    // Do redirects
+    curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1L);
+    curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6L);
+
+    // Default settings.
+    curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 15L);
+    curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 30L);
+    curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, 0L);
+    curl_easy_setopt(fEasy, CURLOPT_USERPWD, NULL);
+    curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2L);
+    curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0L);
+    curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2");
+    curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1L);
+    curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1L);
+    curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1L);
+
+    // Have libcurl write its error text into fError, which is logged on failure.
+    fError[0] = 0;
+    curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError);
+
+    if (e) {
+        // A verifyHost value starting with 'f' or '0' turns host-name checking
+        // off; anything else leaves the default of 2 set above.
+        const XMLCh* flag = e->getAttributeNS(NULL, verifyHost);
+        if (flag && (*flag == chLatin_f || *flag == chDigit_0))
+            curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0L);
+
+        // Process TransportOption elements.
+        bool success;
+        DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption);
+        while (child) {
+            // Only options addressed to the CURL provider that carry a value are applied.
+            if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(NULL,_provider), _CURL)) {
+                auto_ptr_char option(child->getAttributeNS(NULL,_option));
+                auto_ptr_char value(child->getFirstChild()->getNodeValue());
+                if (option.get() && *option.get() && value.get() && *value.get()) {
+                    // For libcurl, the option is an enum and the value type depends on the option.
+                    CURLoption opt = static_cast<CURLoption>(strtol(option.get(), NULL, 10));
+                    if (opt < CURLOPTTYPE_OBJECTPOINT)
+                        success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
+#ifdef CURLOPTTYPE_OFF_T
+                    else if (opt < CURLOPTTYPE_OFF_T)
+                        success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK);
+                    else if (sizeof(curl_off_t) == sizeof(long))
+                        success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
+                    else
+                        success = false;
+#else
+                    else
+                        success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK);
+#endif
+                    if (!success)
+                        fLog.error("failed to set transport option (%s)", option.get());
+                }
+            }
+            child = XMLHelper::getPreviousSiblingElement(child, TransportOption);
+        }
+    }
+
+    // Add easy handle to the multi stack
+    curl_multi_add_handle(fMulti, fEasy);
+
+    fLog.debug("libcurl trying to fetch %s", fURL.c_str());
+
+    // Start reading, to get the content type
+    // (loops until writeCallback has parked data in fBuffer or nothing is running)
+    while(fBufferHeadPtr == fBuffer) {
+        int runningHandles = 0;
+        try {
+            readMore(&runningHandles);
+        }
+        catch (XMLException& ex) {
+            // Tear the handles down here and null them; the destructor will not
+            // run for an object whose constructor throws.
+            curl_multi_remove_handle(fMulti, fEasy);
+            curl_easy_cleanup(fEasy);
+            fEasy = NULL;
+            curl_multi_cleanup(fMulti);
+            fMulti = NULL;
+            auto_ptr_char msg(ex.getMessage());
+            throw IOException(msg.get());
+        }
+        if(runningHandles == 0) break;
+    }
 
-size_t CurlURLInputStream::staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream)
+    // Find the content type
+    char* contentType8 = NULL;
+    curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8);
+    if(contentType8)
+        fContentType = XMLString::transcode(contentType8);
+}
+
+
+// Write callback registered with libcurl in init().
+// outstream is the CurlURLInputStream that init() registered as
+// CURLOPT_WRITEDATA; the call is simply forwarded to that instance's
+// writeCallback() and its result returned.
+size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream)
 {
-    size_t len = size*nmemb;
-    reinterpret_cast<std::stringstream*>(stream)->write(reinterpret_cast<const char*>(ptr),len);
-    return len;
+    return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
 }
 
+// Accepts one chunk of response data from libcurl (via staticWriteCallback).
+// As much of the chunk as the pending readBytes() request still wants is
+// copied straight into the caller's buffer; whatever is left over is parked
+// in the overflow buffer fBuffer for a later readBytes() call.
+//
+// @param buffer  data delivered by libcurl
+// @param size    size of one item
+// @param nitems  number of items; the chunk is size * nitems bytes long
+// @return number of bytes accepted; smaller than size * nitems only when the
+//         overflow buffer has no room for the remainder
+size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems)
+{
+    size_t cnt = size * nitems;
+    size_t totalConsumed = 0;
 
-xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
+    // Consume as many bytes as possible immediately into the buffer
+    size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
+    // No read request is pending while init() pumps the transfer (fWritePtr is
+    // NULL and fBytesToRead is 0 then), so skip the copy instead of handing
+    // memcpy a null destination.
+    if (consume > 0)
+        memcpy(fWritePtr, buffer, consume);
+    fWritePtr       += consume;
+    fBytesRead      += consume;
+    fTotalBytesRead += consume;
+    fBytesToRead    -= consume;
+
+    //fLog.debug("write callback consuming %d bytes", consume);
+
+    // If bytes remain, rebuffer as many as possible into our holding buffer
+    buffer          += consume;
+    totalConsumed   += consume;
+    cnt             -= consume;
+    if (cnt > 0)
+    {
+        // Space left between the current head and the end of fBuffer.
+        size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
+        consume = (cnt > bufAvail) ? bufAvail : cnt;
+        memcpy(fBufferHeadPtr, buffer, consume);
+        fBufferHeadPtr  += consume;
+        buffer          += consume;
+        totalConsumed   += consume;
+        //fLog.debug("write callback rebuffering %d bytes", consume);
+    }
+
+    // Return the total amount we've consumed. If we don't consume all the bytes
+    // then an error will be generated. Since our buffer size is equal to the
+    // maximum size that curl will write, this should never happen unless there
+    // is a logic error somewhere here.
+    return totalConsumed;
+}
+
+bool CurlURLInputStream::readMore(int* runningHandles)
 {
-    if (!fInputStream) {
-        // Allocate the curl easy handle.
-        CURL* fEasy = curl_easy_init();
-        if (!fEasy)
-            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, "unable to allocate libcurl handle", fMemoryManager);
-
-        m_log.debug("libcurl trying to fetch %s", fURL.get());
-
-        // Set URL option
-        curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
-        curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, &fUnderlyingStream);
-        curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);
-        curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30);
-        curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
-        curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
-        curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
-        curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
-        curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
-        curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
-
-        char curl_errorbuf[CURL_ERROR_SIZE];
-        curl_errorbuf[0]=0;
-        curl_easy_setopt(fEasy,CURLOPT_ERRORBUFFER,curl_errorbuf);
-
-        // Fetch the data.
-        if (curl_easy_perform(fEasy) != CURLE_OK) {
-            curl_easy_cleanup(fEasy);
-            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, curl_errorbuf, fMemoryManager);
-        }
+    // Ask the curl to do some work
+    CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
 
-        curl_easy_cleanup(fEasy);
+    // Process messages from curl
+    int msgsInQueue = 0;
+    for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
+    {
+        //fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result);
+
+        if (msg->msg != CURLMSG_DONE)
+            return true;
 
-        /*
         switch (msg->data.result)
         {
         case CURLE_OK:
@@ -106,31 +293,96 @@ xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t
             break;
 
         case CURLE_UNSUPPORTED_PROTOCOL:
-            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
+            ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
             break;
 
         case CURLE_COULDNT_RESOLVE_HOST:
         case CURLE_COULDNT_RESOLVE_PROXY:
-            ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
+            ThrowXML1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURL.c_str());
             break;
 
         case CURLE_COULDNT_CONNECT:
-            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
+            ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
+            break;
 
         case CURLE_RECV_ERROR:
-            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
+            ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str());
             break;
 
         default:
-            m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result);
-            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
+            fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError);
+            ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str());
             break;
         }
-        */
+    }
+
+    // If nothing is running any longer, bail out
+    if(*runningHandles == 0)
+        return false;
+
+    // If there is no further data to read, and we haven't
+    // read any yet on this invocation, call select to wait for data
+    if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
+    {
+        fd_set readSet;
+        fd_set writeSet;
+        fd_set exceptSet;
+        int fdcnt=0;
+
+        FD_ZERO(&readSet);
+        FD_ZERO(&writeSet);
+        FD_ZERO(&exceptSet);
+
+        // Ask curl for the file descriptors to wait on
+        curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
+
+        // Wait on the file descriptors
+        timeval tv;
+        tv.tv_sec  = 2;
+        tv.tv_usec = 0;
+        select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
+    }
+
+    return curlResult == CURLM_CALL_MULTI_PERFORM;
+}
+
+// Copies up to maxToRead bytes of the response into toFill.
+// Bytes already parked in the overflow buffer are handed out first; after
+// that readMore() is called so that writeCallback() can deposit fresh data
+// directly into toFill through fWritePtr.
+//
+// @param toFill     destination buffer supplied by the caller
+// @param maxToRead  capacity of toFill in bytes
+// @return number of bytes stored in toFill by this call
+xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
+{
+    // Publish the request so writeCallback() knows where to write and how much.
+    fBytesRead = 0;
+    fBytesToRead = maxToRead;
+    fWritePtr = toFill;
+
+    // Keep going while the request is unsatisfied and either readMore() asked
+    // to be called again or nothing at all has been delivered yet.
+    for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
+    {
+        // First, any buffered data we have available
+        size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
+        bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
+        if (bufCnt > 0)
+        {
+            memcpy(fWritePtr, fBufferTailPtr, bufCnt);
+            fWritePtr       += bufCnt;
+            fBytesRead      += bufCnt;
+            fTotalBytesRead += bufCnt;
+            fBytesToRead    -= bufCnt;
+
+            // Once drained, rewind both pointers so the whole buffer is free again.
+            fBufferTailPtr  += bufCnt;
+            if (fBufferTailPtr == fBufferHeadPtr)
+                fBufferHeadPtr = fBufferTailPtr = fBuffer;
+
+            //fLog.debug("consuming %d buffered bytes", bufCnt);
+
+            tryAgain = true;
+            continue;
+        }
 
-        fInputStream = new (fMemoryManager) StreamInputSource::StreamBinInputStream(fUnderlyingStream);
+        // Ask the curl to do some work
+        int runningHandles = 0;
+        tryAgain = readMore(&runningHandles);
+
+        // If nothing is running any longer, bail out
+        if (runningHandles == 0)
+            break;
     }
 
-    // Defer to the stream wrapper.
-    return fInputStream->readBytes(toFill, maxToRead);
+    return fBytesRead;
 }
diff --git a/xmltooling/util/CurlURLInputStream.h b/xmltooling/util/CurlURLInputStream.h
new file mode 100644 (file)
index 0000000..e397331
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file xmltooling/util/CurlURLInputStream.h
+ *
+ * Asynchronous use of curl to fetch data from a URL.
+ */
+
+#if !defined(__xmltooling_curlinstr_h__) && !defined(XMLTOOLING_LITE)
+#define __xmltooling_curlinstr_h__
+
+#include <xmltooling/logging.h>
+
+#include <curl/curl.h>
+#include <xercesc/util/BinInputStream.hpp>
+
+namespace xmltooling {
+
+    /**
+     * Adapted from Xerces-C as a more advanced input stream implementation
+     * for subsequent use in parsing remote documents.
+     */
+    class XMLTOOL_API CurlURLInputStream : public xercesc::BinInputStream
+    {
+    public :
+        /**
+         * Constructor.
+         *
+         * @param url   the URL of the resource to fetch
+         */
+        CurlURLInputStream(const char* url);
+
+        /**
+         * Constructor.
+         *
+         * @param url   the URL of the resource to fetch
+         */
+        CurlURLInputStream(const XMLCh* url);
+
+        /**
+         * Constructor taking a DOM element supporting the following content:
+         * 
+         * <dl>
+         *  <dt>uri | url</dt>
+         *  <dd>identifies the remote resource</dd>
+         *  <dt>verifyHost</dt>
+         *  <dd>true iff name of host should be matched against TLS/SSL certificate</dd>
+         *  <dt>TransportOption elements, like so:</dt>
+         *  <dd>&lt;TransportOption provider="CURL" option="150"&gt;0&lt;/TransportOption&gt;</dd>
+         * </dl>
+         * 
+         * @param e     DOM to supply configuration
+         */
+        CurlURLInputStream(const xercesc::DOMElement* e);
+
+        /**
+         * Releases the libcurl handles and the stored content type.
+         */
+        ~CurlURLInputStream();
+
+        /**
+         * Returns the running total of bytes delivered to callers of readBytes().
+         *
+         * @return  value of the internal byte counter
+         */
+#ifdef XMLTOOLING_XERCESC_64BITSAFE
+        XMLFilePos
+#else
+        unsigned int
+#endif
+        curPos() const {
+            return fTotalBytesRead;
+        }
+
+#ifdef XMLTOOLING_XERCESC_INPUTSTREAM_HAS_CONTENTTYPE
+        /**
+         * Returns the content type captured from libcurl during construction.
+         *
+         * @return  the content type, or NULL if libcurl reported none
+         */
+        const XMLCh* getContentType() const {
+            return fContentType;
+        }
+#endif
+
+        /**
+         * Reads response data into a caller-supplied buffer.
+         *
+         * @param toFill        buffer to receive the data
+         * @param maxToRead     capacity of the buffer in bytes
+         * @return  number of bytes placed into the buffer
+         */
+        xsecsize_t readBytes(XMLByte* const toFill, const xsecsize_t maxToRead);
+
+    private :
+        // Copying is disabled: declared but not implemented.
+        CurlURLInputStream(const CurlURLInputStream&);
+        CurlURLInputStream& operator=(const CurlURLInputStream&);
+
+        // libcurl callbacks for data read/write
+        static size_t staticWriteCallback(char *buffer, size_t size, size_t nitems, void *outstream);
+        size_t writeCallback(char *buffer, size_t size, size_t nitems);
+
+        // Shared set-up called by every constructor; e may be NULL.
+        void init(const xercesc::DOMElement* e=NULL);
+        // Drives the transfer one step and reports the number of running handles.
+        bool readMore(int *runningHandles);
+
+        logging::Category&  fLog;
+        std::string         fURL;
+
+        // libcurl handles, allocated in init() and released by the destructor.
+        CURLM*              fMulti;
+        CURL*               fEasy;
+
+        // State of the current readBytes() request, updated by writeCallback().
+        unsigned long       fTotalBytesRead;
+        XMLByte*            fWritePtr;
+        xsecsize_t          fBytesRead;
+        xsecsize_t          fBytesToRead;
+        // NOTE(review): only ever initialised to false in the constructors shown
+        // with this change; confirm whether it is still needed.
+        bool                fDataAvailable;
+
+        // Overflow buffer for when curl writes more data to us
+        // than we've asked for.
+        XMLByte             fBuffer[CURL_MAX_WRITE_SIZE];
+        XMLByte*            fBufferHeadPtr;
+        XMLByte*            fBufferTailPtr;
+
+        // Content type transcoded in init(); released in the destructor.
+        XMLCh*              fContentType;
+
+        // Receives libcurl's error text (registered via CURLOPT_ERRORBUFFER).
+        char                fError[CURL_ERROR_SIZE];
+    };
+};
+
+#endif // __xmltooling_curlinstr_h__
diff --git a/xmltooling/util/CurlURLInputStream.hpp b/xmltooling/util/CurlURLInputStream.hpp
deleted file mode 100644 (file)
index 9b708c1..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * $Id$
- */
-
-#if !defined(XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP) && !defined(XMLTOOLING_LITE)
-#define XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP
-
-#include <xmltooling/logging.h>
-#include <xmltooling/util/ParserPool.h>
-
-#include <sstream>
-
-#include <xercesc/util/XMLURL.hpp>
-#include <xercesc/util/XMLExceptMsgs.hpp>
-#include <xercesc/util/Janitor.hpp>
-#include <xercesc/util/BinInputStream.hpp>
-#include <xercesc/util/XMLNetAccessor.hpp>
-
-namespace xmltooling {
-
-//
-// This class implements the BinInputStream interface specified by the XML
-// parser.
-//
-
-class XMLTOOL_API CurlURLInputStream : public xercesc::BinInputStream
-{
-public :
-    CurlURLInputStream(const xercesc::XMLURL&  urlSource, const xercesc::XMLNetHTTPInfo* httpInfo=0);
-    ~CurlURLInputStream();
-
-#ifdef XMLTOOLING_XERCESC_64BITSAFE
-    XMLFilePos
-#else
-    unsigned int
-#endif
-        curPos() const;
-    xsecsize_t readBytes(XMLByte* const toFill, const xsecsize_t maxToRead);
-
-#ifdef XMLTOOLING_XERCESC_INPUTSTREAM_HAS_CONTENTTYPE
-    const XMLCh* getContentType() const {
-        return NULL;
-    }
-#endif
-
-private :
-    // -----------------------------------------------------------------------
-    //  Unimplemented constructors and operators
-    // -----------------------------------------------------------------------
-    CurlURLInputStream(const CurlURLInputStream&);
-    CurlURLInputStream& operator=(const CurlURLInputStream&);
-
-    static size_t staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream);
-
-    std::stringstream           fUnderlyingStream;
-    xercesc::MemoryManager*     fMemoryManager;
-    xercesc::XMLURL                fURLSource;
-    xercesc::ArrayJanitor<char> fURL;
-    StreamInputSource::StreamBinInputStream* fInputStream;
-    logging::Category&  m_log;
-
-}; // CurlURLInputStream
-
-
-inline
-#ifdef XMLTOOLING_XERCESC_64BITSAFE
-    XMLFilePos
-#else
-    unsigned int
-#endif
-CurlURLInputStream::curPos() const
-{
-    return fInputStream ? fInputStream->curPos() : 0;
-}
-
-};
-
-#endif // CURLURLINPUTSTREAM_HPP
index 9397978..ee64178 100644 (file)
@@ -23,6 +23,7 @@
 #include "internal.h"
 #include "exceptions.h"
 #include "logging.h"
+#include "util/CurlURLInputStream.h"
 #include "util/NDC.h"
 #include "util/ParserPool.h"
 #include "util/XMLHelper.h"
@@ -523,3 +524,49 @@ xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toF
     }
     return bytes_read;
 }
+
+#ifdef XMLTOOLING_LITE
+
+// Lite build: stores the URL in the XMLURL member; nothing is fetched here.
+URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) : InputSource(systemId), m_url(url)
+{
+}
+
+// Lite build: takes the location from the element's "url" attribute, falling
+// back to "uri", and stores it in the XMLURL member.
+// Throws IOException when neither attribute holds a non-empty value.
+URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) : InputSource(systemId)
+{
+    static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
+    static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
+
+    // Prefer "url"; only consult "uri" when "url" is absent or empty.
+    const XMLCh* attr = e->getAttributeNS(NULL, url);
+    if (!attr || !*attr) {
+        attr = e->getAttributeNS(NULL, uri);
+        if (!attr || !*attr)
+            throw IOException("No URL supplied via DOM to URLInputSource constructor.");
+    }
+
+    m_url.setURL(attr);
+}
+
+// Lite build: no libcurl stream is used; the XMLURL member supplies the stream.
+BinInputStream* URLInputSource::makeStream() const
+{
+    // Ask the URL to create us an appropriate input stream
+    return m_url.makeNewStream();
+}
+
+#else
+
+// Full build: keeps the URL in m_url and leaves m_root NULL, so makeStream()
+// will build the stream from the URL rather than from a DOM element.
+URLInputSource::URLInputSource(const XMLCh* url, const char* systemId)
+    : InputSource(systemId), m_url(url), m_root(NULL)
+{
+}
+
+// Full build: only remembers the element pointer; the element is read later,
+// when makeStream() passes it to CurlURLInputStream.
+// NOTE(review): the pointer is stored without copying, so the element
+// presumably has to outlive this source -- confirm with callers.
+URLInputSource::URLInputSource(const DOMElement* e, const char* systemId)
+    : InputSource(systemId), m_root(e)
+{
+}
+
+// Full build: returns a newly allocated CurlURLInputStream, configured from
+// the DOM element when one was supplied and from the plain URL otherwise.
+BinInputStream* URLInputSource::makeStream() const
+{
+    return m_root ? new CurlURLInputStream(m_root) : new CurlURLInputStream(m_url.get());
+}
+
+#endif
index ec9b5b5..fefdddc 100644 (file)
@@ -33,6 +33,7 @@
 #include <xercesc/sax/InputSource.hpp>
 #include <xercesc/util/BinInputStream.hpp>
 #include <xercesc/util/SecurityManager.hpp>
+#include <xercesc/util/XMLURL.hpp>
 
 #ifndef XMLTOOLING_NO_XMLSEC
 # include <xsec/framework/XSECDefs.hpp>
@@ -216,6 +217,51 @@ namespace xmltooling {
     private:
         std::istream& m_is;
     };
+
+    /**
+     * A URL-based parser source that supports a more advanced input stream.
+     */
+    class XMLTOOL_API URLInputSource : public xercesc::InputSource
+    {
+    MAKE_NONCOPYABLE(URLInputSource);
+    public:
+        /**
+         * Constructor.
+         * 
+         * @param url       source of input
+         * @param systemId  optional system identifier to attach to the source
+         */
+        URLInputSource(const XMLCh* url, const char* systemId=NULL);
+
+        /**
+         * Constructor taking a DOM element supporting the following content:
+         * 
+         * <dl>
+         *  <dt>uri | url</dt>
+         *  <dd>identifies the remote resource</dd>
+         *  <dt>verifyHost</dt>
+         *  <dd>true iff name of host should be matched against TLS/SSL certificate</dd>
+         *  <dt>TransportOption elements, like so:</dt>
+         *  <dd>&lt;TransportOption provider="CURL" option="150"&gt;0&lt;/TransportOption&gt;</dd>
+         * </dl>
+         * 
+         * @param e         DOM to supply configuration
+         * @param systemId  optional system identifier to attach to the source
+         */
+        URLInputSource(const xercesc::DOMElement* e, const char* systemId=NULL);
+
+        /// @cond off
+        virtual xercesc::BinInputStream* makeStream() const;
+        /// @endcond
+
+    private:
+#ifdef XMLTOOLING_LITE
+        // Lite build: the URL object that makeStream() asks for a stream.
+        xercesc::XMLURL m_url;
+#else
+        // Full build: the URL as a narrow string, used when no element was given.
+        xmltooling::auto_ptr_char m_url;
+        // Full build: configuration element (not owned); NULL when built from a URL.
+        const xercesc::DOMElement* m_root;
+#endif
+    };
 };
 
 #if defined (_MSC_VER)
index ad43f6f..0aad73b 100644 (file)
@@ -1,5 +1,5 @@
 /*\r
- *  Copyright 2001-2007 Internet2\r
+ *  Copyright 2001-2009 Internet2\r
  * \r
  * Licensed under the Apache License, Version 2.0 (the "License");\r
  * you may not use this file except in compliance with the License.\r
@@ -33,7 +33,6 @@
 \r
 #include <xercesc/framework/LocalFileInputSource.hpp>\r
 #include <xercesc/framework/Wrapper4InputSource.hpp>\r
-#include <xercesc/framework/URLInputSource.hpp>\r
 #include <xercesc/util/XMLUniDefs.hpp>\r
 \r
 using namespace xmltooling::logging;\r
@@ -157,8 +156,8 @@ pair<bool,DOMElement*> ReloadableXMLFile::load(bool backup)
                 m_log.debug("loading configuration from external resource...");\r
 \r
             DOMDocument* doc=NULL;\r
-            auto_ptr_XMLCh widenit(backup ? m_backing.c_str() : m_source.c_str());\r
             if (m_local || backup) {\r
+                auto_ptr_XMLCh widenit(backup ? m_backing.c_str() : m_source.c_str());\r
                 LocalFileInputSource src(widenit.get());\r
                 Wrapper4InputSource dsrc(&src,false);\r
                 if (m_validate)\r
@@ -167,7 +166,7 @@ pair<bool,DOMElement*> ReloadableXMLFile::load(bool backup)
                     doc=XMLToolingConfig::getConfig().getParser().parse(dsrc);\r
             }\r
             else {\r
-                URLInputSource src(widenit.get());\r
+                URLInputSource src(m_root);\r
                 Wrapper4InputSource dsrc(&src,false);\r
                 if (m_validate)\r
                     doc=XMLToolingConfig::getConfig().getValidatingParser().parse(dsrc);\r
index cb4c5c3..d8dda9e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2001-2007 Internet2
+ *  Copyright 2001-2009 Internet2
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -53,6 +53,10 @@ namespace xmltooling {
          *  <dd>use a validating parser</dd>
          *  <dt>reloadChanges</dt>
          *  <dd>enables monitoring of local file for changes</dd>
+         *  <dt>reloadInterval</dt>
+         *  <dd>enables periodic refresh of remote file</dd>
+         *  <dt>backingFilePath</dt>
+         *  <dd>location for backup of remote resource</dd>
          * </dl>
          * 
          * @param e     DOM to supply configuration
index 1cb272f..b6f8676 100644 (file)
                                Name="util"\r
                                >\r
                                <File\r
-                                       RelativePath=".\util\CurlNetAccessor.cpp"\r
-                                       >\r
-                               </File>\r
-                               <File\r
                                        RelativePath=".\util\CurlURLInputStream.cpp"\r
                                        >\r
                                </File>\r
                                Name="util"\r
                                >\r
                                <File\r
-                                       RelativePath=".\util\CurlNetAccessor.hpp"\r
-                                       >\r
-                               </File>\r
-                               <File\r
-                                       RelativePath=".\util\CurlURLInputStream.hpp"\r
+                                       RelativePath=".\util\CurlURLInputStream.h"\r
                                        >\r
                                </File>\r
                                <File\r