From 2430f9d49125f22590b4f5a35c9e22aea1bd6be2 Mon Sep 17 00:00:00 2001 From: cantor Date: Thu, 3 May 2007 20:54:45 +0000 Subject: [PATCH] Add internal copy of the Xerces net accessor for libcurl, to get SSL support. git-svn-id: https://svn.middleware.georgetown.edu/cpp-xmltooling/trunk@299 de75baf8-a10c-0410-a50a-987c0e22f00f --- xmltooling/Makefile.am | 4 + xmltooling/XMLToolingConfig.cpp | 12 +- xmltooling/util/CurlNetAccessor.cpp | 49 +++++++ xmltooling/util/CurlNetAccessor.h | 66 +++++++++ xmltooling/util/CurlURLInputStream.cpp | 246 +++++++++++++++++++++++++++++++++ xmltooling/util/CurlURLInputStream.h | 121 ++++++++++++++++ xmltooling/xmltooling.vcproj | 20 ++- 7 files changed, 512 insertions(+), 6 deletions(-) create mode 100644 xmltooling/util/CurlNetAccessor.cpp create mode 100644 xmltooling/util/CurlNetAccessor.h create mode 100644 xmltooling/util/CurlURLInputStream.cpp create mode 100644 xmltooling/util/CurlURLInputStream.h diff --git a/xmltooling/Makefile.am b/xmltooling/Makefile.am index 080f2cf..a930ab2 100644 --- a/xmltooling/Makefile.am +++ b/xmltooling/Makefile.am @@ -88,6 +88,8 @@ soapinclude_HEADERS = \ soap/OpenSSLSOAPTransport.h utilinclude_HEADERS = \ + util/CurlNetAccessor.h \ + util/CurlURLInputStream.h \ util/DateTime.h \ util/NDC.h \ util/ParserPool.h \ @@ -162,6 +164,8 @@ libxmltooling_la_SOURCES = \ soap/impl/SOAPImpl.cpp \ soap/impl/SOAPSchemaValidators.cpp \ soap/impl/CURLSOAPTransport.cpp \ + util/CurlNetAccessor.cpp \ + util/CurlURLInputStream.cpp \ util/DateTime.cpp \ util/NDC.cpp \ util/ParserPool.cpp \ diff --git a/xmltooling/XMLToolingConfig.cpp b/xmltooling/XMLToolingConfig.cpp index 6066437..e192b75 100644 --- a/xmltooling/XMLToolingConfig.cpp +++ b/xmltooling/XMLToolingConfig.cpp @@ -32,6 +32,7 @@ #include "signature/Signature.h" #include "soap/SOAP.h" #include "soap/SOAPTransport.h" +#include "util/CurlNetAccessor.h" #include "util/NDC.h" #include "util/ReplayCache.h" #include "util/StorageService.h" @@ -200,7 +201,10 @@ bool 
XMLToolingInternalConfig::init() } log.debug("libcurl %s initialization complete", LIBCURL_VERSION); - xercesc::XMLPlatformUtils::Initialize(); + XMLPlatformUtils::Initialize(); + auto_ptr curler(new CurlNetAccessor()); + delete XMLPlatformUtils::fgNetAccessor; + XMLPlatformUtils::fgNetAccessor = curler.release(); log.debug("Xerces initialization complete"); #ifndef XMLTOOLING_NO_XMLSEC @@ -211,7 +215,7 @@ bool XMLToolingInternalConfig::init() m_parserPool=new ParserPool(); m_validatingPool=new ParserPool(true,true); - m_lock=xercesc::XMLPlatformUtils::makeMutex(); + m_lock=XMLPlatformUtils::makeMutex(); // Load catalogs from path. if (!catalog_path.empty()) { @@ -345,9 +349,9 @@ void XMLToolingInternalConfig::term() XSECPlatformUtils::Terminate(); #endif - xercesc::XMLPlatformUtils::closeMutex(m_lock); + XMLPlatformUtils::closeMutex(m_lock); m_lock=NULL; - xercesc::XMLPlatformUtils::Terminate(); + XMLPlatformUtils::Terminate(); curl_global_cleanup(); diff --git a/xmltooling/util/CurlNetAccessor.cpp b/xmltooling/util/CurlNetAccessor.cpp new file mode 100644 index 0000000..505e9d0 --- /dev/null +++ b/xmltooling/util/CurlNetAccessor.cpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * $Id: CurlNetAccessor.cpp 471747 2006-11-06 14:31:56Z amassari $ + */ + +#include "internal.h" +#include "util/CurlURLInputStream.h" +#include "util/CurlNetAccessor.h" + +#include +#include +#include +#include + +using namespace xmltooling; + +const XMLCh CurlNetAccessor::fgMyName[] = +{ + chLatin_C, chLatin_u, chLatin_r, chLatin_l, chLatin_N, chLatin_e, + chLatin_t, chLatin_A, chLatin_c, chLatin_c, chLatin_e, chLatin_s, + chLatin_s, chLatin_o, chLatin_r, chNull +}; + + +BinInputStream* +CurlNetAccessor::makeNew(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) +{ + // Just create a CurlURLInputStream + // We defer any checking of the url type for curl in CurlURLInputStream + CurlURLInputStream* retStrm = + new (urlSource.getMemoryManager()) CurlURLInputStream(urlSource, httpInfo); + return retStrm; +} diff --git a/xmltooling/util/CurlNetAccessor.h b/xmltooling/util/CurlNetAccessor.h new file mode 100644 index 0000000..523da70 --- /dev/null +++ b/xmltooling/util/CurlNetAccessor.h @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * $Id: CurlNetAccessor.hpp 527149 2007-04-10 14:56:39Z amassari $ + */ + +#if !defined(XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP) +#define XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP + + +#include +#include +#include +#include + +namespace xmltooling { + +// +// This class is the wrapper for the socket based code which +// provides the ability to fetch a resource specified using +// a HTTP or FTP URL. +// + +class XMLTOOL_DLLLOCAL CurlNetAccessor : public XMLNetAccessor +{ +public : + CurlNetAccessor() {} + ~CurlNetAccessor() {} + + virtual BinInputStream* makeNew(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo=0); + virtual const XMLCh* getId() const; + +private : + static const XMLCh fgMyName[]; + + CurlNetAccessor(const CurlNetAccessor&); + CurlNetAccessor& operator=(const CurlNetAccessor&); + +}; // CurlNetAccessor + + +inline const XMLCh* CurlNetAccessor::getId() const +{ + return fgMyName; +} + +}; + +#endif // CURLNETACCESSOR_HPP + + diff --git a/xmltooling/util/CurlURLInputStream.cpp b/xmltooling/util/CurlURLInputStream.cpp new file mode 100644 index 0000000..e03f5c4 --- /dev/null +++ b/xmltooling/util/CurlURLInputStream.cpp @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * $Id: CurlURLInputStream.cpp 471747 2006-11-06 14:31:56Z amassari $ + */ + +#include "internal.h" +#include "util/CurlURLInputStream.h" + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +# include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace xmltooling; + +CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) + : fMulti(0) + , fEasy(0) + , fMemoryManager(urlSource.getMemoryManager()) + , fURLSource(urlSource) + , fURL(0) + , fTotalBytesRead(0) + , fWritePtr(0) + , fBytesRead(0) + , fBytesToRead(0) + , fDataAvailable(false) + , fBufferHeadPtr(fBuffer) + , fBufferTailPtr(fBuffer) +{ + // Allocate the curl multi handle + fMulti = curl_multi_init(); + + // Allocate the curl easy handle + fEasy = curl_easy_init(); + + // Get the text of the URL we're going to use + fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager); + + //printf("Curl trying to fetch %s\n", fURL.get()); + + // Set URL option + curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get()); + curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function + curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function + + // Add easy handle to the multi stack + curl_multi_add_handle(fMulti, fEasy); +} + + +CurlURLInputStream::~CurlURLInputStream() +{ + // Remove the easy handle from the multi stack + curl_multi_remove_handle(fMulti, fEasy); + + // Cleanup the easy handle + curl_easy_cleanup(fEasy); + + // Cleanup the multi handle + curl_multi_cleanup(fMulti); +} + + +size_t +CurlURLInputStream::staticWriteCallback(char *buffer, + size_t size, + size_t nitems, + void *outstream) +{ + return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems); +} + + + +size_t +CurlURLInputStream::writeCallback(char *buffer, + size_t size, + size_t nitems) +{ + 
XMLSize_t cnt = size * nitems; + XMLSize_t totalConsumed = 0; + + // Consume as many bytes as possible immediately into the buffer + XMLSize_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt; + memcpy(fWritePtr, buffer, consume); + fWritePtr += consume; + fBytesRead += consume; + fTotalBytesRead += consume; + fBytesToRead -= consume; + + //printf("write callback consuming %d bytes\n", consume); + + // If bytes remain, rebuffer as many as possible into our holding buffer + buffer += consume; + totalConsumed += consume; + cnt -= consume; + if (cnt > 0) + { + XMLSize_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer); + consume = (cnt > bufAvail) ? bufAvail : cnt; + memcpy(fBufferHeadPtr, buffer, consume); + fBufferHeadPtr += consume; + buffer += consume; + totalConsumed += consume; + //printf("write callback rebuffering %d bytes\n", consume); + } + + // Return the total amount we've consumed. If we don't consume all the bytes + // then an error will be generated. Since our buffer size is equal to the + // maximum size that curl will write, this should never happen unless there + // is a logic error somewhere here. + return totalConsumed; +} + + + +unsigned int +CurlURLInputStream::readBytes(XMLByte* const toFill + , const unsigned int maxToRead) +{ + fBytesRead = 0; + fBytesToRead = maxToRead; + fWritePtr = toFill; + + for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); ) + { + // First, any buffered data we have available + XMLSize_t bufCnt = fBufferHeadPtr - fBufferTailPtr; + bufCnt = (bufCnt > fBytesToRead) ? 
fBytesToRead : bufCnt; + if (bufCnt > 0) + { + memcpy(fWritePtr, fBufferTailPtr, bufCnt); + fWritePtr += bufCnt; + fBytesRead += bufCnt; + fTotalBytesRead += bufCnt; + fBytesToRead -= bufCnt; + + fBufferTailPtr += bufCnt; + if (fBufferTailPtr == fBufferHeadPtr) + fBufferHeadPtr = fBufferTailPtr = fBuffer; + + //printf("consuming %d buffered bytes\n", bufCnt); + + tryAgain = true; + continue; + } + + // Ask the curl to do some work + int runningHandles = 0; + CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles); + tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM); + + // Process messages from curl + int msgsInQueue = 0; + for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; ) + { + //printf("msg %d, %d from curl\n", msg->msg, msg->data.result); + + if (msg->msg != CURLMSG_DONE) + continue; + + switch (msg->data.result) + { + case CURLE_OK: + // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below... 
+ break; + + case CURLE_UNSUPPORTED_PROTOCOL: + ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager); + break; + + case CURLE_COULDNT_RESOLVE_HOST: + case CURLE_COULDNT_RESOLVE_PROXY: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager); + break; + + case CURLE_COULDNT_CONNECT: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager); + + case CURLE_RECV_ERROR: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager); + break; + + default: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager); + break; + } + } + + // If nothing is running any longer, bail out + if (runningHandles == 0) + break; + + // If there is no further data to read, and we haven't + // read any yet on this invocation, call select to wait for data + if (!tryAgain && fBytesRead == 0) + { + fd_set readSet[16]; + fd_set writeSet[16]; + fd_set exceptSet[16]; + int fdcnt = 16; + + // As curl for the file descriptors to wait on + (void) curl_multi_fdset(fMulti, readSet, writeSet, exceptSet, &fdcnt); + + // Wait on the file descriptors + timeval tv; + tv.tv_sec = 2; + tv.tv_usec = 0; + (void) select(fdcnt, readSet, writeSet, exceptSet, &tv); + } + } + + return fBytesRead; +} diff --git a/xmltooling/util/CurlURLInputStream.h b/xmltooling/util/CurlURLInputStream.h new file mode 100644 index 0000000..a1a6035 --- /dev/null +++ b/xmltooling/util/CurlURLInputStream.h @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * $Id: CurlURLInputStream.hpp 527149 2007-04-10 14:56:39Z amassari $
+ */
+
+#if !defined(XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP)
+#define XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace xmltooling {
+
+//
+// This class implements the BinInputStream interface specified by the XML
+// parser. The bytes of the resource are obtained through libcurl: a curl
+// easy handle describes the transfer and a curl multi handle drives it
+// from within readBytes().
+//
+
+class XMLTOOL_DLLLOCAL CurlURLInputStream : public BinInputStream
+{
+public :
+    // Sets up a transfer for urlSource; the transfer is driven by readBytes().
+    CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo=0);
+    // Releases the curl handles.
+    ~CurlURLInputStream();
+
+    // Total number of bytes handed out so far (see fTotalBytesRead).
+    unsigned int curPos() const;
+    // Fills toFill with up to maxToRead bytes and returns how many were stored.
+    unsigned int readBytes
+    (
+        XMLByte* const toFill
+        , const unsigned int maxToRead
+    );
+
+
+private :
+    // -----------------------------------------------------------------------
+    // Unimplemented constructors and operators
+    // -----------------------------------------------------------------------
+    CurlURLInputStream(const CurlURLInputStream&);
+    CurlURLInputStream& operator=(const CurlURLInputStream&);
+
+    // Callback given to curl as CURLOPT_WRITEFUNCTION; outstream is the
+    // CurlURLInputStream instance, to which the call is forwarded.
+    static size_t staticWriteCallback(char *buffer,
+                                      size_t size,
+                                      size_t nitems,
+                                      void *outstream);
+    // Takes size * nitems bytes from curl: first into the caller's buffer of
+    // the current readBytes() call, the remainder into fBuffer.
+    size_t writeCallback( char *buffer,
+                          size_t size,
+                          size_t nitems);
+
+
+    // -----------------------------------------------------------------------
+    // Private data members
+    //
+    // fMulti
+    // The curl multi handle used to drive the transfer.
+    // fEasy
+    // The curl easy handle configured with the URL and the write
+    // callback; it is registered with fMulti.
+    // fMemoryManager
+    // The memory manager obtained from the source URL.
+    // fURLSource
+    // A copy of the URL being fetched.
+    // fURL
+    // The transcoded text of the URL, as passed to curl.
+    // fTotalBytesRead
+    // Its a rolling count of the number of bytes handed out by this
+    // input stream; reported by curPos().
+    // fWritePtr
+    // Next position to fill in the buffer supplied to the current
+    // readBytes() call.
+    // fBytesRead, fBytesToRead
+    // Bytes delivered so far, and bytes still wanted, during the
+    // current readBytes() call.
+    // fDataAvailable
+    // Initialized to false by the constructor; not referenced by the
+    // other member functions of this class.
+    // fBuffer
+    // Holds data that curl delivered beyond what the current
+    // readBytes() call asked for.
+    // fBufferHeadPtr, fBufferTailPtr
+    // Pointers into fBuffer: the tail is the start of the data not yet
+    // handed out, the head is one past its end (where new data is
+    // appended).
+    // -----------------------------------------------------------------------
+
+    CURLM* fMulti;
+    CURL* fEasy;
+
+    MemoryManager* fMemoryManager;
+
+    XMLURL fURLSource;
+    // NOTE(review): the template argument looks lost in extraction; usage
+    // (XMLString::transcode result, CURLOPT_URL) suggests a char array -- confirm.
+    ArrayJanitor fURL;
+
+    unsigned long fTotalBytesRead;
+    XMLByte* fWritePtr;
+    unsigned long fBytesRead;
+    unsigned long fBytesToRead;
+    bool fDataAvailable;
+
+    // Overflow buffer for when curl writes more data to us
+    // than we've asked for.
+    XMLByte fBuffer[CURL_MAX_WRITE_SIZE];
+    XMLByte* fBufferHeadPtr;
+    XMLByte* fBufferTailPtr;
+
+}; // CurlURLInputStream
+
+
+// Returns the running byte count; the unsigned long counter is converted to
+// the unsigned int return type.
+inline unsigned int
+CurlURLInputStream::curPos() const
+{
+    return fTotalBytesRead;
+}
+
+};
+
+#endif // CURLURLINPUTSTREAM_HPP
+
diff --git a/xmltooling/xmltooling.vcproj b/xmltooling/xmltooling.vcproj
index 837e651..bf2b136 100644
--- a/xmltooling/xmltooling.vcproj
+++ b/xmltooling/xmltooling.vcproj
@@ -62,7 +62,7 @@ /> + + + + @@ -545,6 +553,14 @@ Name="util" > + + + +
-- 2.1.4