From 211d157271f527521d46875c1c220734f2fa1136 Mon Sep 17 00:00:00 2001 From: Scott Cantor Date: Mon, 23 Jan 2012 21:46:48 +0000 Subject: [PATCH] Language matching support in request API --- xmltooling/io/GenericRequest.h | 70 ++++++++++++++++++++- xmltooling/io/HTTPRequest.cpp | 135 ++++++++++++++++++++++++++++++++++++++++- xmltooling/io/HTTPRequest.h | 3 +- 3 files changed, 205 insertions(+), 3 deletions(-) diff --git a/xmltooling/io/GenericRequest.h b/xmltooling/io/GenericRequest.h index 82bbcc3..11cd82b 100644 --- a/xmltooling/io/GenericRequest.h +++ b/xmltooling/io/GenericRequest.h @@ -27,8 +27,9 @@ #ifndef __xmltooling_genreq_h__ #define __xmltooling_genreq_h__ -#include +#include +#include #include #include @@ -38,6 +39,11 @@ namespace xmltooling { +#if defined (_MSC_VER) + #pragma warning( push ) + #pragma warning( disable : 4251 ) +#endif + /** * Interface to generic protocol requests that transport XML messages. * @@ -157,7 +163,69 @@ namespace xmltooling { std::vector& #endif getClientCertificates() const=0; + + /** + * Returns a language range to use in selecting language-specific + * content for this request. + *

The syntax is that of the HTTP 1.1 Accept-Language header, even + * if the underlying request is not HTTP. + * + * @return an HTTP 1.1 syntax language range specifier + */ + virtual std::string getLanguageRange() const { + return ""; + } + + /** + * Initializes the language matching process; call this method to begin the + * matching process by calling the matchLang method. + *

The language matching process is not thread-safe and must be externally + * synchronized. + * + * @return true iff language matching is possible + */ + bool startLangMatching() const; + + /** + * Continues the language matching process; additional calls to matchLang can + * be done as long as this method returns true. + *

The language matching process is not thread-safe and must be externally + * synchronized. + * + * @return true iff more ranges are available to match against + */ + bool continueLangMatching() const; + + /** + * Matches a language tag against the currently active range. + *

The language matching process is not thread-safe and must be externally + * synchronized. + * + * @param tag a language tag (e.g., an xml:lang value) + * @return true iff the tag matches the active range + */ + bool matchLang(const XMLCh* tag) const; + + /** + * Establish default handling of language ranges. + * + * @param langFromClient honor client's language preferences if any + * @param defaultRange priority list of space-delimited language tags to use by default + */ + static void setLangDefaults(bool langFromClient, const XMLCh* defaultRange); + + private: + typedef std::multimap< float,std::vector > langrange_t; + mutable langrange_t m_langRange; + mutable langrange_t::const_reverse_iterator m_langRangeIter; + static langrange_t m_defaultRange; + static bool m_langFromClient; }; + +#if defined (_MSC_VER) + #pragma warning( pop ) +#endif + }; #endif /* __xmltooling_genreq_h__ */ diff --git a/xmltooling/io/HTTPRequest.cpp b/xmltooling/io/HTTPRequest.cpp index af790e9..a3c4162 100644 --- a/xmltooling/io/HTTPRequest.cpp +++ b/xmltooling/io/HTTPRequest.cpp @@ -26,16 +26,24 @@ #include "internal.h" #include "HTTPRequest.h" +#include "util/Threads.h" +#include #include #include +#include #include +#include using namespace xmltooling; +using namespace xercesc; using namespace boost; using namespace std; -GenericRequest::GenericRequest() +bool GenericRequest::m_langFromClient = true; +GenericRequest::langrange_t GenericRequest::m_defaultRange; + +GenericRequest::GenericRequest() : m_langRangeIter(m_langRange.crend()) { } @@ -43,6 +51,126 @@ GenericRequest::~GenericRequest() { } +void GenericRequest::setLangDefaults(bool langFromClient, const XMLCh* defaultRange) +{ + m_langFromClient = langFromClient; + m_defaultRange.clear(); + if (!defaultRange) + return; + float q = 0.0f; + XMLStringTokenizer tokens(defaultRange); + while (tokens.hasMoreTokens()) { + const XMLCh* t = tokens.nextToken(); + if (t && *t) { + vector tagArray; + static const XMLCh delims[] = {chDash, 
chNull}; + XMLStringTokenizer tags(t, delims); + while (tags.hasMoreTokens()) + tagArray.push_back(tags.nextToken()); + m_defaultRange.insert(langrange_t::value_type(q, tagArray)); + q -= 0.0001f; + } + } +} + +bool GenericRequest::startLangMatching() const +{ + // This is a no-op except on the first call, to populate the + // range information to use in matching. + if (m_langRange.empty()) { + if (m_langFromClient) { + string hdr(getLanguageRange()); + char_separator sep1(", "); // tags are split by commas or spaces + char_separator sep2("; "); // quality is separated by semicolon + tokenizer< char_separator > tokens(hdr, sep1); + for (tokenizer< char_separator >::iterator t = tokens.begin(); t != tokens.end(); ++t) { + string tag = trim_copy(*t); // handle any surrounding ws + tokenizer< char_separator > subtokens(tag, sep2); + tokenizer< char_separator >::iterator s = subtokens.begin(); + if (s != subtokens.end() && *s != "*") { + float q = 1.0f; + auto_ptr_XMLCh lang((s++)->c_str()); + + // Check for quality tag + if (s != subtokens.end() && starts_with(*s, "q=")) { + try { + q = lexical_cast(s->c_str() + 2); + } + catch (bad_lexical_cast&) { + q = 0.0f; + } + } + + // Split range into tokens. + vector tagArray; + static const XMLCh delims[] = {chDash, chNull}; + XMLStringTokenizer tags(lang.get(), delims); + const XMLCh* tag; + while (tags.hasMoreTokens()) { + tag = tags.nextToken(); + if (*tag != chAsterisk) + tagArray.push_back(tag); + } + + if (tagArray.empty()) + continue; + + // Adjust q using the server priority list. As long as the supplied q deltas are larger than + // factors like .0001, the client settings will always trump ours. 
+ if (!m_defaultRange.empty()) { + float adj = (m_defaultRange.size() + 1) * 0.0001f; + for (langrange_t::const_iterator prio = m_defaultRange.begin(); prio != m_defaultRange.end(); ++prio) { + if (prio->second == tagArray) { + adj = prio->first; + break; + } + } + q -= adj; + } + m_langRange.insert(langrange_t::value_type(q, tagArray)); + } + } + } + else { + m_langRange = m_defaultRange; + } + } + + m_langRangeIter = m_langRange.crbegin(); + return (m_langRangeIter != m_langRange.crend()); +} + +bool GenericRequest::continueLangMatching() const +{ + return (++m_langRangeIter != m_langRange.crend()); +} + +bool GenericRequest::matchLang(const XMLCh* tag) const +{ + if (m_langRangeIter == m_langRange.crend()) + return false; + + // To match against a given range, the range has to be built up and then + // truncated segment by segment to look for a match against the tag. + // That allows more specific ranges like en-US to match the tag en. + // The "end" fence tells us how much of the original range to recompose + // into a hyphenated string, and we stop on a match, or when the fence + // moves back to the beginning of the array. + bool match = false; + vector::size_type end = m_langRangeIter->second.size(); + do { + // Skip single-character private extension separators. + while (end > 1 && m_langRangeIter->second[end-1].length() <= 1) + --end; + // Build a range from 0 to end - 1 of segments. 
+ xstring compareTo(m_langRangeIter->second[0]); + for (vector::size_type ix = 1; ix <= end - 1; ++ix) + compareTo = compareTo + chDash + m_langRangeIter->second[ix]; + match = (compareTo.length() > 1 && XMLString::compareIStringASCII(compareTo.c_str(), tag) == 0); + } while (!match && --end > 0); + return match; +} + HTTPRequest::HTTPRequest() { } @@ -56,6 +184,11 @@ bool HTTPRequest::isSecure() const return strcmp(getScheme(),"https")==0; } +string HTTPRequest::getLanguageRange() const +{ + return getHeader("Accept-Language"); +} + namespace { void handle_cookie_fn(map& cookieMap, vector& nvpair, const string& s) { nvpair.clear(); diff --git a/xmltooling/io/HTTPRequest.h b/xmltooling/io/HTTPRequest.h index 666c862..7cca6c1 100644 --- a/xmltooling/io/HTTPRequest.h +++ b/xmltooling/io/HTTPRequest.h @@ -30,7 +30,7 @@ #include #include -#include +#include namespace xmltooling { @@ -55,6 +55,7 @@ namespace xmltooling { virtual ~HTTPRequest(); bool isSecure() const; + std::string getLanguageRange() const; /** * Returns the HTTP method of the request (GET, POST, etc.) -- 2.1.4