blob: 9166d92fb7bde8be6627f964789f709012540839 [file] [log] [blame]
#include <mbgl/storage/http_file_source.hpp>
#include <mbgl/storage/resource.hpp>
#include <mbgl/storage/response.hpp>
#include <mbgl/util/logging.hpp>
#include <mbgl/util/util.hpp>
#include <mbgl/util/optional.hpp>
#include <mbgl/util/run_loop.hpp>
#include <mbgl/util/string.hpp>
#include <mbgl/util/timer.hpp>
#include <mbgl/util/chrono.hpp>
#include <mbgl/util/http_header.hpp>
#include <curl/curl.h>
// Dynamically load all cURL functions. Debian-derived systems upgraded the OpenSSL version linked
// to in https://salsa.debian.org/debian/curl/commit/95c94957bb7e89e36e78b995fed468c42f64d18d
// They state:
// Rename libcurl3 to libcurl4, because libcurl exposes an SSL_CTX via
// CURLOPT_SSL_CTX_FUNCTION, and this object changes incompatibly between
// openssl 1.0 and openssl 1.1.
// Since we are not accessing the underlying OpenSSL context, we don't care whether we're linking
// against libcurl3 or libcurl4; both use the ABI version 4 which hasn't changed since 2006
// (see https://curl.haxx.se/libcurl/abi.html). In fact, cURL's ABI compatibility is very good as
// shown on https://abi-laboratory.pro/tracker/timeline/curl/
// Therefore, we're dynamically loading the cURL symbols we need to avoid linking against versioned
// symbols.
#include <dlfcn.h>
namespace curl {

// X-macro listing every libcurl entry point used in this file (without the `curl_` prefix).
// It is expanded twice: once to declare a function pointer per entry, and once in load() to
// resolve each pointer from the dynamically opened library.
#define CURL_FUNCTIONS \
    X(global_init) \
    X(getdate) \
    X(easy_strerror) \
    X(easy_init) \
    X(easy_setopt) \
    X(easy_cleanup) \
    X(easy_getinfo) \
    X(easy_reset) \
    X(multi_init) \
    X(multi_add_handle) \
    X(multi_remove_handle) \
    X(multi_cleanup) \
    X(multi_info_read) \
    X(multi_strerror) \
    X(multi_socket_action) \
    X(multi_setopt) \
    X(share_init) \
    X(share_cleanup) \
    X(slist_append) \
    X(slist_free_all)

// Declares `curl::<name>` as a pointer with the exact type of `curl_<name>`.
#define X(name) static decltype(&curl_ ## name) name = nullptr;
CURL_FUNCTIONS
#undef X

// Handle returned by dlopen() for libcurl; nullptr while the library is not loaded.
static void* handle = nullptr;

// Resolves a single symbol from the opened library. Prints a diagnostic and aborts the process
// if the symbol cannot be found.
static void* load(const char* name) {
    // Clear any stale error first: dlerror() reports the most recent failure of *any* dl* call,
    // so without this an unrelated earlier error would be mistaken for a dlsym() failure.
    dlerror();
    void* symbol = dlsym(handle, name);
    if (const char* error = dlerror()) {
        fprintf(stderr, "Cannot load symbol '%s': %s\n", name, error);
        dlclose(handle);
        handle = nullptr;
        abort();
    }
    return symbol;
}

// Runs automatically when this object is loaded: opens libcurl and resolves every function
// listed in CURL_FUNCTIONS. Aborts the process if the library cannot be opened.
__attribute__((constructor))
static void load() {
    assert(!handle);
    handle = dlopen("libcurl.so.4", RTLD_LAZY | RTLD_LOCAL);
    if (!handle) {
        fprintf(stderr, "Could not open shared library '%s'\n", "libcurl.so.4");
        abort();
    }

    #define X(name) name = (decltype(&curl_ ## name))load("curl_" #name);
    CURL_FUNCTIONS
    #undef X
}

// Runs automatically at unload/exit and releases the library again. This must be a destructor:
// as a constructor it would run at load time as well and could close the library right after
// its symbols were resolved, leaving every function pointer above dangling.
__attribute__((destructor))
static void unload() {
    if (handle) {
        dlclose(handle);
        handle = nullptr;
    }
}

} // namespace curl
#include <queue>
#include <map>
#include <cassert>
#include <cstring>
#include <cstdio>
// Turns a failed cURL multi-interface status into an exception. CURLM_OK passes through
// silently; any other code throws std::runtime_error carrying cURL's description of the error.
static void handleError(CURLMcode code) {
    if (code == CURLM_OK) {
        return;
    }
    std::string message = "CURL multi error: ";
    message += curl::multi_strerror(code);
    throw std::runtime_error(message);
}
// Turns a failed cURL easy-interface status into an exception. CURLE_OK passes through
// silently; any other code throws std::runtime_error carrying cURL's description of the error.
static void handleError(CURLcode code) {
    if (code == CURLE_OK) {
        return;
    }
    std::string message = "CURL easy error: ";
    message += curl::easy_strerror(code);
    throw std::runtime_error(message);
}
namespace mbgl {
// State shared by every HTTPRequest created through one HTTPFileSource: the cURL multi and
// share handles, a pool of idle easy handles, and the timer that cURL asks us to run.
class HTTPFileSource::Impl {
public:
// Initializes cURL, creates the share and multi handles and registers the socket/timer
// callbacks below on the multi handle. Throws std::runtime_error if cURL reports a failure.
Impl();
// Destroys all pooled easy handles, then the multi and share handles, and stops the timer.
~Impl();
// Registered as CURLMOPT_SOCKETFUNCTION (userp is the Impl). Adds or removes a RunLoop watch
// for socket `s` depending on `action`.
static int handleSocket(CURL *handle, curl_socket_t s, int action, void *userp, void *socketp);
// Registered as CURLMOPT_TIMERFUNCTION (userp is the Impl). Restarts `timeout` so that
// onTimeout runs after `timeout_ms` milliseconds.
static int startTimeout(CURLM *multi, long timeout_ms, void *userp);
// Timer callback: tells cURL that the timeout expired and then processes finished transfers.
static void onTimeout(HTTPFileSource::Impl *context);
// RunLoop watch callback: tells cURL that socket `s` is readable/writable and then processes
// finished transfers.
void perform(curl_socket_t s, util::RunLoop::Event event);
// Returns an easy handle: a pooled one if available, otherwise a newly created one.
CURL *getHandle();
// Resets `handle` and puts it back into the pool for reuse.
void returnHandle(CURL *handle);
// Reads all pending messages from the multi handle and forwards each completed transfer to
// the HTTPRequest stored in the easy handle's CURLINFO_PRIVATE pointer.
void checkMultiInfo();
// Used as the CURL timer function to periodically check for socket updates.
util::Timer timeout;
// CURL multi handle that we use to request multiple URLs at the same time, without having to
// block and spawn threads.
CURLM *multi = nullptr;
// CURL share handle that is attached to every easy handle via CURLOPT_SHARE. Share handles let
// easy handles share state with each other; no specific data kind is enabled on it in this file.
CURLSH *share = nullptr;
// A queue that we use for storing reusable CURL easy handles to avoid creating and destroying
// them all the time.
std::queue<CURL *> handles;
};
// A single HTTP transfer. The constructor configures an easy handle obtained from the Impl and
// adds it to the multi handle; the destructor removes it again and returns it to the pool.
class HTTPRequest : public AsyncRequest {
public:
// Sets up the easy handle for `Resource::url` (including conditional-request headers) and
// starts the transfer. The callback is invoked from handleResult().
HTTPRequest(HTTPFileSource::Impl*, Resource, FileSource::Callback);
~HTTPRequest() override;
// Called by Impl::checkMultiInfo() once cURL reports the transfer as done. Translates the cURL
// result / HTTP status into a Response and passes it to the callback.
void handleResult(CURLcode code);
private:
// Registered as CURLOPT_HEADERFUNCTION (userp is the HTTPRequest); receives one header line
// per call and records the headers this class cares about.
static size_t headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp);
// Registered as CURLOPT_WRITEFUNCTION (userp is the HTTPRequest); appends body bytes to `data`.
static size_t writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp);
// Owner of the multi handle and of the easy-handle pool; not owned by the request.
HTTPFileSource::Impl* context = nullptr;
Resource resource;
FileSource::Callback callback;
// Will store the current response.
// Body bytes accumulated by writeCallback(); moved into the response on HTTP 200.
std::shared_ptr<std::string> data;
// Created lazily by headerCallback() or handleResult().
std::unique_ptr<Response> response;
// Raw values of the "Retry-After" and "X-Rate-Limit-Reset" headers, if the server sent them.
// They are handed to http::parseRetryHeaders() when the status code is 429.
optional<std::string> retryAfter;
optional<std::string> xRateLimitReset;
// Easy handle taken from the context's pool for the lifetime of this request.
CURL *handle = nullptr;
// Request header list passed to CURLOPT_HTTPHEADER; freed in the destructor.
curl_slist *headers = nullptr;
// Buffer registered via CURLOPT_ERRORBUFFER; its contents are appended to error messages.
char error[CURL_ERROR_SIZE] = { 0 };
};
// Brings up cURL for this file source: global initialization, one share handle, one multi
// handle, and the socket/timer callbacks that connect the multi handle to our run loop.
// Throws std::runtime_error if global initialization or any multi option fails.
HTTPFileSource::Impl::Impl() {
    const CURLcode globalStatus = curl::global_init(CURL_GLOBAL_ALL);
    if (globalStatus != CURLE_OK) {
        throw std::runtime_error("Could not init cURL");
    }

    share = curl::share_init();
    multi = curl::multi_init();

    // cURL tells us which sockets to watch through handleSocket() ...
    handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETFUNCTION, handleSocket));
    handleError(curl::multi_setopt(multi, CURLMOPT_SOCKETDATA, this));

    // ... and when it wants to be woken up through startTimeout().
    handleError(curl::multi_setopt(multi, CURLMOPT_TIMERFUNCTION, startTimeout));
    handleError(curl::multi_setopt(multi, CURLMOPT_TIMERDATA, this));
}
// Tears everything down in the same order as before: pooled easy handles first, then the multi
// handle, then the share handle, and finally the timer.
HTTPFileSource::Impl::~Impl() {
    for (; !handles.empty(); handles.pop()) {
        curl::easy_cleanup(handles.front());
    }

    curl::multi_cleanup(multi);
    multi = nullptr;

    curl::share_cleanup(share);
    share = nullptr;

    timeout.stop();
}
// Hands out an easy handle. Reuses the oldest pooled handle when one is available and only
// creates a fresh one when the pool is empty.
CURL *HTTPFileSource::Impl::getHandle() {
    if (handles.empty()) {
        return curl::easy_init();
    }

    CURL *const pooled = handles.front();
    handles.pop();
    return pooled;
}
// Takes an easy handle back into the pool. The handle is reset first so that no option set by
// the previous request carries over to the next user.
void HTTPFileSource::Impl::returnHandle(CURL *handle) {
    curl::easy_reset(handle);
    handles.emplace(handle);
}
void HTTPFileSource::Impl::checkMultiInfo() {
CURLMsg *message = nullptr;
int pending = 0;
while ((message = curl::multi_info_read(multi, &pending))) {
switch (message->msg) {
case CURLMSG_DONE: {
HTTPRequest *baton = nullptr;
curl::easy_getinfo(message->easy_handle, CURLINFO_PRIVATE, (char *)&baton);
assert(baton);
baton->handleResult(message->data.result);
} break;
default:
// This should never happen, because there are no other message types.
throw std::runtime_error("CURLMsg returned unknown message type");
}
}
}
// Run loop callback for a watched socket: converts the run loop event into the matching
// CURL_CSELECT_* flag, lets cURL act on the socket, and then handles finished transfers.
void HTTPFileSource::Impl::perform(curl_socket_t s, util::RunLoop::Event events) {
    const bool readable = events == util::RunLoop::Event::Read;
    const bool writable = events == util::RunLoop::Event::Write;
    const int flags = (readable ? CURL_CSELECT_IN : 0) | (writable ? CURL_CSELECT_OUT : 0);

    int stillRunning = 0;
    curl::multi_socket_action(multi, s, flags, &stillRunning);
    checkMultiInfo();
}
// CURLMOPT_SOCKETFUNCTION callback. cURL calls this to tell us what to watch on socket `s`:
// CURL_POLL_IN adds a read watch, CURL_POLL_OUT adds a write watch, and CURL_POLL_REMOVE drops
// the watch. `userp` is the Impl registered via CURLMOPT_SOCKETDATA. Always returns 0.
// Any other action raises std::runtime_error.
// NOTE(review): libcurl also defines CURL_POLL_INOUT, which ends up in the throwing branch
// here — confirm whether a combined read/write watch needs to be supported.
int HTTPFileSource::Impl::handleSocket(CURL * /* handle */, curl_socket_t s, int action, void *userp,
                                       void * /* socketp */) {
    assert(userp);
    auto *self = reinterpret_cast<Impl *>(userp);

    if (action == CURL_POLL_REMOVE) {
        util::RunLoop::Get()->removeWatch(s);
        return 0;
    }

    if (action != CURL_POLL_IN && action != CURL_POLL_OUT) {
        throw std::runtime_error("Unhandled CURL socket action");
    }

    const util::RunLoop::Event direction =
        action == CURL_POLL_IN ? util::RunLoop::Event::Read : util::RunLoop::Event::Write;

    using namespace std::placeholders;
    util::RunLoop::Get()->addWatch(s, direction, std::bind(&Impl::perform, self, _1, _2));
    return 0;
}
// Timer callback scheduled by startTimeout(). Informs cURL that its timeout expired
// (CURL_SOCKET_TIMEOUT) and then processes any transfers that finished as a result.
// Throws std::runtime_error if cURL reports a multi-interface error.
void HTTPFileSource::Impl::onTimeout(Impl *context) {
    // Out-parameter required by the API; the value itself is not used here.
    int running_handles = 0;
    // Use the shared helper so the error reporting matches every other multi call in this file.
    handleError(curl::multi_socket_action(context->multi, CURL_SOCKET_TIMEOUT, 0, &running_handles));
    context->checkMultiInfo();
}
// CURLMOPT_TIMERFUNCTION callback. cURL calls this whenever the time at which it wants to be
// woken up changes. `userp` is the Impl registered via CURLMOPT_TIMERDATA. Always returns 0.
//
// timeout_ms >= 0: (re)schedule onTimeout() to run after that many milliseconds.
// timeout_ms <  0: cURL asks us to delete the timer, so we only cancel the pending one.
int HTTPFileSource::Impl::startTimeout(CURLM * /* multi */, long timeout_ms, void *userp) {
    assert(userp);
    auto context = reinterpret_cast<Impl *>(userp);

    // Whatever was scheduled before is obsolete now.
    context->timeout.stop();

    if (timeout_ms < 0) {
        // A negative value means "no timeout wanted". Scheduling an immediate timeout instead
        // would wake cURL up without reason.
        return 0;
    }

    // A timeout of 0 ms means that the timer will be invoked in the next loop iteration.
    context->timeout.start(mbgl::Milliseconds(timeout_ms), Duration::zero(),
                           std::bind(&Impl::onTimeout, context));
    return 0;
}
// Configures an easy handle for `resource_` and hands it to the multi handle, which starts the
// transfer. `callback_` is invoked from handleResult() once the transfer has finished.
// Throws std::runtime_error if cURL rejects any option or the handle cannot be added.
HTTPRequest::HTTPRequest(HTTPFileSource::Impl* context_, Resource resource_, FileSource::Callback callback_)
    : context(context_),
      resource(std::move(resource_)),
      callback(std::move(callback_)),
      handle(context->getHandle()) {
    // If there's already a response, set the correct etags/modified headers to make sure we are
    // getting a 304 response if possible. This avoids redownloading unchanged data.
    if (resource.priorEtag) {
        const std::string header = std::string("If-None-Match: ") + *resource.priorEtag;
        headers = curl::slist_append(headers, header.c_str());
    } else if (resource.priorModified) {
        const std::string time =
            std::string("If-Modified-Since: ") + util::rfc1123(*resource.priorModified);
        headers = curl::slist_append(headers, time.c_str());
    }

    if (headers) {
        // Checked like every other option so that a failure is not silently ignored.
        handleError(curl::easy_setopt(handle, CURLOPT_HTTPHEADER, headers));
    }

    // Lets Impl::checkMultiInfo() find this request again from the easy handle.
    handleError(curl::easy_setopt(handle, CURLOPT_PRIVATE, this));
    handleError(curl::easy_setopt(handle, CURLOPT_ERRORBUFFER, error));
    handleError(curl::easy_setopt(handle, CURLOPT_CAINFO, "ca-bundle.crt"));
    // curl_easy_setopt is variadic and reads this option as a `long`; passing a plain `int`
    // is not guaranteed to work, hence the explicit `1L`.
    handleError(curl::easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1L));
    handleError(curl::easy_setopt(handle, CURLOPT_URL, resource.url.c_str()));
    handleError(curl::easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeCallback));
    handleError(curl::easy_setopt(handle, CURLOPT_WRITEDATA, this));
    handleError(curl::easy_setopt(handle, CURLOPT_HEADERFUNCTION, headerCallback));
    handleError(curl::easy_setopt(handle, CURLOPT_HEADERDATA, this));
#if LIBCURL_VERSION_NUM >= ((7) << 16 | (21) << 8 | 6) // Renamed in 7.21.6
    handleError(curl::easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "gzip, deflate"));
#else
    handleError(curl::easy_setopt(handle, CURLOPT_ENCODING, "gzip, deflate"));
#endif
    handleError(curl::easy_setopt(handle, CURLOPT_USERAGENT, "MapboxGL/1.0"));
    handleError(curl::easy_setopt(handle, CURLOPT_SHARE, context->share));

    // Start requesting the information.
    handleError(curl::multi_add_handle(context->multi, handle));
}
// Detaches the easy handle from the multi handle (which also cancels a transfer that is still
// running), returns the handle to the pool and frees the request header list.
HTTPRequest::~HTTPRequest() {
    // Destructors are noexcept unless declared otherwise, so an exception escaping from here
    // would terminate the process. Report a failure instead of throwing.
    const CURLMcode code = curl::multi_remove_handle(context->multi, handle);
    if (code == CURLM_OK) {
        context->returnHandle(handle);
    } else {
        // The handle may still be attached to the multi handle, so it must not be pooled and
        // handed to another request.
        fprintf(stderr, "CURL multi error: %s\n", curl::multi_strerror(code));
    }
    handle = nullptr;

    if (headers) {
        curl::slist_free_all(headers);
        headers = nullptr;
    }
}
// CURLOPT_WRITEFUNCTION callback: cURL calls this for every chunk of body data it receives.
// The chunk (`size * nmemb` bytes starting at `contents`) is appended to the request's `data`
// string, which is created on first use. `userp` is the HTTPRequest registered via
// CURLOPT_WRITEDATA. Returns the number of bytes consumed, which is always the whole chunk.
size_t HTTPRequest::writeCallback(void *const contents, const size_t size, const size_t nmemb, void *userp) {
    assert(userp);
    auto *request = reinterpret_cast<HTTPRequest *>(userp);
    const size_t chunkLength = size * nmemb;

    if (request->data == nullptr) {
        request->data = std::make_shared<std::string>();
    }

    request->data->append(static_cast<const char *>(contents), chunkLength);
    return chunkLength;
}
// Checks whether the (non-zero-terminated!) data buffer starts with the (zero-terminated!)
// header string, ignoring case.
//
// header: zero-terminated prefix to look for, e.g. "etag: ".
// buffer: start of the data to inspect; only the first `length` bytes are read.
// length: number of valid bytes in `buffer`.
//
// Returns the length of the header string (== offset at which the value begins) if the buffer
// starts with it, otherwise std::string::npos.
size_t headerMatches(const char *const header, const char *const buffer, const size_t length) {
    const size_t headerLength = strlen(header);
    if (length < headerLength) {
        return std::string::npos;
    }

    for (size_t i = 0; i < headerLength; ++i) {
        // std::tolower requires a value representable as unsigned char (or EOF); passing a
        // plain char with the high bit set would be undefined behavior.
        const auto actual = static_cast<unsigned char>(buffer[i]);
        const auto expected = static_cast<unsigned char>(header[i]);
        if (std::tolower(actual) != std::tolower(expected)) {
            return std::string::npos;
        }
    }
    return headerLength;
}
// Returns the value part of a raw header line: the bytes of `buffer` from offset `begin` up to
// `length`, without the line terminator. Trailing CR/LF characters are stripped one by one, so
// lines ending in "\r\n", a bare "\n", or no terminator at all are all handled without reading
// outside the buffer. Requires begin <= length.
static std::string extractHeaderValue(const char *const buffer, const size_t begin, size_t length) {
    while (length > begin && (buffer[length - 1] == '\r' || buffer[length - 1] == '\n')) {
        --length;
    }
    return std::string(buffer + begin, length - begin);
}

// CURLOPT_HEADERFUNCTION callback: cURL calls this once per received header line. `buffer`
// holds `size * nmemb` bytes and is not zero-terminated; `userp` is the HTTPRequest registered
// via CURLOPT_HEADERDATA. Headers relevant for caching and rate limiting are recorded on the
// request; all others are ignored. Returns the number of bytes consumed (always the full line).
size_t HTTPRequest::headerCallback(char *const buffer, const size_t size, const size_t nmemb, void *userp) {
    assert(userp);
    auto baton = reinterpret_cast<HTTPRequest *>(userp);

    if (!baton->response) {
        baton->response = std::make_unique<Response>();
    }

    const size_t length = size * nmemb;
    size_t begin = std::string::npos;
    if ((begin = headerMatches("last-modified: ", buffer, length)) != std::string::npos) {
        // Always overwrite the modification date; We might already have a value here from the
        // Date header, but this one is more accurate.
        const std::string value = extractHeaderValue(buffer, begin, length);
        baton->response->modified = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
    } else if ((begin = headerMatches("etag: ", buffer, length)) != std::string::npos) {
        baton->response->etag = extractHeaderValue(buffer, begin, length);
    } else if ((begin = headerMatches("cache-control: ", buffer, length)) != std::string::npos) {
        const std::string value = extractHeaderValue(buffer, begin, length);
        const auto cc = http::CacheControl::parse(value.c_str());
        baton->response->expires = cc.toTimePoint();
        baton->response->mustRevalidate = cc.mustRevalidate;
    } else if ((begin = headerMatches("expires: ", buffer, length)) != std::string::npos) {
        const std::string value = extractHeaderValue(buffer, begin, length);
        baton->response->expires = Timestamp{ Seconds(curl::getdate(value.c_str(), nullptr)) };
    } else if ((begin = headerMatches("retry-after: ", buffer, length)) != std::string::npos) {
        baton->retryAfter = extractHeaderValue(buffer, begin, length);
    } else if ((begin = headerMatches("x-rate-limit-reset: ", buffer, length)) != std::string::npos) {
        baton->xRateLimitReset = extractHeaderValue(buffer, begin, length);
    }

    return length;
}
// Finishes the request: translates the cURL result (and, for successful transfers, the HTTP
// status code) into the Response and hands a copy of it to the callback.
//
// Status mapping for successful transfers:
//   200               -> body data (empty string if nothing was received)
//   204, 404 on tiles -> noContent
//   304               -> notModified
//   404 (other kinds) -> NotFound error
//   429               -> RateLimit error, including the parsed retry headers
//   5xx               -> Server error
//   anything else     -> Other error
void HTTPRequest::handleResult(CURLcode code) {
    // Make sure a response object exists in case we haven't got any headers or content.
    if (!response) {
        response = std::make_unique<Response>();
    }

    using Error = Response::Error;

    if (code == CURLE_OK) {
        long status = 0;
        curl::easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &status);

        switch (status) {
        case 200:
            if (data) {
                response->data = std::move(data);
            } else {
                response->data = std::make_shared<std::string>();
            }
            break;
        case 204:
            response->noContent = true;
            break;
        case 304:
            response->notModified = true;
            break;
        case 404:
            // A missing tile is treated as "no content" rather than as an error.
            if (resource.kind == Resource::Kind::Tile) {
                response->noContent = true;
            } else {
                response->error =
                    std::make_unique<Error>(Error::Reason::NotFound, "HTTP status code 404");
            }
            break;
        case 429:
            response->error =
                std::make_unique<Error>(Error::Reason::RateLimit, "HTTP status code 429",
                                        http::parseRetryHeaders(retryAfter, xRateLimitReset));
            break;
        default: {
            const bool serverSide = status >= 500 && status < 600;
            const auto reason = serverSide ? Error::Reason::Server : Error::Reason::Other;
            response->error = std::make_unique<Error>(
                reason, std::string{ "HTTP status code " } + util::toString(status));
            break;
        }
        }
    } else {
        // Add human-readable error code. Failures to reach the server count as connection
        // errors; everything else is reported as a generic error.
        const bool connectionProblem =
            code == CURLE_COULDNT_RESOLVE_PROXY || code == CURLE_COULDNT_RESOLVE_HOST ||
            code == CURLE_COULDNT_CONNECT || code == CURLE_OPERATION_TIMEDOUT;
        const auto reason = connectionProblem ? Error::Reason::Connection : Error::Reason::Other;
        response->error = std::make_unique<Error>(
            reason, std::string{ curl::easy_strerror(code) } + ": " + error);
    }

    // Calling `callback` may result in deleting `this`. Copy data to temporaries first.
    auto notify = callback;
    auto snapshot = *response;
    notify(snapshot);
}
// Creates the file source together with its private cURL state.
HTTPFileSource::HTTPFileSource() : impl(std::make_unique<Impl>()) {}
// Defaulted here rather than in the header — presumably so that `impl`, which the constructor
// fills via std::make_unique<Impl>(), is destroyed in a place where Impl is a complete type.
HTTPFileSource::~HTTPFileSource() = default;
// Starts an HTTP request for `resource`. `callback` is invoked with the Response once the
// transfer has finished. The request is tied to the returned object: destroying it removes the
// transfer from the multi handle.
std::unique_ptr<AsyncRequest> HTTPFileSource::request(const Resource& resource, Callback callback) {
    // `callback` is our own by-value copy, so move it on instead of copying it a second time.
    return std::make_unique<HTTPRequest>(impl.get(), resource, std::move(callback));
}
// Upper bound on the number of requests that should be in flight at the same time.
uint32_t HTTPFileSource::maximumConcurrentRequests() {
    constexpr uint32_t kMaximumConcurrentRequests = 20;
    return kMaximumConcurrentRequests;
}
} // namespace mbgl