diff options
Diffstat (limited to 'xfa_test/pdf/document_loader.cc')
-rw-r--r-- | xfa_test/pdf/document_loader.cc | 513 |
1 files changed, 0 insertions, 513 deletions
diff --git a/xfa_test/pdf/document_loader.cc b/xfa_test/pdf/document_loader.cc deleted file mode 100644 index b2628a6271..0000000000 --- a/xfa_test/pdf/document_loader.cc +++ /dev/null @@ -1,513 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "pdf/document_loader.h" - -#include "base/logging.h" -#include "base/strings/string_util.h" -#include "net/http/http_util.h" -#include "ppapi/c/pp_errors.h" -#include "ppapi/cpp/url_loader.h" -#include "ppapi/cpp/url_request_info.h" -#include "ppapi/cpp/url_response_info.h" - -namespace chrome_pdf { - -// Document below size will be downloaded in one chunk. -const uint32 kMinFileSize = 64*1024; - -DocumentLoader::DocumentLoader(Client* client) - : client_(client), partial_document_(false), request_pending_(false), - current_pos_(0), current_chunk_size_(0), current_chunk_read_(0), - document_size_(0), header_request_(true), is_multipart_(false) { - loader_factory_.Initialize(this); -} - -DocumentLoader::~DocumentLoader() { -} - -bool DocumentLoader::Init(const pp::URLLoader& loader, - const std::string& url, - const std::string& headers) { - DCHECK(url_.empty()); - url_ = url; - loader_ = loader; - - std::string response_headers; - if (!headers.empty()) { - response_headers = headers; - } else { - pp::URLResponseInfo response = loader_.GetResponseInfo(); - pp::Var headers_var = response.GetHeaders(); - - if (headers_var.is_string()) { - response_headers = headers_var.AsString(); - } - } - - bool accept_ranges_bytes = false; - bool content_encoded = false; - uint32 content_length = 0; - std::string type; - std::string disposition; - if (!response_headers.empty()) { - net::HttpUtil::HeadersIterator it(response_headers.begin(), - response_headers.end(), "\n"); - while (it.GetNext()) { - if (LowerCaseEqualsASCII(it.name(), "content-length")) { - content_length = atoi(it.values().c_str()); - } else if (LowerCaseEqualsASCII(it.name(), "accept-ranges")) { - accept_ranges_bytes = LowerCaseEqualsASCII(it.values(), "bytes"); - } else if (LowerCaseEqualsASCII(it.name(), "content-encoding")) { - content_encoded = true; - } else if (LowerCaseEqualsASCII(it.name(), "content-type")) { - type = it.values(); - size_t semi_colon_pos = type.find(';'); - if (semi_colon_pos != std::string::npos) { - type = type.substr(0, semi_colon_pos); - } - TrimWhitespace(type, base::TRIM_ALL, &type); - } else if (LowerCaseEqualsASCII(it.name(), "content-disposition")) { - disposition = it.values(); - } - } - } - if (!type.empty() && - !EndsWith(type, "/pdf", false) && - !EndsWith(type, ".pdf", false) && - !EndsWith(type, "/x-pdf", false) && - !EndsWith(type, "/*", false) && - !EndsWith(type, "/acrobat", false) && - !EndsWith(type, "/unknown", false)) { - return false; - } - if (StartsWithASCII(disposition, "attachment", false)) { - return false; - } - - if (content_length > 0) - chunk_stream_.Preallocate(content_length); - - document_size_ = content_length; - requests_count_ = 0; - - // Enable partial loading only if file size is above the threshold. - // It will allow avoiding latency for multiple requests. - if (content_length > kMinFileSize && - accept_ranges_bytes && - !content_encoded) { - LoadPartialDocument(); - } else { - LoadFullDocument(); - } - return true; -} - -void DocumentLoader::LoadPartialDocument() { - partial_document_ = true; - // Force the main request to be cancelled, since if we're a full-frame plugin - // there could be other references to the loader. - loader_.Close(); - loader_ = pp::URLLoader(); - // Download file header. - header_request_ = true; - RequestData(0, std::min(GetRequestSize(), document_size_)); -} - -void DocumentLoader::LoadFullDocument() { - partial_document_ = false; - chunk_buffer_.clear(); - ReadMore(); -} - -bool DocumentLoader::IsDocumentComplete() const { - if (document_size_ == 0) // Document size unknown. - return false; - return IsDataAvailable(0, document_size_); -} - -uint32 DocumentLoader::GetAvailableData() const { - if (document_size_ == 0) { // If document size is unknown. - return current_pos_; - } - - std::vector<std::pair<size_t, size_t> > ranges; - chunk_stream_.GetMissedRanges(0, document_size_, &ranges); - uint32 available = document_size_; - std::vector<std::pair<size_t, size_t> >::iterator it; - for (it = ranges.begin(); it != ranges.end(); ++it) { - available -= it->second; - } - return available; -} - -void DocumentLoader::ClearPendingRequests() { - // The first item in the queue is pending (need to keep it in the queue). - if (pending_requests_.size() > 1) { - // Remove all elements except the first one. - pending_requests_.erase(++pending_requests_.begin(), - pending_requests_.end()); - } -} - -bool DocumentLoader::GetBlock(uint32 position, uint32 size, void* buf) const { - return chunk_stream_.ReadData(position, size, buf); -} - -bool DocumentLoader::IsDataAvailable(uint32 position, uint32 size) const { - return chunk_stream_.IsRangeAvailable(position, size); -} - -void DocumentLoader::RequestData(uint32 position, uint32 size) { - DCHECK(partial_document_); - - // We have some artefact request from - // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after - // document is complete. - // We need this fix in PDFIum. Adding this as a work around. - // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 - // Test url: - // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf - if (IsDocumentComplete()) - return; - - pending_requests_.push_back(std::pair<size_t, size_t>(position, size)); - DownloadPendingRequests(); -} - -void DocumentLoader::DownloadPendingRequests() { - if (request_pending_ || pending_requests_.empty()) - return; - - // Remove already completed requests. - // By design DownloadPendingRequests() should have at least 1 request in the - // queue. ReadComplete() will remove the last pending comment from the queue. - while (pending_requests_.size() > 1) { - if (IsDataAvailable(pending_requests_.front().first, - pending_requests_.front().second)) { - pending_requests_.pop_front(); - } else { - break; - } - } - - uint32 pos = pending_requests_.front().first; - uint32 size = pending_requests_.front().second; - if (IsDataAvailable(pos, size)) { - ReadComplete(); - return; - } - - // If current request has been partially downloaded already, split it into - // a few smaller requests. - std::vector<std::pair<size_t, size_t> > ranges; - chunk_stream_.GetMissedRanges(pos, size, &ranges); - if (ranges.size() > 0) { - pending_requests_.pop_front(); - pending_requests_.insert(pending_requests_.begin(), - ranges.begin(), ranges.end()); - pos = pending_requests_.front().first; - size = pending_requests_.front().second; - } - - uint32 cur_request_size = GetRequestSize(); - // If size is less than default request, try to expand download range for - // more optimal download. - if (size < cur_request_size && partial_document_) { - // First, try to expand block towards the end of the file. - uint32 new_pos = pos; - uint32 new_size = cur_request_size; - if (pos + new_size > document_size_) - new_size = document_size_ - pos; - - std::vector<std::pair<size_t, size_t> > ranges; - if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { - new_pos = ranges[0].first; - new_size = ranges[0].second; - } - - // Second, try to expand block towards the beginning of the file. - if (new_size < cur_request_size) { - uint32 block_end = new_pos + new_size; - if (block_end > cur_request_size) { - new_pos = block_end - cur_request_size; - } else { - new_pos = 0; - } - new_size = block_end - new_pos; - - if (chunk_stream_.GetMissedRanges(new_pos, new_size, &ranges)) { - new_pos = ranges.back().first; - new_size = ranges.back().second; - } - } - pos = new_pos; - size = new_size; - } - - size_t last_byte_before = chunk_stream_.GetLastByteBefore(pos); - size_t first_byte_after = chunk_stream_.GetFirstByteAfter(pos + size - 1); - if (pos - last_byte_before < cur_request_size) { - size = pos + size - last_byte_before; - pos = last_byte_before; - } - - if ((pos + size < first_byte_after) && - (pos + size + cur_request_size >= first_byte_after)) - size = first_byte_after - pos; - - request_pending_ = true; - - // Start downloading first pending request. - loader_.Close(); - loader_ = client_->CreateURLLoader(); - pp::CompletionCallback callback = - loader_factory_.NewCallback(&DocumentLoader::DidOpen); - pp::URLRequestInfo request = GetRequest(pos, size); - requests_count_++; - int rv = loader_.Open(request, callback); - if (rv != PP_OK_COMPLETIONPENDING) - callback.Run(rv); -} - -pp::URLRequestInfo DocumentLoader::GetRequest(uint32 position, - uint32 size) const { - pp::URLRequestInfo request(client_->GetPluginInstance()); - request.SetURL(url_.c_str()); - request.SetMethod("GET"); - request.SetFollowRedirects(true); - - const size_t kBufSize = 100; - char buf[kBufSize]; - // According to rfc2616, byte range specifies position of the first and last - // bytes in the requested range inclusively. Therefore we should subtract 1 - // from the position + size, to get index of the last byte that needs to be - // downloaded. - base::snprintf(buf, kBufSize, "Range: bytes=%d-%d", position, - position + size - 1); - pp::Var header(buf); - request.SetHeaders(header); - - return request; -} - -void DocumentLoader::DidOpen(int32_t result) { - if (result != PP_OK) { - NOTREACHED(); - return; - } - - int32_t http_code = loader_.GetResponseInfo().GetStatusCode(); - if (http_code >= 400 && http_code < 500) { - // Error accessing resource. 4xx error indicate subsequent requests - // will fail too. - // E.g. resource has been removed from the server while loading it. - // https://code.google.com/p/chromium/issues/detail?id=414827 - return; - } - - is_multipart_ = false; - current_chunk_size_ = 0; - current_chunk_read_ = 0; - - pp::Var headers_var = loader_.GetResponseInfo().GetHeaders(); - std::string headers; - if (headers_var.is_string()) - headers = headers_var.AsString(); - - std::string boundary = GetMultiPartBoundary(headers); - if (boundary.size()) { - // Leave position untouched for now, when we read the data we'll get it. - is_multipart_ = true; - multipart_boundary_ = boundary; - } else { - // Need to make sure that the server returned a byte-range, since it's - // possible for a server to just ignore our bye-range request and just - // return the entire document even if it supports byte-range requests. - // i.e. sniff response to - // http://www.act.org/compass/sample/pdf/geometry.pdf - current_pos_ = 0; - uint32 start_pos, end_pos; - if (GetByteRange(headers, &start_pos, &end_pos)) { - current_pos_ = start_pos; - if (end_pos && end_pos > start_pos) - current_chunk_size_ = end_pos - start_pos + 1; - } - } - - ReadMore(); -} - -bool DocumentLoader::GetByteRange(const std::string& headers, uint32* start, - uint32* end) { - net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); - while (it.GetNext()) { - if (LowerCaseEqualsASCII(it.name(), "content-range")) { - std::string range = it.values().c_str(); - if (StartsWithASCII(range, "bytes", false)) { - range = range.substr(strlen("bytes")); - std::string::size_type pos = range.find('-'); - std::string range_end; - if (pos != std::string::npos) - range_end = range.substr(pos + 1); - TrimWhitespaceASCII(range, base::TRIM_LEADING, &range); - TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end); - *start = atoi(range.c_str()); - *end = atoi(range_end.c_str()); - return true; - } - } - } - return false; -} - -std::string DocumentLoader::GetMultiPartBoundary(const std::string& headers) { - net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n"); - while (it.GetNext()) { - if (LowerCaseEqualsASCII(it.name(), "content-type")) { - std::string type = base::StringToLowerASCII(it.values()); - if (StartsWithASCII(type, "multipart/", true)) { - const char* boundary = strstr(type.c_str(), "boundary="); - if (!boundary) { - NOTREACHED(); - break; - } - - return std::string(boundary + 9); - } - } - } - return std::string(); -} - -void DocumentLoader::ReadMore() { - pp::CompletionCallback callback = - loader_factory_.NewCallback(&DocumentLoader::DidRead); - int rv = loader_.ReadResponseBody(buffer_, sizeof(buffer_), callback); - if (rv != PP_OK_COMPLETIONPENDING) - callback.Run(rv); -} - -void DocumentLoader::DidRead(int32_t result) { - if (result > 0) { - char* start = buffer_; - size_t length = result; - if (is_multipart_ && result > 2) { - for (int i = 2; i < result; ++i) { - if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') || - (i >= 4 && - buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' && - buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) { - uint32 start_pos, end_pos; - if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) { - current_pos_ = start_pos; - start += i; - length -= i; - if (end_pos && end_pos > start_pos) - current_chunk_size_ = end_pos - start_pos + 1; - } - break; - } - } - - // Reset this flag so we don't look inside the buffer in future calls of - // DidRead for this response. Note that this code DOES NOT handle multi- - // part responses with more than one part (we don't issue them at the - // moment, so they shouldn't arrive). - is_multipart_ = false; - } - - if (current_chunk_size_ && - current_chunk_read_ + length > current_chunk_size_) - length = current_chunk_size_ - current_chunk_read_; - - if (length) { - if (document_size_ > 0) { - chunk_stream_.WriteData(current_pos_, start, length); - } else { - // If we did not get content-length in the response, we can't - // preallocate buffer for the entire document. Resizing array causing - // memory fragmentation issues on the large files and OOM exceptions. - // To fix this, we collect all chunks of the file to the list and - // concatenate them together after request is complete. - chunk_buffer_.push_back(std::vector<unsigned char>()); - chunk_buffer_.back().resize(length); - memcpy(&(chunk_buffer_.back()[0]), start, length); - } - current_pos_ += length; - current_chunk_read_ += length; - client_->OnNewDataAvailable(); - } - ReadMore(); - } else if (result == PP_OK) { - ReadComplete(); - } else { - NOTREACHED(); - } -} - -void DocumentLoader::ReadComplete() { - if (!partial_document_) { - if (document_size_ == 0) { - // For the document with no 'content-length" specified we've collected all - // the chunks already. Let's allocate final document buffer and copy them - // over. - chunk_stream_.Preallocate(current_pos_); - uint32 pos = 0; - std::list<std::vector<unsigned char> >::iterator it; - for (it = chunk_buffer_.begin(); it != chunk_buffer_.end(); ++it) { - chunk_stream_.WriteData(pos, &((*it)[0]), it->size()); - pos += it->size(); - } - chunk_buffer_.clear(); - } - document_size_ = current_pos_; - client_->OnDocumentComplete(); - return; - } - - request_pending_ = false; - pending_requests_.pop_front(); - - // If there are more pending request - continue downloading. - if (!pending_requests_.empty()) { - DownloadPendingRequests(); - return; - } - - if (IsDocumentComplete()) { - client_->OnDocumentComplete(); - return; - } - - if (header_request_) - client_->OnPartialDocumentLoaded(); - else - client_->OnPendingRequestComplete(); - header_request_ = false; - - // The OnPendingRequestComplete could have added more requests. - if (!pending_requests_.empty()) { - DownloadPendingRequests(); - } else { - // Document is not complete and we have no outstanding requests. - // Let's keep downloading PDF file in small chunks. - uint32 pos = chunk_stream_.GetFirstMissingByte(); - std::vector<std::pair<size_t, size_t> > ranges; - chunk_stream_.GetMissedRanges(pos, GetRequestSize(), &ranges); - DCHECK(ranges.size() > 0); - RequestData(ranges[0].first, ranges[0].second); - } -} - -uint32 DocumentLoader::GetRequestSize() const { - // Document loading strategy: - // For first 10 requests, we use 32k chunk sizes, for the next 10 requests we - // double the size (64k), and so on, until we cap max request size at 2M for - // 71 or more requests. - uint32 limited_count = std::min(std::max(requests_count_, 10u), 70u); - return 32*1024 * (1 << ((limited_count - 1) / 10u)); -} - -} // namespace chrome_pdf |