diff options
-rw-r--r-- | core/fpdfapi/parser/fpdf_parser_decode.cpp | 39 | ||||
-rw-r--r-- | fpdfsdk/fpdf_doc_embeddertest.cpp | 11 | ||||
-rw-r--r-- | testing/resources/bug_182.pdf | bin | 0 -> 17012 bytes |
3 files changed, 37 insertions, 13 deletions
diff --git a/core/fpdfapi/parser/fpdf_parser_decode.cpp b/core/fpdfapi/parser/fpdf_parser_decode.cpp index 928d650103..063d21ae5f 100644 --- a/core/fpdfapi/parser/fpdf_parser_decode.cpp +++ b/core/fpdfapi/parser/fpdf_parser_decode.cpp @@ -29,8 +29,12 @@ namespace { const uint32_t kMaxStreamSize = 20 * 1024 * 1024; -uint16_t GetUnicodeFromBytes(const uint8_t* bytes, bool bBE) { - return bBE ? (bytes[0] << 8 | bytes[1]) : (bytes[1] << 8 | bytes[0]); +uint16_t GetUnicodeFromBigEndianBytes(const uint8_t* bytes) { + return bytes[0] << 8 | bytes[1]; +} + +uint16_t GetUnicodeFromLittleEndianBytes(const uint8_t* bytes) { + return bytes[1] << 8 | bytes[0]; } bool CheckFlateDecodeParams(int Colors, int BitsPerComponent, int Columns) { @@ -436,21 +440,30 @@ WideString PDF_DecodeText(const uint8_t* src_data, uint32_t src_len) { return result; pdfium::span<wchar_t> dest_buf = result.GetBuffer(max_chars); - bool bBE = src_data[0] == 0xfe || (src_data[0] == 0xff && !src_data[2]); - const uint8_t* uni_str = src_data + 2; + uint16_t (*GetUnicodeFromBytes)(const uint8_t*) = + src_data[0] == 0xfe ? GetUnicodeFromBigEndianBytes + : GetUnicodeFromLittleEndianBytes; + const uint8_t* unicode_str = src_data + 2; for (uint32_t i = 0; i < max_chars * 2; i += 2) { - uint16_t unicode = GetUnicodeFromBytes(uni_str + i, bBE); - if (unicode != 0x1b) { - dest_buf[dest_pos++] = unicode; - continue; - } - i += 2; - while (i < max_chars * 2) { - uint16_t unicode2 = GetUnicodeFromBytes(uni_str + i, bBE); + uint16_t unicode = GetUnicodeFromBytes(unicode_str + i); + + // 0x001B is a begin/end marker for language metadata region that + // should not be in the decoded text. + if (unicode == 0x001B) { i += 2; - if (unicode2 == 0x1b) + for (; i < max_chars * 2; i += 2) { + unicode = GetUnicodeFromBytes(unicode_str + i); + if (unicode == 0x001B) { + i += 2; + unicode = GetUnicodeFromBytes(unicode_str + i); + break; + } + } + if (i >= max_chars * 2) break; } + + dest_buf[dest_pos++] = unicode; } } else { pdfium::span<wchar_t> dest_buf = result.GetBuffer(src_len); diff --git a/fpdfsdk/fpdf_doc_embeddertest.cpp b/fpdfsdk/fpdf_doc_embeddertest.cpp index a9eb4b8cb7..5c0223ee46 100644 --- a/fpdfsdk/fpdf_doc_embeddertest.cpp +++ b/fpdfsdk/fpdf_doc_embeddertest.cpp @@ -358,6 +358,17 @@ TEST_F(FPDFDocEmbeddertest, GetMetaText) { WideString::FromUTF16LE(buf, FXSYS_len(kExpectedModDate))); } +TEST_F(FPDFDocEmbeddertest, Bug_182) { + ASSERT_TRUE(OpenDocument("bug_182.pdf")); + + unsigned short buf[128]; + constexpr wchar_t kExpectedTitle[] = L"Super Visual Formade 印刷"; + + ASSERT_EQ(48u, FPDF_GetMetaText(document(), "Title", buf, sizeof(buf))); + EXPECT_EQ(WideString(kExpectedTitle), + WideString::FromUTF16LE(buf, FXSYS_len(kExpectedTitle))); +} + TEST_F(FPDFDocEmbeddertest, GetMetaTextSameObjectNumber) { ASSERT_TRUE(OpenDocument("annotation_highlight_square_with_ap.pdf")); diff --git a/testing/resources/bug_182.pdf b/testing/resources/bug_182.pdf Binary files differnew file mode 100644 index 0000000000..bf35cc918a --- /dev/null +++ b/testing/resources/bug_182.pdf |