diff options
author | Jane Liu <janeliulwq@google.com> | 2017-06-01 18:56:09 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-06-01 23:17:03 +0000 |
commit | 4fd9a47465c0f2f17aa2a840d8452b2bee6bec33 (patch) | |
tree | 4dbd633596216ceddf817e693376a3626e28f112 | |
parent | 8cb884102c17ef0530277126fd8da054d329d065 (diff) | |
download | pdfium-4fd9a47465c0f2f17aa2a840d8452b2bee6bec33.tar.xz |
Basic APIs and tests for extracting annotations
1. Added API for extracting annotation properties:
* Added testing flag "--annot" that outputs the annotation properties
into a .txt file.
* Added two embedder tests covering all the API functions.
Bug=pdfium:737
Change-Id: I95943a9b2b3d5d431bc8a74a31b27b4f4b521026
Reviewed-on: https://pdfium-review.googlesource.com/6092
Commit-Queue: Jane Liu <janeliulwq@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | BUILD.gn | 3 | ||||
-rw-r--r-- | fpdfsdk/fpdfannot.cpp | 238 | ||||
-rw-r--r-- | fpdfsdk/fpdfannot_embeddertest.cpp | 132 | ||||
-rw-r--r-- | fpdfsdk/fpdfview.cpp | 9 | ||||
-rw-r--r-- | fpdfsdk/fpdfview_c_api_test.c | 10 | ||||
-rw-r--r-- | fpdfsdk/fsdk_define.h | 2 | ||||
-rw-r--r-- | public/fpdf_annot.h | 152 | ||||
-rw-r--r-- | public/fpdfview.h | 4 | ||||
-rw-r--r-- | samples/pdfium_test.cc | 161 | ||||
-rw-r--r-- | testing/resources/annotation_highlight_long_content.pdf | bin | 0 -> 38455 bytes | |||
-rw-r--r-- | testing/resources/annotation_ink_multiple.pdf | bin | 0 -> 52263 bytes |
11 files changed, 711 insertions, 0 deletions
@@ -135,6 +135,7 @@ static_library("pdfium") { "fpdfsdk/fpdf_structtree.cpp", "fpdfsdk/fpdf_sysfontinfo.cpp", "fpdfsdk/fpdf_transformpage.cpp", + "fpdfsdk/fpdfannot.cpp", "fpdfsdk/fpdfdoc.cpp", "fpdfsdk/fpdfeditimg.cpp", "fpdfsdk/fpdfeditpage.cpp", @@ -154,6 +155,7 @@ static_library("pdfium") { "fpdfsdk/pdfsdk_fieldaction.cpp", "fpdfsdk/pdfsdk_fieldaction.h", "public/cpp/fpdf_deleters.h", + "public/fpdf_annot.h", "public/fpdf_dataavail.h", "public/fpdf_doc.h", "public/fpdf_edit.h", @@ -1979,6 +1981,7 @@ test("pdfium_embeddertests") { "fpdfsdk/fpdf_dataavail_embeddertest.cpp", "fpdfsdk/fpdf_flatten_embeddertest.cpp", "fpdfsdk/fpdf_structtree_embeddertest.cpp", + "fpdfsdk/fpdfannot_embeddertest.cpp", "fpdfsdk/fpdfdoc_embeddertest.cpp", "fpdfsdk/fpdfedit_embeddertest.cpp", "fpdfsdk/fpdfext_embeddertest.cpp", diff --git a/fpdfsdk/fpdfannot.cpp b/fpdfsdk/fpdfannot.cpp new file mode 100644 index 0000000000..6e5b7ba063 --- /dev/null +++ b/fpdfsdk/fpdfannot.cpp @@ -0,0 +1,238 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "public/fpdf_annot.h" + +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfdoc/cpdf_annot.h" +#include "core/fpdfdoc/cpvt_color.h" +#include "core/fpdfdoc/cpvt_generateap.h" +#include "fpdfsdk/fsdk_define.h" + +// These checks ensure the consistency of annotation subtype values across core/ +// and public. +static_assert(static_cast<int>(CPDF_Annot::Subtype::UNKNOWN) == + FPDF_ANNOT_UNKNOWN, + "CPDF_Annot::UNKNOWN value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::TEXT) == FPDF_ANNOT_TEXT, + "CPDF_Annot::TEXT value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::LINK) == FPDF_ANNOT_LINK, + "CPDF_Annot::LINK value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::FREETEXT) == + FPDF_ANNOT_FREETEXT, + "CPDF_Annot::FREETEXT value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::LINE) == FPDF_ANNOT_LINE, + "CPDF_Annot::LINE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::SQUARE) == + FPDF_ANNOT_SQUARE, + "CPDF_Annot::SQUARE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::CIRCLE) == + FPDF_ANNOT_CIRCLE, + "CPDF_Annot::CIRCLE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::POLYGON) == + FPDF_ANNOT_POLYGON, + "CPDF_Annot::POLYGON value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::POLYLINE) == + FPDF_ANNOT_POLYLINE, + "CPDF_Annot::POLYLINE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::HIGHLIGHT) == + FPDF_ANNOT_HIGHLIGHT, + "CPDF_Annot::HIGHLIGHT value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::UNDERLINE) == + FPDF_ANNOT_UNDERLINE, + "CPDF_Annot::UNDERLINE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::SQUIGGLY) == + FPDF_ANNOT_SQUIGGLY, + "CPDF_Annot::SQUIGGLY value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::STRIKEOUT) == + FPDF_ANNOT_STRIKEOUT, + "CPDF_Annot::STRIKEOUT value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::STAMP) == FPDF_ANNOT_STAMP, + "CPDF_Annot::STAMP value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::CARET) == FPDF_ANNOT_CARET, + "CPDF_Annot::CARET value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::INK) == FPDF_ANNOT_INK, + "CPDF_Annot::INK value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::POPUP) == FPDF_ANNOT_POPUP, + "CPDF_Annot::POPUP value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::FILEATTACHMENT) == + FPDF_ANNOT_FILEATTACHMENT, + "CPDF_Annot::FILEATTACHMENT value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::SOUND) == FPDF_ANNOT_SOUND, + "CPDF_Annot::SOUND value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::MOVIE) == FPDF_ANNOT_MOVIE, + "CPDF_Annot::MOVIE value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::WIDGET) == + FPDF_ANNOT_WIDGET, + "CPDF_Annot::WIDGET value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::SCREEN) == + FPDF_ANNOT_SCREEN, + "CPDF_Annot::SCREEN value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::PRINTERMARK) == + FPDF_ANNOT_PRINTERMARK, + "CPDF_Annot::PRINTERMARK value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::TRAPNET) == + FPDF_ANNOT_TRAPNET, + "CPDF_Annot::TRAPNET value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::WATERMARK) == + FPDF_ANNOT_WATERMARK, + "CPDF_Annot::WATERMARK value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::THREED) == + FPDF_ANNOT_THREED, + "CPDF_Annot::THREED value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::RICHMEDIA) == + FPDF_ANNOT_RICHMEDIA, + "CPDF_Annot::RICHMEDIA value mismatch"); +static_assert(static_cast<int>(CPDF_Annot::Subtype::XFAWIDGET) == + FPDF_ANNOT_XFAWIDGET, + "CPDF_Annot::XFAWIDGET value mismatch"); + +DLLEXPORT int STDCALL FPDFPage_GetAnnotCount(FPDF_PAGE page) { + CPDF_Page* pPage = CPDFPageFromFPDFPage(page); + if (!pPage || !pPage->m_pFormDict) + return 0; + CPDF_Array* pAnnots = pPage->m_pFormDict->GetArrayFor("Annots"); + return pAnnots ? pAnnots->GetCount() : 0; +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFPage_GetAnnot(FPDF_PAGE page, + int index, + FPDF_ANNOTATION* annot) { + CPDF_Page* pPage = CPDFPageFromFPDFPage(page); + if (!pPage || !pPage->m_pFormDict || index < 0 || !annot) + return false; + CPDF_Array* pAnnots = pPage->m_pFormDict->GetArrayFor("Annots"); + if (!pAnnots || static_cast<size_t>(index) >= pAnnots->GetCount()) + return false; + + CPDF_Dictionary* pDict = ToDictionary(pAnnots->GetDirectObjectAt(index)); + *annot = FPDFAnnotationFromCPDFDictionary(pDict); + return *annot ? true : false; +} + +DLLEXPORT FPDF_ANNOTATION_SUBTYPE STDCALL +FPDFAnnot_GetSubtype(FPDF_ANNOTATION annot) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict) + return FPDF_ANNOT_UNKNOWN; + return static_cast<FPDF_ANNOTATION_SUBTYPE>( + CPDF_Annot::StringToAnnotSubtype(pAnnotDict->GetStringFor("Subtype"))); +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetColor(FPDF_ANNOTATION annot, + FPDFANNOT_COLORTYPE type, + unsigned int* R, + unsigned int* G, + unsigned int* B, + unsigned int* A) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict || !R || !G || !B || !A) + return false; + + CPDF_Array* pColor = pAnnotDict->GetArrayFor( + type == FPDFANNOT_COLORTYPE_InteriorColor ? "IC" : "C"); + *A = + (pAnnotDict->KeyExist("CA") ? pAnnotDict->GetNumberFor("CA") : 1) * 255.f; + if (!pColor) { + // Use default color. The default colors must be consistent with the ones + // used to generate AP. See calls to GetColorStringWithDefault() in + // CPVT_GenerateAP::Generate*AP(). + if (pAnnotDict->GetStringFor("Subtype") == "Highlight") { + *R = 255; + *G = 255; + *B = 0; + } else { + *R = 0; + *G = 0; + *B = 0; + } + return true; + } + CPVT_Color color = CPVT_Color::ParseColor(*pColor); + switch (color.nColorType) { + case CPVT_Color::kRGB: + *R = color.fColor1 * 255.f; + *G = color.fColor2 * 255.f; + *B = color.fColor3 * 255.f; + break; + case CPVT_Color::kGray: + *R = 255.f * color.fColor1; + *G = 255.f * color.fColor1; + *B = 255.f * color.fColor1; + break; + case CPVT_Color::kCMYK: + *R = 255.f * (1 - color.fColor1) * (1 - color.fColor4); + *G = 255.f * (1 - color.fColor2) * (1 - color.fColor4); + *B = 255.f * (1 - color.fColor3) * (1 - color.fColor4); + break; + case CPVT_Color::kTransparent: + *R = 0; + *G = 0; + *B = 0; + break; + } + return true; +} + +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_HasAttachmentPoints(FPDF_ANNOTATION annot) { + if (!annot) + return false; + FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot); + return subtype == FPDF_ANNOT_LINK || subtype == FPDF_ANNOT_HIGHLIGHT || + subtype == FPDF_ANNOT_UNDERLINE || subtype == FPDF_ANNOT_SQUIGGLY || + subtype == FPDF_ANNOT_STRIKEOUT; +} + +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_GetAttachmentPoints(FPDF_ANNOTATION annot, + FS_QUADPOINTSF* quadPoints) { + if (!annot || !quadPoints || !FPDFAnnot_HasAttachmentPoints(annot)) + return false; + CPDF_Array* pArray = + CPDFDictionaryFromFPDFAnnotation(annot)->GetArrayFor("QuadPoints"); + if (!pArray) + return false; + quadPoints->x1 = pArray->GetNumberAt(0); + quadPoints->y1 = pArray->GetNumberAt(1); + quadPoints->x2 = pArray->GetNumberAt(2); + quadPoints->y2 = pArray->GetNumberAt(3); + quadPoints->x3 = pArray->GetNumberAt(4); + quadPoints->y3 = pArray->GetNumberAt(5); + quadPoints->x4 = pArray->GetNumberAt(6); + quadPoints->y4 = pArray->GetNumberAt(7); + return true; +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetRect(FPDF_ANNOTATION annot, + FS_RECTF* rect) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!rect || !pAnnotDict) + return false; + CFX_FloatRect rt = pAnnotDict->GetRectFor("Rect"); + rect->left = rt.left; + rect->bottom = rt.bottom; + rect->right = rt.right; + rect->top = rt.top; + return true; +} + +DLLEXPORT unsigned long STDCALL FPDFAnnot_GetText(FPDF_ANNOTATION annot, + FPDFANNOT_TEXTTYPE type, + char* buffer, + unsigned long buflen) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict) + return 0; + CFX_ByteString key = type == FPDFANNOT_TEXTTYPE_Author ? "T" : "Contents"; + CFX_ByteString contents = pAnnotDict->GetUnicodeTextFor(key).UTF16LE_Encode(); + unsigned long len = contents.GetLength(); + if (buffer && buflen >= len) + memcpy(buffer, contents.c_str(), len); + return len; +} diff --git a/fpdfsdk/fpdfannot_embeddertest.cpp b/fpdfsdk/fpdfannot_embeddertest.cpp new file mode 100644 index 0000000000..27a76eecfa --- /dev/null +++ b/fpdfsdk/fpdfannot_embeddertest.cpp @@ -0,0 +1,132 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <vector> + +#include "public/fpdf_annot.h" +#include "public/fpdfview.h" +#include "testing/embedder_test.h" +#include "testing/gtest/include/gtest/gtest.h" + +class FPDFAnnotEmbeddertest : public EmbedderTest {}; + +TEST_F(FPDFAnnotEmbeddertest, ExtractHighlightLongContent) { + // Open a file with one annotation and load its first page. + ASSERT_TRUE(OpenDocument("annotation_highlight_long_content.pdf")); + FPDF_PAGE page = FPDF_LoadPage(document(), 0); + ASSERT_TRUE(page); + + // Check that there is a total of 1 annotation on its first page. + EXPECT_EQ(1, FPDFPage_GetAnnotCount(page)); + + // Check that the annotation is of type "highlight". + FPDF_ANNOTATION annot; + ASSERT_TRUE(FPDFPage_GetAnnot(page, 0, &annot)); + EXPECT_EQ(FPDF_ANNOT_HIGHLIGHT, FPDFAnnot_GetSubtype(annot)); + + // Check that the annotation color is yellow. + unsigned int R; + unsigned int G; + unsigned int B; + unsigned int A; + EXPECT_TRUE( + FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_Color, &R, &G, &B, &A)); + EXPECT_EQ(255u, R); + EXPECT_EQ(255u, G); + EXPECT_EQ(0u, B); + EXPECT_EQ(255u, A); + + // Check that the author is correct. + unsigned long len = + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, nullptr, 0); + std::vector<char> buf(len); + EXPECT_EQ(28u, FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, buf.data(), + len)); + EXPECT_STREQ(L"Jae Hyun Park", + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) + .c_str()); + + // Check that the content is correct. + len = FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(2690u, FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, + buf.data(), len)); + const wchar_t contents[] = + L"This is a note for that highlight annotation. Very long highlight " + "annotation. Long long long Long long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long long. END"; + EXPECT_STREQ(contents, + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) + .c_str()); + + // Check that the quadpoints are correct. + FS_QUADPOINTSF quadpoints; + ASSERT_TRUE(FPDFAnnot_GetAttachmentPoints(annot, &quadpoints)); + EXPECT_EQ(115.802643f, quadpoints.x1); + EXPECT_EQ(718.913940f, quadpoints.y1); + EXPECT_EQ(157.211182f, quadpoints.x4); + EXPECT_EQ(706.264465f, quadpoints.y4); + + UnloadPage(page); +} + +TEST_F(FPDFAnnotEmbeddertest, ExtractInkMultiple) { + // Open a file with three annotations and load its first page. + ASSERT_TRUE(OpenDocument("annotation_ink_multiple.pdf")); + FPDF_PAGE page = FPDF_LoadPage(document(), 0); + ASSERT_TRUE(page); + + // Check that there is a total of 3 annotation on its first page. + EXPECT_EQ(3, FPDFPage_GetAnnotCount(page)); + + // Check that the third annotation of type "ink". + FPDF_ANNOTATION annot; + ASSERT_TRUE(FPDFPage_GetAnnot(page, 2, &annot)); + EXPECT_EQ(FPDF_ANNOT_INK, FPDFAnnot_GetSubtype(annot)); + + // Check that the annotation color is blue with opacity. + unsigned int R; + unsigned int G; + unsigned int B; + unsigned int A; + EXPECT_TRUE( + FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_Color, &R, &G, &B, &A)); + EXPECT_EQ(0u, R); + EXPECT_EQ(0u, G); + EXPECT_EQ(255u, B); + EXPECT_EQ(76u, A); + + // Check that there is no content. + EXPECT_EQ(2u, + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, nullptr, 0)); + + // Check that the rectange coordinates are correct. + // Note that upon rendering, the rectangle coordinates will be adjusted. + FS_RECTF rect; + ASSERT_TRUE(FPDFAnnot_GetRect(annot, &rect)); + EXPECT_EQ(351.820404f, rect.left); + EXPECT_EQ(583.830688f, rect.bottom); + EXPECT_EQ(475.336090f, rect.right); + EXPECT_EQ(681.535034f, rect.top); + + UnloadPage(page); +} diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp index f20e8ab9f4..0dcb25aaca 100644 --- a/fpdfsdk/fpdfview.cpp +++ b/fpdfsdk/fpdfview.cpp @@ -17,6 +17,7 @@ #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/render/cpdf_progressiverenderer.h" @@ -303,6 +304,14 @@ CPDF_Page* CPDFPageFromFPDFPage(FPDF_PAGE page) { #endif // PDF_ENABLE_XFA } +FPDF_ANNOTATION FPDFAnnotationFromCPDFDictionary(CPDF_Dictionary* pDict) { + return static_cast<FPDF_ANNOTATION>(pDict); +} + +CPDF_Dictionary* CPDFDictionaryFromFPDFAnnotation(FPDF_ANNOTATION annot) { + return static_cast<CPDF_Dictionary*>(annot); +} + CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) { return static_cast<CFX_DIBitmap*>(bitmap); } diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index 37f1d91de2..5a65fc4111 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -9,6 +9,7 @@ #include "fpdfsdk/fpdfview_c_api_test.h" +#include "public/fpdf_annot.h" #include "public/fpdf_dataavail.h" #include "public/fpdf_doc.h" #include "public/fpdf_edit.h" @@ -33,6 +34,15 @@ fnptr g_c_api_test_fnptr = NULL; // Extern, so can't know it doesn't change. // Function to call from gtest harness to ensure linker resolution. int CheckPDFiumCApi() { + //fpdf_annot.h + CHK(FPDFPage_GetAnnotCount); + CHK(FPDFPage_GetAnnot); + CHK(FPDFAnnot_GetSubtype); + CHK(FPDFAnnot_GetColor); + CHK(FPDFAnnot_GetAttachmentPoints); + CHK(FPDFAnnot_GetRect); + CHK(FPDFAnnot_GetText); + // fpdf_dataavail.h CHK(FPDFAvail_Create); CHK(FPDFAvail_Destroy); diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h index e14cc19ea1..e49977976b 100644 --- a/fpdfsdk/fsdk_define.h +++ b/fpdfsdk/fsdk_define.h @@ -61,6 +61,8 @@ FPDF_DOCUMENT FPDFDocumentFromCPDFDocument(CPDF_Document* doc); CPDF_Page* CPDFPageFromFPDFPage(FPDF_PAGE page); +FPDF_ANNOTATION FPDFAnnotationFromCPDFDictionary(CPDF_Dictionary* pDict); +CPDF_Dictionary* CPDFDictionaryFromFPDFAnnotation(FPDF_ANNOTATION annot); CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap); void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable); diff --git a/public/fpdf_annot.h b/public/fpdf_annot.h new file mode 100644 index 0000000000..29207a6f60 --- /dev/null +++ b/public/fpdf_annot.h @@ -0,0 +1,152 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef PUBLIC_FPDF_ANNOT_H_ +#define PUBLIC_FPDF_ANNOT_H_ + +// NOLINTNEXTLINE(build/include) +#include "fpdfview.h" + +#include "public/fpdf_doc.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define FPDF_ANNOT_UNKNOWN 0 +#define FPDF_ANNOT_TEXT 1 +#define FPDF_ANNOT_LINK 2 +#define FPDF_ANNOT_FREETEXT 3 +#define FPDF_ANNOT_LINE 4 +#define FPDF_ANNOT_SQUARE 5 +#define FPDF_ANNOT_CIRCLE 6 +#define FPDF_ANNOT_POLYGON 7 +#define FPDF_ANNOT_POLYLINE 8 +#define FPDF_ANNOT_HIGHLIGHT 9 +#define FPDF_ANNOT_UNDERLINE 10 +#define FPDF_ANNOT_SQUIGGLY 11 +#define FPDF_ANNOT_STRIKEOUT 12 +#define FPDF_ANNOT_STAMP 13 +#define FPDF_ANNOT_CARET 14 +#define FPDF_ANNOT_INK 15 +#define FPDF_ANNOT_POPUP 16 +#define FPDF_ANNOT_FILEATTACHMENT 17 +#define FPDF_ANNOT_SOUND 18 +#define FPDF_ANNOT_MOVIE 19 +#define FPDF_ANNOT_WIDGET 20 +#define FPDF_ANNOT_SCREEN 21 +#define FPDF_ANNOT_PRINTERMARK 22 +#define FPDF_ANNOT_TRAPNET 23 +#define FPDF_ANNOT_WATERMARK 24 +#define FPDF_ANNOT_THREED 25 +#define FPDF_ANNOT_RICHMEDIA 26 +#define FPDF_ANNOT_XFAWIDGET 27 + +// Get the number of annotations in |page|. +// +// page - handle to a page. +// +// Returns the number of annotations in |page|. +DLLEXPORT int STDCALL FPDFPage_GetAnnotCount(FPDF_PAGE page); + +// Get annotation in |page| at |index|. +// +// page - handle to a page. +// index - the index of the annotation. +// annot - receives the annotation +// +// Returns true if successful, false otherwise. +DLLEXPORT FPDF_BOOL STDCALL FPDFPage_GetAnnot(FPDF_PAGE page, + int index, + FPDF_ANNOTATION* annot); + +// Get the subtype of an annotation. +// +// annot - handle to an annotation. +// +// Returns the annotation subtype. +DLLEXPORT FPDF_ANNOTATION_SUBTYPE STDCALL +FPDFAnnot_GetSubtype(FPDF_ANNOTATION annot); + +typedef enum FPDFANNOT_COLORTYPE { + FPDFANNOT_COLORTYPE_Color = 0, + FPDFANNOT_COLORTYPE_InteriorColor +} FPDFANNOT_COLORTYPE; + +// Get the color of an annotation. If no color is specified, default to yellow +// for highlight annotation, black for all else. +// +// annot - handle to an annotation. +// type - type of the color requested. Default to Color. +// R, G, B - buffer to hold the RGB value of the color. Ranges from 0 to 255. +// A - buffer to hold the opacity. Ranges from 0 to 255. +// +// Returns true if successful, false otherwise. +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetColor(FPDF_ANNOTATION annot, + FPDFANNOT_COLORTYPE type, + unsigned int* R, + unsigned int* G, + unsigned int* B, + unsigned int* A); + +// Check if the annotation is of a type that has attachment points +// (i.e. quadpoints). Quadpoints are the vertices of the rectange that +// encompasses the texts affected by the annotation. They provide the +// coordinates in the page where the annotation is attached. Only markup +// annotations (i.e. highlight, strikeout, squiggly, underline, and link) have +// quadpoints. +// +// annot - handle to an annotation. +// +// Returns true if the annotation is of a type that has quadpoints, false +// otherwise. +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_HasAttachmentPoints(FPDF_ANNOTATION annot); + +// Get the attachment points (i.e. quadpoints) of an annotation. +// +// annot - handle to an annotation. +// quadPoints - receives the attachment points +// +// Returns true if successful, false otherwise. +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_GetAttachmentPoints(FPDF_ANNOTATION annot, + FS_QUADPOINTSF* quadPoints); + +// Get the annotation rectangle defining the location of the annotation. +// +// annot - handle to an annotation. +// rect - receives the annotation rectangle +// +// Returns true if successful, false otherwise. +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetRect(FPDF_ANNOTATION annot, + FS_RECTF* rect); + +typedef enum FPDFANNOT_TEXTTYPE { + FPDFANNOT_TEXTTYPE_Contents = 0, + FPDFANNOT_TEXTTYPE_Author +} FPDFANNOT_TEXTTYPE; + +// Get the contents of an annotation. |buffer| is only modified if |buflen| +// is longer than the length of contents. +// +// annot - handle to an annotation. +// type - type of the text requested. Default to Contents. +// buffer - buffer for holding the contents string, encoded in UTF16-LE. +// buflen - length of the buffer. +// +// Returns the length of the contents. + +DLLEXPORT unsigned long STDCALL FPDFAnnot_GetText(FPDF_ANNOTATION annot, + FPDFANNOT_TEXTTYPE type, + char* buffer, + unsigned long buflen); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // PUBLIC_FPDF_ANNOT_H_ diff --git a/public/fpdfview.h b/public/fpdfview.h index 13e4f1f5f1..ba5cb4ff66 100644 --- a/public/fpdfview.h +++ b/public/fpdfview.h @@ -22,6 +22,7 @@ // PDF types typedef void* FPDF_ACTION; +typedef void* FPDF_ANNOTATION; typedef void* FPDF_BITMAP; typedef void* FPDF_BOOKMARK; typedef void* FPDF_CLIPPATH; @@ -122,6 +123,9 @@ typedef struct _FS_RECTF_ { // Const Pointer to FS_RECTF structure. typedef const FS_RECTF* FS_LPCRECTF; +// Annotation subtype. +typedef int FPDF_ANNOTATION_SUBTYPE; + #if defined(_WIN32) && defined(FPDFSDK_EXPORTS) // On Windows system, functions are exported in a DLL #define DLLEXPORT __declspec(dllexport) diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc index e33ceadabf..5520fa4c26 100644 --- a/samples/pdfium_test.cc +++ b/samples/pdfium_test.cc @@ -19,6 +19,7 @@ #include "core/fdrm/crypto/fx_crypt.h" #include "public/cpp/fpdf_deleters.h" +#include "public/fpdf_annot.h" #include "public/fpdf_dataavail.h" #include "public/fpdf_edit.h" #include "public/fpdf_ext.h" @@ -28,6 +29,7 @@ #include "public/fpdfview.h" #include "samples/image_diff_png.h" #include "testing/test_support.h" +#include "third_party/base/logging.h" #ifdef _WIN32 #include <io.h> @@ -57,6 +59,7 @@ enum OutputFormat { OUTPUT_TEXT, OUTPUT_PPM, OUTPUT_PNG, + OUTPUT_ANNOT, #ifdef _WIN32 OUTPUT_BMP, OUTPUT_EMF, @@ -193,6 +196,154 @@ void WriteText(FPDF_PAGE page, const char* pdf_name, int num) { (void)fclose(fp); } +std::string AnnotSubtypeToString(FPDF_ANNOTATION_SUBTYPE subtype) { + if (subtype == FPDF_ANNOT_TEXT) + return "Text"; + if (subtype == FPDF_ANNOT_LINK) + return "Link"; + if (subtype == FPDF_ANNOT_FREETEXT) + return "FreeText"; + if (subtype == FPDF_ANNOT_LINE) + return "Line"; + if (subtype == FPDF_ANNOT_SQUARE) + return "Square"; + if (subtype == FPDF_ANNOT_CIRCLE) + return "Circle"; + if (subtype == FPDF_ANNOT_POLYGON) + return "Polygon"; + if (subtype == FPDF_ANNOT_POLYLINE) + return "PolyLine"; + if (subtype == FPDF_ANNOT_HIGHLIGHT) + return "Highlight"; + if (subtype == FPDF_ANNOT_UNDERLINE) + return "Underline"; + if (subtype == FPDF_ANNOT_SQUIGGLY) + return "Squiggly"; + if (subtype == FPDF_ANNOT_STRIKEOUT) + return "StrikeOut"; + if (subtype == FPDF_ANNOT_STAMP) + return "Stamp"; + if (subtype == FPDF_ANNOT_CARET) + return "Caret"; + if (subtype == FPDF_ANNOT_INK) + return "Ink"; + if (subtype == FPDF_ANNOT_POPUP) + return "Popup"; + if (subtype == FPDF_ANNOT_FILEATTACHMENT) + return "FileAttachment"; + if (subtype == FPDF_ANNOT_SOUND) + return "Sound"; + if (subtype == FPDF_ANNOT_MOVIE) + return "Movie"; + if (subtype == FPDF_ANNOT_WIDGET) + return "Widget"; + if (subtype == FPDF_ANNOT_SCREEN) + return "Screen"; + if (subtype == FPDF_ANNOT_PRINTERMARK) + return "PrinterMark"; + if (subtype == FPDF_ANNOT_TRAPNET) + return "TrapNet"; + if (subtype == FPDF_ANNOT_WATERMARK) + return "Watermark"; + if (subtype == FPDF_ANNOT_THREED) + return "3D"; + if (subtype == FPDF_ANNOT_RICHMEDIA) + return "RichMedia"; + if (subtype == FPDF_ANNOT_XFAWIDGET) + return "XFAWidget"; + NOTREACHED(); + return ""; +} + +void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) { + // Open the output text file. + char filename[256]; + int chars_formatted = + snprintf(filename, sizeof(filename), "%s.%d.annot.txt", pdf_name, num); + if (chars_formatted < 0 || + static_cast<size_t>(chars_formatted) >= sizeof(filename)) { + fprintf(stderr, "Filename %s is too long\n", filename); + return; + } + FILE* fp = fopen(filename, "w"); + if (!fp) { + fprintf(stderr, "Failed to open %s for output\n", filename); + return; + } + + int annot_count = FPDFPage_GetAnnotCount(page); + fprintf(fp, "Number of annotations: %d\n\n", annot_count); + + // Iterate through all annotations on this page. + for (int i = 0; i < annot_count; i++) { + // Retrieve the annotation object and its subtype. + fprintf(fp, "Annotation #%d:\n", i + 1); + FPDF_ANNOTATION annot; + if (!FPDFPage_GetAnnot(page, i, &annot)) { + fprintf(fp, "Failed to retrieve annotation!\n\n"); + continue; + } + FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot); + fprintf(fp, "Subtype: %s\n", AnnotSubtypeToString(subtype).c_str()); + + // Retrieve the annotation's color and interior color. + unsigned int R; + unsigned int G; + unsigned int B; + unsigned int A; + if (!FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_Color, &R, &G, &B, &A)) { + fprintf(fp, "Failed to retrieve color.\n"); + } else { + fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A); + } + if (!FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_InteriorColor, &R, &G, + &B, &A)) { + fprintf(fp, "Failed to retrieve interior color.\n"); + } else { + fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A); + } + + // Retrieve the annotation's contents and author. + unsigned long len = + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, nullptr, 0); + std::vector<char> buf(len); + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, buf.data(), len); + fprintf(fp, "Content: %ls\n", + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) + .c_str()); + len = FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, nullptr, 0); + buf.clear(); + buf.resize(len); + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, buf.data(), len); + fprintf(fp, "Author: %ls\n", + GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())) + .c_str()); + + // Retrieve the annotation's quadpoints if it is a markup annotation. + FS_QUADPOINTSF quadpoints; + if (FPDFAnnot_HasAttachmentPoints(annot)) { + if (!FPDFAnnot_GetAttachmentPoints(annot, &quadpoints)) { + fprintf(fp, "Failed to retrieve quadpoints.\n"); + } else { + fprintf(fp, "Quadpoints: (%f, %f), (%f, %f), (%f, %f), (%f, %f)\n", + quadpoints.x1, quadpoints.y1, quadpoints.x2, quadpoints.y2, + quadpoints.x3, quadpoints.y3, quadpoints.x4, quadpoints.y4); + } + } + + // Retrieve the annotation's rectangle coordinates. + FS_RECTF rect; + if (!FPDFAnnot_GetRect(annot, &rect)) { + fprintf(fp, "Failed to retrieve rectangle.\n\n"); + } else { + fprintf(fp, "Rectangle: l - %f, b - %f, r - %f, t - %f\n\n", rect.left, + rect.bottom, rect.right, rect.top); + } + } + + (void)fclose(fp); +} + static std::string WritePng(const char* pdf_name, int num, const void* buffer_void, @@ -494,6 +645,12 @@ bool ParseCommandLine(const std::vector<std::string>& args, return false; } options->output_format = OUTPUT_TEXT; + } else if (cur_arg == "--annot") { + if (options->output_format != OUTPUT_NONE) { + fprintf(stderr, "Duplicate or conflicting --annot argument\n"); + return false; + } + options->output_format = OUTPUT_ANNOT; #ifdef PDF_ENABLE_SKIA } else if (cur_arg == "--skp") { if (options->output_format != OUTPUT_NONE) { @@ -807,6 +964,10 @@ bool RenderPage(const std::string& name, WriteText(page.get(), name.c_str(), page_index); break; + case OUTPUT_ANNOT: + WriteAnnot(page.get(), name.c_str(), page_index); + break; + case OUTPUT_PNG: image_file_name = WritePng(name.c_str(), page_index, buffer, stride, width, height); diff --git a/testing/resources/annotation_highlight_long_content.pdf b/testing/resources/annotation_highlight_long_content.pdf Binary files differnew file mode 100644 index 0000000000..7fd9103c41 --- /dev/null +++ b/testing/resources/annotation_highlight_long_content.pdf diff --git a/testing/resources/annotation_ink_multiple.pdf b/testing/resources/annotation_ink_multiple.pdf Binary files differnew file mode 100644 index 0000000000..5c3d4a0f63 --- /dev/null +++ b/testing/resources/annotation_ink_multiple.pdf |