From 4fd9a47465c0f2f17aa2a840d8452b2bee6bec33 Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Thu, 1 Jun 2017 18:56:09 -0400 Subject: Basic APIs and tests for extracting annotations 1. Added API for extracting annotation properties: * Added testing flag "--annot" that outputs the annotation properties into a .txt file. * Added two embedder tests covering all the API functions. Bug=pdfium:737 Change-Id: I95943a9b2b3d5d431bc8a74a31b27b4f4b521026 Reviewed-on: https://pdfium-review.googlesource.com/6092 Commit-Queue: Jane Liu Reviewed-by: Lei Zhang Reviewed-by: dsinclair --- fpdfsdk/fpdfannot.cpp | 238 +++++++++++++++++++++++++++++++++++++ fpdfsdk/fpdfannot_embeddertest.cpp | 132 ++++++++++++++++++++ fpdfsdk/fpdfview.cpp | 9 ++ fpdfsdk/fpdfview_c_api_test.c | 10 ++ fpdfsdk/fsdk_define.h | 2 + 5 files changed, 391 insertions(+) create mode 100644 fpdfsdk/fpdfannot.cpp create mode 100644 fpdfsdk/fpdfannot_embeddertest.cpp (limited to 'fpdfsdk') diff --git a/fpdfsdk/fpdfannot.cpp b/fpdfsdk/fpdfannot.cpp new file mode 100644 index 0000000000..6e5b7ba063 --- /dev/null +++ b/fpdfsdk/fpdfannot.cpp @@ -0,0 +1,238 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "public/fpdf_annot.h" + +#include "core/fpdfapi/page/cpdf_page.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfdoc/cpdf_annot.h" +#include "core/fpdfdoc/cpvt_color.h" +#include "core/fpdfdoc/cpvt_generateap.h" +#include "fpdfsdk/fsdk_define.h" + +// These checks ensure the consistency of annotation subtype values across core/ +// and public. 
+static_assert(static_cast(CPDF_Annot::Subtype::UNKNOWN) == + FPDF_ANNOT_UNKNOWN, + "CPDF_Annot::UNKNOWN value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::TEXT) == FPDF_ANNOT_TEXT, + "CPDF_Annot::TEXT value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::LINK) == FPDF_ANNOT_LINK, + "CPDF_Annot::LINK value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::FREETEXT) == + FPDF_ANNOT_FREETEXT, + "CPDF_Annot::FREETEXT value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::LINE) == FPDF_ANNOT_LINE, + "CPDF_Annot::LINE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::SQUARE) == + FPDF_ANNOT_SQUARE, + "CPDF_Annot::SQUARE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::CIRCLE) == + FPDF_ANNOT_CIRCLE, + "CPDF_Annot::CIRCLE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::POLYGON) == + FPDF_ANNOT_POLYGON, + "CPDF_Annot::POLYGON value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::POLYLINE) == + FPDF_ANNOT_POLYLINE, + "CPDF_Annot::POLYLINE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::HIGHLIGHT) == + FPDF_ANNOT_HIGHLIGHT, + "CPDF_Annot::HIGHLIGHT value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::UNDERLINE) == + FPDF_ANNOT_UNDERLINE, + "CPDF_Annot::UNDERLINE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::SQUIGGLY) == + FPDF_ANNOT_SQUIGGLY, + "CPDF_Annot::SQUIGGLY value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::STRIKEOUT) == + FPDF_ANNOT_STRIKEOUT, + "CPDF_Annot::STRIKEOUT value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::STAMP) == FPDF_ANNOT_STAMP, + "CPDF_Annot::STAMP value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::CARET) == FPDF_ANNOT_CARET, + "CPDF_Annot::CARET value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::INK) == FPDF_ANNOT_INK, + "CPDF_Annot::INK value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::POPUP) == 
FPDF_ANNOT_POPUP, + "CPDF_Annot::POPUP value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::FILEATTACHMENT) == + FPDF_ANNOT_FILEATTACHMENT, + "CPDF_Annot::FILEATTACHMENT value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::SOUND) == FPDF_ANNOT_SOUND, + "CPDF_Annot::SOUND value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::MOVIE) == FPDF_ANNOT_MOVIE, + "CPDF_Annot::MOVIE value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::WIDGET) == + FPDF_ANNOT_WIDGET, + "CPDF_Annot::WIDGET value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::SCREEN) == + FPDF_ANNOT_SCREEN, + "CPDF_Annot::SCREEN value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::PRINTERMARK) == + FPDF_ANNOT_PRINTERMARK, + "CPDF_Annot::PRINTERMARK value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::TRAPNET) == + FPDF_ANNOT_TRAPNET, + "CPDF_Annot::TRAPNET value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::WATERMARK) == + FPDF_ANNOT_WATERMARK, + "CPDF_Annot::WATERMARK value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::THREED) == + FPDF_ANNOT_THREED, + "CPDF_Annot::THREED value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::RICHMEDIA) == + FPDF_ANNOT_RICHMEDIA, + "CPDF_Annot::RICHMEDIA value mismatch"); +static_assert(static_cast(CPDF_Annot::Subtype::XFAWIDGET) == + FPDF_ANNOT_XFAWIDGET, + "CPDF_Annot::XFAWIDGET value mismatch"); + +DLLEXPORT int STDCALL FPDFPage_GetAnnotCount(FPDF_PAGE page) { + CPDF_Page* pPage = CPDFPageFromFPDFPage(page); + if (!pPage || !pPage->m_pFormDict) + return 0; + CPDF_Array* pAnnots = pPage->m_pFormDict->GetArrayFor("Annots"); + return pAnnots ? 
pAnnots->GetCount() : 0; +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFPage_GetAnnot(FPDF_PAGE page, + int index, + FPDF_ANNOTATION* annot) { + CPDF_Page* pPage = CPDFPageFromFPDFPage(page); + if (!pPage || !pPage->m_pFormDict || index < 0 || !annot) + return false; + CPDF_Array* pAnnots = pPage->m_pFormDict->GetArrayFor("Annots"); + if (!pAnnots || static_cast(index) >= pAnnots->GetCount()) + return false; + + CPDF_Dictionary* pDict = ToDictionary(pAnnots->GetDirectObjectAt(index)); + *annot = FPDFAnnotationFromCPDFDictionary(pDict); + return *annot ? true : false; +} + +DLLEXPORT FPDF_ANNOTATION_SUBTYPE STDCALL +FPDFAnnot_GetSubtype(FPDF_ANNOTATION annot) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict) + return FPDF_ANNOT_UNKNOWN; + return static_cast( + CPDF_Annot::StringToAnnotSubtype(pAnnotDict->GetStringFor("Subtype"))); +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetColor(FPDF_ANNOTATION annot, + FPDFANNOT_COLORTYPE type, + unsigned int* R, + unsigned int* G, + unsigned int* B, + unsigned int* A) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict || !R || !G || !B || !A) + return false; + + CPDF_Array* pColor = pAnnotDict->GetArrayFor( + type == FPDFANNOT_COLORTYPE_InteriorColor ? "IC" : "C"); + *A = + (pAnnotDict->KeyExist("CA") ? pAnnotDict->GetNumberFor("CA") : 1) * 255.f; + if (!pColor) { + // Use default color. The default colors must be consistent with the ones + // used to generate AP. See calls to GetColorStringWithDefault() in + // CPVT_GenerateAP::Generate*AP(). 
+ if (pAnnotDict->GetStringFor("Subtype") == "Highlight") { + *R = 255; + *G = 255; + *B = 0; + } else { + *R = 0; + *G = 0; + *B = 0; + } + return true; + } + CPVT_Color color = CPVT_Color::ParseColor(*pColor); + switch (color.nColorType) { + case CPVT_Color::kRGB: + *R = color.fColor1 * 255.f; + *G = color.fColor2 * 255.f; + *B = color.fColor3 * 255.f; + break; + case CPVT_Color::kGray: + *R = 255.f * color.fColor1; + *G = 255.f * color.fColor1; + *B = 255.f * color.fColor1; + break; + case CPVT_Color::kCMYK: + *R = 255.f * (1 - color.fColor1) * (1 - color.fColor4); + *G = 255.f * (1 - color.fColor2) * (1 - color.fColor4); + *B = 255.f * (1 - color.fColor3) * (1 - color.fColor4); + break; + case CPVT_Color::kTransparent: + *R = 0; + *G = 0; + *B = 0; + break; + } + return true; +} + +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_HasAttachmentPoints(FPDF_ANNOTATION annot) { + if (!annot) + return false; + FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot); + return subtype == FPDF_ANNOT_LINK || subtype == FPDF_ANNOT_HIGHLIGHT || + subtype == FPDF_ANNOT_UNDERLINE || subtype == FPDF_ANNOT_SQUIGGLY || + subtype == FPDF_ANNOT_STRIKEOUT; +} + +DLLEXPORT FPDF_BOOL STDCALL +FPDFAnnot_GetAttachmentPoints(FPDF_ANNOTATION annot, + FS_QUADPOINTSF* quadPoints) { + if (!annot || !quadPoints || !FPDFAnnot_HasAttachmentPoints(annot)) + return false; + CPDF_Array* pArray = + CPDFDictionaryFromFPDFAnnotation(annot)->GetArrayFor("QuadPoints"); + if (!pArray) + return false; + quadPoints->x1 = pArray->GetNumberAt(0); + quadPoints->y1 = pArray->GetNumberAt(1); + quadPoints->x2 = pArray->GetNumberAt(2); + quadPoints->y2 = pArray->GetNumberAt(3); + quadPoints->x3 = pArray->GetNumberAt(4); + quadPoints->y3 = pArray->GetNumberAt(5); + quadPoints->x4 = pArray->GetNumberAt(6); + quadPoints->y4 = pArray->GetNumberAt(7); + return true; +} + +DLLEXPORT FPDF_BOOL STDCALL FPDFAnnot_GetRect(FPDF_ANNOTATION annot, + FS_RECTF* rect) { + CPDF_Dictionary* pAnnotDict = 
CPDFDictionaryFromFPDFAnnotation(annot); + if (!rect || !pAnnotDict) + return false; + CFX_FloatRect rt = pAnnotDict->GetRectFor("Rect"); + rect->left = rt.left; + rect->bottom = rt.bottom; + rect->right = rt.right; + rect->top = rt.top; + return true; +} + +DLLEXPORT unsigned long STDCALL FPDFAnnot_GetText(FPDF_ANNOTATION annot, + FPDFANNOT_TEXTTYPE type, + char* buffer, + unsigned long buflen) { + CPDF_Dictionary* pAnnotDict = CPDFDictionaryFromFPDFAnnotation(annot); + if (!pAnnotDict) + return 0; + CFX_ByteString key = type == FPDFANNOT_TEXTTYPE_Author ? "T" : "Contents"; + CFX_ByteString contents = pAnnotDict->GetUnicodeTextFor(key).UTF16LE_Encode(); + unsigned long len = contents.GetLength(); + if (buffer && buflen >= len) + memcpy(buffer, contents.c_str(), len); + return len; +} diff --git a/fpdfsdk/fpdfannot_embeddertest.cpp b/fpdfsdk/fpdfannot_embeddertest.cpp new file mode 100644 index 0000000000..27a76eecfa --- /dev/null +++ b/fpdfsdk/fpdfannot_embeddertest.cpp @@ -0,0 +1,132 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "public/fpdf_annot.h" +#include "public/fpdfview.h" +#include "testing/embedder_test.h" +#include "testing/gtest/include/gtest/gtest.h" + +class FPDFAnnotEmbeddertest : public EmbedderTest {}; + +TEST_F(FPDFAnnotEmbeddertest, ExtractHighlightLongContent) { + // Open a file with one annotation and load its first page. + ASSERT_TRUE(OpenDocument("annotation_highlight_long_content.pdf")); + FPDF_PAGE page = FPDF_LoadPage(document(), 0); + ASSERT_TRUE(page); + + // Check that there is a total of 1 annotation on its first page. + EXPECT_EQ(1, FPDFPage_GetAnnotCount(page)); + + // Check that the annotation is of type "highlight". 
+ FPDF_ANNOTATION annot; + ASSERT_TRUE(FPDFPage_GetAnnot(page, 0, &annot)); + EXPECT_EQ(FPDF_ANNOT_HIGHLIGHT, FPDFAnnot_GetSubtype(annot)); + + // Check that the annotation color is yellow. + unsigned int R; + unsigned int G; + unsigned int B; + unsigned int A; + EXPECT_TRUE( + FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_Color, &R, &G, &B, &A)); + EXPECT_EQ(255u, R); + EXPECT_EQ(255u, G); + EXPECT_EQ(0u, B); + EXPECT_EQ(255u, A); + + // Check that the author is correct. + unsigned long len = + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, nullptr, 0); + std::vector buf(len); + EXPECT_EQ(28u, FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Author, buf.data(), + len)); + EXPECT_STREQ(L"Jae Hyun Park", + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Check that the content is correct. + len = FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, nullptr, 0); + buf.clear(); + buf.resize(len); + EXPECT_EQ(2690u, FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, + buf.data(), len)); + const wchar_t contents[] = + L"This is a note for that highlight annotation. Very long highlight " + "annotation. 
Long long long Long long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long longLong long longLong long longLong long longLong long " + "longLong long long. END"; + EXPECT_STREQ(contents, + GetPlatformWString(reinterpret_cast(buf.data())) + .c_str()); + + // Check that the quadpoints are correct. + FS_QUADPOINTSF quadpoints; + ASSERT_TRUE(FPDFAnnot_GetAttachmentPoints(annot, &quadpoints)); + EXPECT_EQ(115.802643f, quadpoints.x1); + EXPECT_EQ(718.913940f, quadpoints.y1); + EXPECT_EQ(157.211182f, quadpoints.x4); + EXPECT_EQ(706.264465f, quadpoints.y4); + + UnloadPage(page); +} + +TEST_F(FPDFAnnotEmbeddertest, ExtractInkMultiple) { + // Open a file with three annotations and load its first page. 
+ ASSERT_TRUE(OpenDocument("annotation_ink_multiple.pdf")); + FPDF_PAGE page = FPDF_LoadPage(document(), 0); + ASSERT_TRUE(page); + + // Check that there is a total of 3 annotations on its first page. + EXPECT_EQ(3, FPDFPage_GetAnnotCount(page)); + + // Check that the third annotation is of type "ink". + FPDF_ANNOTATION annot; + ASSERT_TRUE(FPDFPage_GetAnnot(page, 2, &annot)); + EXPECT_EQ(FPDF_ANNOT_INK, FPDFAnnot_GetSubtype(annot)); + + // Check that the annotation color is blue with opacity. + unsigned int R; + unsigned int G; + unsigned int B; + unsigned int A; + EXPECT_TRUE( + FPDFAnnot_GetColor(annot, FPDFANNOT_COLORTYPE_Color, &R, &G, &B, &A)); + EXPECT_EQ(0u, R); + EXPECT_EQ(0u, G); + EXPECT_EQ(255u, B); + EXPECT_EQ(76u, A); + + // Check that there is no content. + EXPECT_EQ(2u, + FPDFAnnot_GetText(annot, FPDFANNOT_TEXTTYPE_Contents, nullptr, 0)); + + // Check that the rectangle coordinates are correct. + // Note that upon rendering, the rectangle coordinates will be adjusted. + FS_RECTF rect; + ASSERT_TRUE(FPDFAnnot_GetRect(annot, &rect)); + EXPECT_EQ(351.820404f, rect.left); + EXPECT_EQ(583.830688f, rect.bottom); + EXPECT_EQ(475.336090f, rect.right); + EXPECT_EQ(681.535034f, rect.top); + + UnloadPage(page); +} diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp index f20e8ab9f4..0dcb25aaca 100644 --- a/fpdfsdk/fpdfview.cpp +++ b/fpdfsdk/fpdfview.cpp @@ -17,6 +17,7 @@ #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/render/cpdf_progressiverenderer.h" @@ -303,6 +304,14 @@ CPDF_Page* CPDFPageFromFPDFPage(FPDF_PAGE page) { #endif // PDF_ENABLE_XFA } +FPDF_ANNOTATION FPDFAnnotationFromCPDFDictionary(CPDF_Dictionary* pDict) { + return static_cast(pDict); +} + +CPDF_Dictionary* 
CPDFDictionaryFromFPDFAnnotation(FPDF_ANNOTATION annot) { + return static_cast(annot); +} + CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) { return static_cast(bitmap); } diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index 37f1d91de2..5a65fc4111 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -9,6 +9,7 @@ #include "fpdfsdk/fpdfview_c_api_test.h" +#include "public/fpdf_annot.h" #include "public/fpdf_dataavail.h" #include "public/fpdf_doc.h" #include "public/fpdf_edit.h" @@ -33,6 +34,15 @@ fnptr g_c_api_test_fnptr = NULL; // Extern, so can't know it doesn't change. // Function to call from gtest harness to ensure linker resolution. int CheckPDFiumCApi() { + // fpdf_annot.h + CHK(FPDFPage_GetAnnotCount); + CHK(FPDFPage_GetAnnot); + CHK(FPDFAnnot_GetSubtype); + CHK(FPDFAnnot_GetColor); + CHK(FPDFAnnot_GetAttachmentPoints); + CHK(FPDFAnnot_GetRect); + CHK(FPDFAnnot_GetText); + // fpdf_dataavail.h CHK(FPDFAvail_Create); CHK(FPDFAvail_Destroy); diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h index e14cc19ea1..e49977976b 100644 --- a/fpdfsdk/fsdk_define.h +++ b/fpdfsdk/fsdk_define.h @@ -61,6 +61,8 @@ FPDF_DOCUMENT FPDFDocumentFromCPDFDocument(CPDF_Document* doc); CPDF_Page* CPDFPageFromFPDFPage(FPDF_PAGE page); +FPDF_ANNOTATION FPDFAnnotationFromCPDFDictionary(CPDF_Dictionary* pDict); +CPDF_Dictionary* CPDFDictionaryFromFPDFAnnotation(FPDF_ANNOTATION annot); CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap); void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable); -- cgit v1.2.3