Most applications have some PDF validation functionality. Apache PDFBox helps to achieve this in test automation.
Add the Maven dependency:
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
Here is a sample script that extracts the text from a PDF and then validates whether the text you are looking for is present in the PDF document:
package com.selcukes.pdf;
import io.github.selcukes.wdb.WebDriverBinary;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.URL;
/**
 * TestNG sample that opens a PDF link in Chrome and validates the PDF by
 * checking the browser URL and by extracting the document text with PDFBox.
 */
public class ReadPDF {
    WebDriver driver;

    /**
     * Sets up the Chrome driver binary and starts a Chrome session that is
     * shared by every test in this class.
     */
    @BeforeClass
    public void setUp() {
        WebDriverBinary.chromeDriver().setup();
        driver = new ChromeDriver();
    }

    /**
     * To verify PDF content in the pdf document
     *
     * @throws IOException if the PDF cannot be downloaded or parsed
     */
    @Test
    public void testVerifyPDFTextInBrowser() throws IOException {
        driver.get("http://www.princexml.com/samples/");
        driver.findElement(By.xpath("//a[contains(@href,'dictionary.pdf')]")).click();
        Assert.assertTrue(verifyPDFContent(driver.getCurrentUrl(), "Old Icelandic"));
    }

    /**
     * To verify pdf in the URL
     */
    @Test
    public void testVerifyPDFInURL() {
        driver.get("http://www.princexml.com/samples/");
        driver.findElement(By.xpath("//a[contains(@href,'dictionary.pdf')]")).click();
        String currentUrl = driver.getCurrentUrl();
        Assert.assertTrue(currentUrl.contains(".pdf"));
    }

    /**
     * Downloads the PDF at the given URL and checks whether the text extracted
     * from pages 1 to 5 contains the expected string.
     *
     * @param strURL       URL of the PDF document to read
     * @param reqTextInPDF text expected to appear in the extracted pages
     * @return {@code true} if the extracted text contains {@code reqTextInPDF}
     * @throws IOException if the URL is malformed, or the document cannot be
     *                     downloaded or parsed
     */
    public boolean verifyPDFContent(String strURL, String reqTextInPDF) throws IOException {
        URL pdfUrl = new URL(strURL);
        // try-with-resources closes the document first and then the stream,
        // even when loading or text extraction throws.
        try (BufferedInputStream pdfStream = new BufferedInputStream(pdfUrl.openStream());
             PDDocument pdDoc = PDDocument.load(pdfStream)) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            // Specify the range which will read first five pages of the PDF
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(5);
            String parsedText = pdfStripper.getText(pdDoc);
            return parsedText.contains(reqTextInPDF);
        }
    }

    /**
     * Ends the browser session once all tests in the class have run.
     */
    @AfterClass
    public void tearDown() {
        // driver stays null when setUp failed before the session was created.
        if (driver != null) {
            driver.quit();
        }
    }
}

0 Comments