Verify PDF text using Selenium WebDriver

Many applications generate PDF documents whose content needs to be validated. Apache PDFBox makes it possible to do this validation in automated tests.

Add maven dependency

 <dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>${pdfbox.version}</version>
 </dependency>

Here is a sample script that extracts the text from a PDF and then checks whether the text you are looking for is present in the PDF document.

package com.selcukes.pdf;

import io.github.selcukes.wdb.WebDriverBinary;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.URL;

/**
 * TestNG tests that open a sample PDF through a Chrome {@link WebDriver} session
 * and check both the resulting URL and the text extracted from the document.
 */
public class ReadPDF {

    /** Page that lists the sample documents used by these tests. */
    private static final String SAMPLES_URL = "http://www.princexml.com/samples/";

    /** Locator for the link whose {@code href} contains {@code dictionary.pdf}. */
    private static final By DICTIONARY_PDF_LINK = By.xpath("//a[contains(@href,'dictionary.pdf')]");

    /** Browser session shared by all tests in this class; created in {@link #setUp()}. */
    WebDriver driver;

    /**
     * Starts the Chrome session used by the tests.
     */
    @BeforeClass
    public void setUp() {
        // Presumably resolves/installs the chromedriver binary before Chrome is launched.
        WebDriverBinary.chromeDriver().setup();
        driver = new ChromeDriver();
    }

    /**
     * Opens the dictionary sample PDF in the browser and asserts that the text
     * extracted from the document at the current URL contains "Old Icelandic".
     *
     * @throws IOException if the PDF cannot be downloaded or parsed
     */
    @Test
    public void testVerifyPDFTextInBrowser() throws IOException {
        openDictionaryPdf();
        Assert.assertTrue(verifyPDFContent(driver.getCurrentUrl(), "Old Icelandic"));
    }

    /**
     * Opens the dictionary sample PDF in the browser and asserts that the
     * current URL contains ".pdf".
     */
    @Test
    public void testVerifyPDFInURL() {
        openDictionaryPdf();
        String currentUrl = driver.getCurrentUrl();
        Assert.assertTrue(currentUrl.contains(".pdf"));
    }

    /**
     * Downloads the PDF at the given URL, extracts the text of pages 1 to 5 and
     * reports whether it contains the requested text.
     *
     * @param strURL       URL of the PDF document to read
     * @param reqTextInPDF text expected to appear in the extracted pages
     * @return {@code true} if the extracted text contains {@code reqTextInPDF}
     * @throws IOException if the URL is malformed, or the document cannot be
     *                     downloaded or parsed
     */
    public boolean verifyPDFContent(String strURL, String reqTextInPDF) throws IOException {
        URL pdfUrl = new URL(strURL);
        // try-with-resources closes both the network stream and the document,
        // even when loading or text extraction throws.
        try (BufferedInputStream pdfStream = new BufferedInputStream(pdfUrl.openStream());
             PDDocument pdDoc = PDDocument.load(pdfStream)) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            // Restrict extraction to pages 1 through 5 of the PDF.
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(5);
            String parsedText = pdfStripper.getText(pdDoc);
            return parsedText.contains(reqTextInPDF);
        }
    }

    /**
     * Ends the browser session once all tests in the class have run.
     */
    @AfterClass
    public void tearDown() {
        // driver stays null if ChromeDriver construction failed in setUp().
        if (driver != null) {
            driver.quit();
        }
    }

    /** Loads the samples page and clicks the link to the dictionary PDF. */
    private void openDictionaryPdf() {
        driver.get(SAMPLES_URL);
        driver.findElement(DICTIONARY_PDF_LINK).click();
    }
}

Header Ads

Verify PDF text using Selenium WebDriver

Post a Comment

0 Comments

Facebook

Recent Posts

Categories

Contact Us

Menu Footer Widget

Header Ads

Verify PDF text using Selenium WebDriver

You may like these posts

Post a Comment

0 Comments

Facebook

Recent Posts

Categories

Contact Us

Menu Footer Widget