Verifying Files With Variable Parts

Applies to TestComplete 14.71, last modified on April 22, 2021

When testing applications, you may need to verify PDF files. For example, you may need to do this to make sure the application under test generates the correct output data (for example, a report in the PDF format).

The PDF file you need to verify may contain some variable information that changes from one application run to another while the other information remains unchanged. For example, it can contain a time stamp, a document ID, the owner name and so on. In this case, byte-by-byte comparison of the actual text content against the expected text content will always fail. To avoid this and verify PDF files with variable parts successfully, you can exclude the variable parts from verification:

  1. Get your tested PDF file.

  2. Use the PDF.ConvertToText method to get the text content of the file.

  3. Isolate and exclude the variable parts from the text content. To do this, you can use regular expressions or various scripting objects that TestComplete provides to work with strings.

    Note:

    JavaScript, JScript, Python, VBScript, C#Script and C++Script have internal support for regular expressions. To learn how to use regular expressions in these scripting languages, see:

    docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/scripting-articles

    For DelphiScript, regular expressions are implemented via the HISUtils.RegExpr object. This object can also be used in JavaScript, JScript, Python, VBScript, C#Script and C++Script code.

  4. Verify the resulting contents against the expected contents.

The code below demonstrates how to compare the text contents of two PDF files (a baseline file and a generated report) while ignoring date and time stamps:

JavaScript

function Main()
{
  // Baseline document and the report to check against it
  const baselinePath = "C:\\work\\baseline.pdf";
  const reportPath = "C:\\work\\report.pdf";

  // Post a message only when the text contents match
  const sameText = ComparePDF(baselinePath, reportPath);
  if (sameText) {
    Log.Message("The text contents of specified PDF files are the same");
  }
}

/**
 * Compares the text contents of two PDF files, ignoring date/time stamps.
 *
 * Both paths must be non-empty, must exist according to aqFile.Exists, and
 * must have the "pdf" extension as reported by aqFileSystem.GetFileExtension;
 * otherwise the function returns false without reading either file.
 *
 * @param path1 Path to the first PDF file.
 * @param path2 Path to the second PDF file.
 * @returns The result of equal() on the two texts after every date/time
 *          stamp has been replaced with "<ignore>", or false if a path
 *          fails the checks above.
 */
function ComparePDF(path1, path2)
{
  if (((path1 != "") && (aqFile.Exists(path1)) && (aqFileSystem.GetFileExtension(path1) == "pdf"))
    && ((path2 != "") && (aqFile.Exists(path2)) && (aqFileSystem.GetFileExtension(path2) == "pdf")))
    {
      // Get the text contents of PDF files
      let str1 = PDF.ConvertToText(path1);
      let str2 = PDF.ConvertToText(path2);

      // Date/time stamp pattern, e.g. "4/22/2021 10:15:30 AM".
      // The unescaped "." matches any single separator character.
      // Declared locally so the pattern does not leak into the global
      // scope (an undeclared assignment also throws in strict mode).
      const regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;

      str1 = str1.replace(regEx, "<ignore>");
      str2 = str2.replace(regEx, "<ignore>");

      // Compare the resulting contents
      return equal(str1, str2);

    }
  else
    return false;
}

JScript

function Main()
{
  // Baseline document and the report to check against it
  var baselinePath = "C:\\work\\baseline.pdf",
      reportPath = "C:\\work\\report.pdf";

  // Post a message only when the text contents match
  var sameText = ComparePDF(baselinePath, reportPath);
  if (sameText)
  {
    Log.Message("The text contents of specified PDF files are the same");
  }
}

// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Both paths must be non-empty, must exist according to aqFile.Exists, and
// must have the "pdf" extension as reported by aqFileSystem.GetFileExtension;
// otherwise the function returns false without reading either file.
//
// Returns true if the texts are equal after every date/time stamp has been
// replaced with "<ignore>", false otherwise.
function ComparePDF(path1, path2)
{
  if (((path1 != "") && (aqFile.Exists(path1)) && (aqFileSystem.GetFileExtension(path1) == "pdf"))
  && ((path2 != "") && (aqFile.Exists(path2)) && (aqFileSystem.GetFileExtension(path2) == "pdf")))
    {
      // Get the text contents of PDF files
      var str1 = PDF.ConvertToText(path1);
      var str2 = PDF.ConvertToText(path2);

      // Date/time stamp pattern, e.g. "4/22/2021 10:15:30 AM".
      // The unescaped "." matches any single separator character.
      // Declared with var so the pattern stays local to this function
      // instead of becoming an implicit global.
      var regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;

      str1 = str1.replace(regEx, "<ignore>");
      str2 = str2.replace(regEx, "<ignore>");

      // Compare the resulting contents
      return (str1 == str2);

  }
  else
    return false;
}

Python

def Main():
  """Compare the baseline PDF with the report PDF and log a message if their texts match."""
  baseline_pdf = "C:\\work\\baseline.pdf"
  report_pdf = "C:\\work\\report.pdf"

  texts_match = ComparePDF(baseline_pdf, report_pdf)
  if texts_match:
    Log.Message("The text contents of specified PDF files are the same")

def ComparePDF(path1, path2):
  """Compare the text contents of two PDF files, ignoring date/time stamps.

  Both paths must be non-empty, must exist according to aqFile.Exists, and
  must have the "pdf" extension as reported by aqFileSystem.GetFileExtension;
  otherwise False is returned without reading either file.

  Args:
    path1: Path to the first PDF file.
    path2: Path to the second PDF file.

  Returns:
    True if the texts are equal after every date/time stamp has been
    replaced with "<ignore>", False otherwise.
  """
  # Imported locally so the snippet stays self-contained
  import re

  if (path1 != "" and aqFile.Exists(path1) and aqFileSystem.GetFileExtension(path1) == "pdf" and \
  path2 != "" and aqFile.Exists(path2) and aqFileSystem.GetFileExtension(path2) == "pdf"):
    # Get the text contents of PDF files
    str1 = PDF.ConvertToText(path1)
    str2 = PDF.ConvertToText(path2)

    # Date/time stamp pattern, e.g. "4/22/2021 10:15:30 AM".
    # str.replace() only substitutes literal substrings, so a compiled
    # regular expression is required here. The unescaped "." matches any
    # single separator character; re.sub replaces every occurrence.
    regEx = re.compile(r"\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}",
                       re.IGNORECASE | re.MULTILINE)

    str1 = regEx.sub("<ignore>", str1)
    str2 = regEx.sub("<ignore>", str2)

    # Compare the resulting contents
    return (str1 == str2)

  else:
    return False

VBScript

Sub Main()
  ' Baseline document and the report to check against it
  baselinePath = "C:\work\baseline.pdf"
  reportPath = "C:\work\report.pdf"

  ' Post a message only when the text contents match
  sameText = ComparePDF(baselinePath, reportPath)
  If sameText Then
    Log.Message("The text contents of specified PDF files are the same")
  End If
End Sub

Function ComparePDF(path1, path2)
  ' Compares the text contents of two PDF files, ignoring date/time stamps.
  ' Returns False unless both paths are non-empty, exist and have
  ' the "pdf" extension.
  ComparePDF = False

  If path1 <> "" And aqFile.Exists(path1) And aqFileSystem.GetFileExtension(path1) = "pdf" _
    And path2 <> "" And aqFile.Exists(path2) And aqFileSystem.GetFileExtension(path2) = "pdf" Then

    ' Extract the text of both documents
    firstText = PDF.ConvertToText(path1)
    secondText = PDF.ConvertToText(path2)

    ' Case-insensitive, global pattern that matches date/time stamps
    Set stampPattern = New RegExp
    With stampPattern
      .Pattern = "\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}"
      .IgnoreCase = True
      .Global = True
    End With

    ' Mask the stamps so they do not affect the comparison
    firstText = stampPattern.Replace(firstText, "<ignore>")
    secondText = stampPattern.Replace(secondText, "<ignore>")

    ' The documents match when the masked texts are equal
    ComparePDF = (firstText = secondText)
  End If
End Function

DelphiScript

// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Both paths must be non-empty, must exist according to aqFile.Exists, and
// must have the 'pdf' extension as reported by aqFileSystem.GetFileExtension;
// otherwise the function returns false without reading either file.
//
// Returns true if the texts are equal after every date/time stamp has been
// replaced with '<ignore>', false otherwise.
function ComparePDF(path1, path2);
var str1, str2;
var regEx;
begin

  if ((path1 <> '') and (aqFile.Exists(path1)) and (aqFileSystem.GetFileExtension(path1) = 'pdf'))
    and ((path2 <> '') and (aqFile.Exists(path2)) and (aqFileSystem.GetFileExtension(path2) = 'pdf')) then
    begin
      // Get the text contents of PDF files
      str1 := PDF.ConvertToText(path1);
      str2 := PDF.ConvertToText(path2);

      // Use the regular expression
      // to replace the date/time stamp, e.g. '4/22/2021 10:15:30 AM'.
      // The unescaped '.' matches any single separator character.
      regEx := HISUtils.RegExpr;
      regEx.Expression := '\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}';

      str1 := regEx.Replace(str1, '<ignore>');
      str2 := regEx.Replace(str2, '<ignore>');

      // Compare the resulting contents
      result := (str1 = str2);

    end
  else
    result := false;
end;

procedure Main();
var baselinePath, reportPath, sameText;
begin
  // Baseline document and the report to check against it
  baselinePath := 'C:\work\baseline.pdf';
  reportPath := 'C:\work\report.pdf';

  // Post a message only when the text contents match
  sameText := ComparePDF(baselinePath, reportPath);
  if sameText then
  begin
    Log.Message('The text contents of specified PDF files are the same');
  end;
end;

C++Script, C#Script

function Main()
{
  // Baseline document and the report to check against it
  var baselinePath = "C:\\work\\baseline.pdf",
      reportPath = "C:\\work\\report.pdf";

  // Post a message only when the text contents match
  var sameText = ComparePDF(baselinePath, reportPath);
  if (sameText)
  {
    Log["Message"]("The text contents of specified PDF files are the same");
  }
}

// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Both paths must be non-empty, must exist according to aqFile["Exists"], and
// must have the "pdf" extension as reported by aqFileSystem["GetFileExtension"];
// otherwise the function returns false without reading either file.
//
// Returns true if the texts are equal after every date/time stamp has been
// replaced with "<ignore>", false otherwise.
function ComparePDF(path1, path2)
{
  if (((path1 != "") && (aqFile["Exists"](path1)) && (aqFileSystem["GetFileExtension"](path1) == "pdf"))
    && ((path2 != "") && (aqFile["Exists"](path2)) && (aqFileSystem["GetFileExtension"](path2) == "pdf")))
    {
      // Get the text contents of PDF files
      var str1 = PDF["ConvertToText"](path1);
      var str2 = PDF["ConvertToText"](path2);

      // Date/time stamp pattern, e.g. "4/22/2021 10:15:30 AM".
      // The unescaped "." matches any single separator character.
      // Declared with var so the pattern stays local to this function
      // instead of becoming an implicit global.
      var regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;

      str1 = str1["replace"](regEx, "<ignore>");
      str2 = str2["replace"](regEx, "<ignore>");

      // Compare the resulting contents
      return (str1 == str2);

    }
  else
    return false;

}

See Also

PDF Checkpoints
Regular Expressions Syntax

Highlight search results