When testing applications, you may need to verify PDF files, for instance, to make sure the application under test generates the correct output data (for example, a report in the PDF format).
The PDF file you need to verify may contain some variable information that changes from one application run to another while the other information remains unchanged. For example, it can contain a time stamp, a document ID, the owner name and so on. In this case, byte-by-byte comparison of the actual text content against the expected text content will always fail. To avoid this and verify PDF files with variable parts successfully, you can exclude the variable parts from verification:
- Get your tested PDF file.
- Use the PDF.ConvertToText method to get the text content of the file.
- Isolate and exclude the variable parts from the text content. To do this, you can use regular expressions or various scripting objects that TestComplete provides to work with strings.
Note: JavaScript, JScript, Python, VBScript, C#Script and C++Script have internal support for regular expressions. To learn how to use regular expressions in these scripting languages, see:
docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/scripting-articles
For DelphiScript, regular expressions are implemented via the HISUtils.RegExpr object. This object can also be used in JavaScript, JScript, Python, VBScript, C#Script and C++Script code.
- Verify the resulting contents against the expected contents.
The code below demonstrates how to verify the contents of a PDF file ignoring date and time stamps:
JavaScript
/**
 * Sample entry point: compares a baseline PDF against a generated report
 * and posts a message to the test log when their text contents match.
 */
function Main()
{
  let path1 = "C:\\work\\baseline.pdf";
  let path2 = "C:\\work\\report.pdf";

  // ComparePDF ignores date/time stamps, so only real content differences count
  if (ComparePDF(path1, path2))
    Log.Message("The text contents of specified PDF files are the same");
}
/**
 * Compares the text contents of two PDF files, ignoring date/time stamps.
 *
 * Each path must be non-empty, point to an existing file and have the "pdf"
 * extension (the comparison with "pdf" is case-sensitive). If either path
 * fails these checks, the function returns false without reading any file.
 *
 * @param path1 Path to the first PDF file (for example, the baseline).
 * @param path2 Path to the second PDF file (for example, the actual report).
 * @returns The result of comparing the two texts after every date/time stamp
 *          has been replaced with "<ignore>"; false if a path is not usable.
 */
function ComparePDF(path1, path2)
{
  // Same three checks the sample applies to both paths
  const isExistingPdf = (path) =>
    (path != "") && aqFile.Exists(path) && (aqFileSystem.GetFileExtension(path) == "pdf");

  if (!(isExistingPdf(path1) && isExistingPdf(path2)))
    return false;

  // Get the text contents of PDF files
  let str1 = PDF.ConvertToText(path1);
  let str2 = PDF.ConvertToText(path2);

  // Date/time stamp such as "1/2/2020 10:11:12 AM". The unescaped "." matches
  // any single separator character. Declared locally so that the pattern does
  // not leak into the global scope.
  const regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;
  str1 = str1.replace(regEx, "<ignore>");
  str2 = str2.replace(regEx, "<ignore>");

  // Compare the resulting contents
  return equal(str1, str2);
}
JScript
// Sample entry point: compares a baseline PDF against a generated report
// and posts a message to the test log when their text contents match.
function Main()
{
  var path1 = "C:\\work\\baseline.pdf";
  var path2 = "C:\\work\\report.pdf";

  // ComparePDF ignores date/time stamps, so only real content differences count
  if (ComparePDF(path1, path2))
    Log.Message("The text contents of specified PDF files are the same");
}
// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Each path must be non-empty, point to an existing file and have the "pdf"
// extension (the comparison with "pdf" is case-sensitive). If either path
// fails these checks, the function returns false without reading any file.
//
// path1, path2 - paths to the PDF files to compare.
// Returns true if the texts are equal after every date/time stamp has been
// replaced with "<ignore>"; otherwise, false.
function ComparePDF(path1, path2)
{
  var path1IsPdf = (path1 != "") && (aqFile.Exists(path1)) && (aqFileSystem.GetFileExtension(path1) == "pdf");
  var path2IsPdf = path1IsPdf
    && (path2 != "") && (aqFile.Exists(path2)) && (aqFileSystem.GetFileExtension(path2) == "pdf");

  if (!path2IsPdf)
    return false;

  // Get the text contents of PDF files
  var str1 = PDF.ConvertToText(path1);
  var str2 = PDF.ConvertToText(path2);

  // Date/time stamp such as "1/2/2020 10:11:12 AM". The unescaped "." matches
  // any single separator character. Declared with var so that the pattern
  // stays local instead of becoming an implicit global.
  var regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;
  str1 = str1.replace(regEx, "<ignore>");
  str2 = str2.replace(regEx, "<ignore>");

  // Compare the resulting contents
  return (str1 == str2);
}
Python
def Main():
    """Sample entry point.

    Compares a baseline PDF against a generated report and posts a message
    to the test log when their text contents match.
    """
    path1 = "C:\\work\\baseline.pdf"
    path2 = "C:\\work\\report.pdf"

    # ComparePDF ignores date/time stamps, so only real content differences count
    if ComparePDF(path1, path2):
        Log.Message("The text contents of specified PDF files are the same")
def ComparePDF(path1, path2):
    """Compare the text contents of two PDF files, ignoring date/time stamps.

    Each path must be non-empty, point to an existing file and have the
    "pdf" extension (the comparison with "pdf" is case-sensitive). If either
    path fails these checks, the function returns False without reading any
    file.

    Args:
        path1: Path to the first PDF file (for example, the baseline).
        path2: Path to the second PDF file (for example, the actual report).

    Returns:
        True if the texts are equal after every date/time stamp has been
        replaced with "<ignore>"; otherwise, False.
    """
    # Local import keeps the sample self-contained
    import re

    def is_existing_pdf(path):
        # Same three checks the sample applies to both paths
        return (path != "" and aqFile.Exists(path)
                and aqFileSystem.GetFileExtension(path) == "pdf")

    if not (is_existing_pdf(path1) and is_existing_pdf(path2)):
        return False

    # Get the text contents of PDF files
    str1 = PDF.ConvertToText(path1)
    str2 = PDF.ConvertToText(path2)

    # Date/time stamp such as "1/2/2020 10:11:12 AM". The unescaped "." matches
    # any single separator character. str.replace() only substitutes literal
    # substrings, so the pattern must go through the re module; the flags
    # mirror the "i" and "m" modifiers of the other samples ("g" is the
    # default behavior of sub()).
    stamp = re.compile(r"\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}",
                       re.IGNORECASE | re.MULTILINE)
    str1 = stamp.sub("<ignore>", str1)
    str2 = stamp.sub("<ignore>", str2)

    # Compare the resulting contents
    return str1 == str2
VBScript
' Sample entry point: compares a baseline PDF against a generated report
' and posts a message to the test log when their text contents match.
Sub Main
  path1 = "C:\work\baseline.pdf"
  path2 = "C:\work\report.pdf"

  ' ComparePDF ignores date/time stamps, so only real content differences count
  If ComparePDF(path1, path2) Then
    Log.Message("The text contents of specified PDF files are the same")
  End If
End Sub
' Compares the text contents of two PDF files, ignoring date/time stamps.
'
' Each path must be non-empty, point to an existing file and have the "pdf"
' extension. If either path fails these checks, the function returns False.
'
' path1, path2 - paths to the PDF files to compare.
' Returns True if the texts are equal after every date/time stamp has been
' replaced with "<ignore>"; otherwise, False.
Function ComparePDF(path1, path2)
  Dim firstText, secondText, stampPattern

  ' Result reported unless both files pass the checks below
  ComparePDF = False

  If path1 <> "" And aqFile.Exists(path1) And aqFileSystem.GetFileExtension(path1) = "pdf" _
      And path2 <> "" And aqFile.Exists(path2) And aqFileSystem.GetFileExtension(path2) = "pdf" Then

    ' Get the text contents of PDF files
    firstText = PDF.ConvertToText(path1)
    secondText = PDF.ConvertToText(path2)

    ' Regular expression that matches a date/time stamp;
    ' Global = True replaces every occurrence, not just the first one
    Set stampPattern = New RegExp
    stampPattern.Pattern = "\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}"
    stampPattern.IgnoreCase = True
    stampPattern.Global = True

    firstText = stampPattern.Replace(firstText, "<ignore>")
    secondText = stampPattern.Replace(secondText, "<ignore>")

    ' Compare the resulting contents
    ComparePDF = (firstText = secondText)
  End If
End Function
DelphiScript
// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Each path must be non-empty, point to an existing file and have the 'pdf'
// extension. If either path fails these checks, the function returns false.
//
// path1, path2 - paths to the PDF files to compare.
// Returns true if the texts are equal after every date/time stamp has been
// replaced with '<ignore>'; otherwise, false.
function ComparePDF(path1, path2);
var str1, str2;
var regEx;
begin
  if ((path1 <> '') and (aqFile.Exists(path1)) and (aqFileSystem.GetFileExtension(path1) = 'pdf'))
    and ((path2 <> '') and (aqFile.Exists(path2)) and (aqFileSystem.GetFileExtension(path2) = 'pdf')) then
  begin
    // Get the text contents of PDF files
    str1 := PDF.ConvertToText(path1);
    str2 := PDF.ConvertToText(path2);

    // Use the regular expression
    // to replace the date/time stamp
    regEx := HISUtils.RegExpr;
    regEx.Expression := '\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}';
    str1 := regEx.Replace(str1, '<ignore>');
    str2 := regEx.Replace(str2, '<ignore>');

    // Compare the resulting contents
    Result := (str1 = str2);
  end
  else
    Result := false;
end;
// Sample entry point: compares a baseline PDF against a generated report
// and posts a message to the test log when their text contents match.
procedure Main();
var baselinePath, reportPath;
begin
  baselinePath := 'C:\work\baseline.pdf';
  reportPath := 'C:\work\report.pdf';

  // ComparePDF ignores date/time stamps, so only real content differences count
  if ComparePDF(baselinePath, reportPath) then
  begin
    Log.Message('The text contents of specified PDF files are the same');
  end;
end;
C++Script, C#Script
// Sample entry point: compares a baseline PDF against a generated report
// and posts a message to the test log when their text contents match.
function Main()
{
  var path1 = "C:\\work\\baseline.pdf";
  var path2 = "C:\\work\\report.pdf";

  // ComparePDF ignores date/time stamps, so only real content differences count
  if (ComparePDF(path1, path2))
    Log["Message"]("The text contents of specified PDF files are the same");
}
// Compares the text contents of two PDF files, ignoring date/time stamps.
//
// Each path must be non-empty, point to an existing file and have the "pdf"
// extension (the comparison with "pdf" is case-sensitive). If either path
// fails these checks, the function returns false without reading any file.
//
// path1, path2 - paths to the PDF files to compare.
// Returns true if the texts are equal after every date/time stamp has been
// replaced with "<ignore>"; otherwise, false.
function ComparePDF(path1, path2)
{
  var path1IsPdf = (path1 != "") && (aqFile["Exists"](path1)) && (aqFileSystem["GetFileExtension"](path1) == "pdf");
  var path2IsPdf = path1IsPdf
    && (path2 != "") && (aqFile["Exists"](path2)) && (aqFileSystem["GetFileExtension"](path2) == "pdf");

  if (!path2IsPdf)
    return false;

  // Get the text contents of PDF files
  var str1 = PDF["ConvertToText"](path1);
  var str2 = PDF["ConvertToText"](path2);

  // Date/time stamp such as "1/2/2020 10:11:12 AM". The unescaped "." matches
  // any single separator character. Declared with var so that the pattern
  // stays local instead of becoming an implicit global.
  var regEx = /\d{1,2}.\d{1,2}.\d{2,4}\s\d{1,2}:\d{2}:\d{2}\s\w{2}/gim;
  str1 = str1["replace"](regEx, "<ignore>");
  str2 = str2["replace"](regEx, "<ignore>");

  // Compare the resulting contents
  return (str1 == str2);
}