diff --git a/Changes b/Changes new file mode 100644 index 0000000..aa245cc --- /dev/null +++ b/Changes @@ -0,0 +1,14 @@ +Revision history for SelectPdf Online API client for Perl. + +1.4.0 + + - Added Pdf Merge client. + - Added Pdf to Text client. + +1.3.0 + - Modified package structure. Added more documentation. + +1.2.2 + - Initial version for HtmlToPdfClient. + + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..d438a28 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,18 @@ +Changes +Makefile.PL +MANIFEST +README +t/SelectPdf.t +samples/html-to-pdf-main.pl +samples/html-to-pdf-headers-and-footers.pl +samples/pdf-merge.pl +samples/pdf-to-text.pl +samples/search-pdf.pl +lib/SelectPdf.pm +lib/SelectPdf/ApiClient.pm +lib/SelectPdf/AsyncJobClient.pm +lib/SelectPdf/HtmlToPdfClient.pm +lib/SelectPdf/UsageClient.pm +lib/SelectPdf/WebElementsClient.pm +lib/SelectPdf/PdfMergeClient.pm +lib/SelectPdf/PdfToTextClient.pm diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..d3a44aa --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,51 @@ +use 5.020001; +use ExtUtils::MakeMaker; +WriteMakefile( + NAME => 'SelectPdf', + VERSION_FROM => 'lib/SelectPdf.pm', + PREREQ_PM => { + 'LWP::UserAgent' => 0, + 'HTTP::Request::Common' => 0, + 'HTTP::Status' => 0, + 'JSON' => 0 + }, + ABSTRACT => 'SelectPdf Online REST API Perl Client (HTML to PDF, PDF merge, PDF to text, search PDF)', + AUTHOR => 'SelectPdf ', + LICENSE => 'perl', + META_ADD => { + provides => { + 'SelectPdf' => { + file => 'lib/SelectPdf.pm', + version => '1.4.0', + }, + 'SelectPdf::ApiClient' => { + file => 'lib/SelectPdf/ApiClient.pm', + version => '1.4.0', + }, + 'SelectPdf::AsyncJobClient' => { + file => 'lib/SelectPdf/AsyncJobClient.pm', + version => '1.4.0', + }, + 'SelectPdf::HtmlToPdfClient' => { + file => 'lib/SelectPdf/HtmlToPdfClient.pm', + version => '1.4.0', + }, + 'SelectPdf::UsageClient' => { + file => 'lib/SelectPdf/UsageClient.pm', + version => '1.4.0', + }, + 
'SelectPdf::WebElementsClient' => { + file => 'lib/SelectPdf/WebElementsClient.pm', + version => '1.4.0', + }, + 'SelectPdf::PdfMergeClient' => { + file => 'lib/SelectPdf/PdfMergeClient.pm', + version => '1.4.0', + }, + 'SelectPdf::PdfToTextClient' => { + file => 'lib/SelectPdf/PdfToTextClient.pm', + version => '1.4.0', + }, + }, + } +); diff --git a/README b/README new file mode 100644 index 0000000..bdac401 --- /dev/null +++ b/README @@ -0,0 +1,33 @@ +SelectPdf version 1.4.0 +======================= + +This is a Perl version for SelectPdf Online REST API client library. +It contains: HTML to PDF client, PDF Merge client, PDF to Text client. + +INSTALLATION + +To install this module type the following (use make, dmake or nmake depending on your system): + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +LWP::UserAgent +HTTP::Request::Common +HTTP::Status +JSON + +COPYRIGHT AND LICENCE + +Copyright (C) 2021 by SelectPdf + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.20.1 or, +at your option, any later version of Perl 5 you may have available. + + diff --git a/README.md b/README.md index 11d62f8..d4b7b59 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,140 @@ -# selectpdf-api-perl-client -Perl client for SelectPdf Online REST API +# SelectPdf Online REST API - Perl Client + +## HTML To PDF API - Perl Client + +SelectPdf HTML To PDF Online REST API is a professional solution that lets you create PDF from web pages and raw HTML code in your applications. The API is easy to use and the integration takes only a few lines of code. + +## Features + +* Create PDF from any web page or html string. +* Full html5/css3/javascript support. +* Set PDF options such as page size and orientation, margins, security, web page settings. +* Set PDF viewer options and PDF document information. 
+* Create custom headers and footers for the pdf document. +* Hide web page elements during the conversion. +* Automatically generate bookmarks during the html to pdf conversion. +* Support for partial page conversion. +* Easy integration, no third party libraries needed. +* Works in all programming languages. +* No installation required. + +Sign up for free to get instant API access to SelectPdf [HTML to PDF API](https://selectpdf.com/html-to-pdf-api/). + +## Pdf Merge API + +SelectPdf offers a REST API that can be used to merge PDF documents from local disk or remote url. + +## Pdf To Text API + +SelectPdf offers a REST API that can be used to extract text from local or remote PDF documents and search in existing PDF documents. + + +## Installation + +Download [selectpdf-api-perl-client-1.4.0.zip](https://github.com/selectpdf/selectpdf-api-perl-client/releases/download/1.4.0/selectpdf-api-python-client-1.4.0.zip), unzip it and run: + +``` +cd selectpdf-api-perl-client-1.4.0 +perl Makefile.PL +make +make test +make install +``` + +OR + +Install SelectPdf Perl Client for Online API via CPAN: [SelectPdf on CPAN](https://metacpan.org/dist/SelectPdf). + +``` +cpanm SelectPdf +``` + +OR + +Clone [selectpdf-api-perl-client](https://github.com/selectpdf/selectpdf-api-perl-client) from GitHub and install the library. 
+ +``` +git clone https://github.com/selectpdf/selectpdf-api-perl-client +cd selectpdf-api-perl-client +perl Makefile.PL +make +make test +make install +``` + +## Sample Code + +``` +local $| = 1; + +use strict; +use JSON; +use SelectPdf; + +print "This is SelectPdf-$SelectPdf::VERSION.\n"; + +my $url = "https://selectpdf.com/"; +my $local_file = "Test.pdf"; +my $apiKey = "Your API key here"; + +eval { + my $client = new SelectPdf::HtmlToPdfClient($apiKey); + + # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/ + $client + # main properties + + ->setPageSize("A4") # PDF page size + ->setPageOrientation("Portrait") # PDF page orientation + ->setMargins(0) # PDF page margins + ->setRenderingEngine('WebKit') # rendering engine + ->setConversionDelay(1) # conversion delay + ->setNavigationTimeout(30) # navigation timeout + ->setShowPageNumbers('False') # page numbers + ->setPageBreaksEnhancedAlgorithm('True') # enhanced page break algorithm + + # additional properties + + #->setUseCssPrint('True') # enable CSS media print + #->setDisableJavascript('True') # disable javascript + #->setDisableInternalLinks('True') # disable internal links + #->setDisableExternalLinks('True') # disable external links + #->setKeepImagesTogether('True') # keep images together + #->setScaleImages('True') # scale images to create smaller pdfs + #->setSinglePagePdf('True') # generate a single page PDF + #->setUserPassword('password') # secure the PDF with a password + + # generate automatic bookmarks + + #->setPdfBookmarksSelectors("H1, H2") # create outlines (bookmarks) for the specified elements + #->setViewerPageMode(1) # 1 (Use Outlines) - display outlines (bookmarks) in viewer + ; + + print "Starting conversion ...\n"; + + # convert url to file + $client->convertUrlToFile($url, $local_file); + + # convert url to memory + # my $pdf = $client->convertUrl($url); + + # convert html string to file + # $client->convertHtmlStringToFile("This is some html.", $local_file); 
+ + # convert html string to memory + # my $pdf = $client->convertHtmlString("This is some html."); + + print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n"; + + # get API usage + my $usageClient = new SelectPdf::UsageClient($apiKey); + my $usage = $usageClient->getUsage(); + print("Usage: " . encode_json($usage) . "\n"); + print("Conversions remained this month: ". $usage->{"available"}); +}; + +if ($@) { + print "An error occurred: $@\n"; +} + +``` \ No newline at end of file diff --git a/lib/SelectPdf.pm b/lib/SelectPdf.pm new file mode 100644 index 0000000..96bfb7b --- /dev/null +++ b/lib/SelectPdf.pm @@ -0,0 +1,196 @@ +package SelectPdf; + +our $VERSION = '1.4.0'; + +require SelectPdf::HtmlToPdfClient; +require SelectPdf::PdfMergeClient; +require SelectPdf::PdfToTextClient; +require SelectPdf::UsageClient; + +=head1 NAME + +SelectPdf - SelectPdf Online REST API client library for Perl. Contains HTML to PDF converter, PDF merge, PDF to text extractor, search PDF. + +=head1 SYNOPSIS + + use SelectPdf; + print "This is SelectPdf-$SelectPdf::VERSION\n"; + +Convert HTML to PDF + + use SelectPdf; + print "This is SelectPdf-$SelectPdf::VERSION\n"; + + my $url = "https://selectpdf.com/"; + my $local_file = "Test.pdf"; + my $apiKey = "Your API key here"; + + eval { + my $client = SelectPdf::HtmlToPdfClient->new($apiKey); + + $client + ->setPageSize("A4") + ->setMargins(0) + ->setShowPageNumbers('False') + ->setPageBreaksEnhancedAlgorithm('True') + ; + + $client->convertUrlToFile($url, $local_file); + }; + + if ($@) { + print "An error occurred: $@\n"; + } + + +Merge PDFs from local disk or public url and save result into a file on disk. 
+ + use JSON; + use SelectPdf; + print "This is SelectPdf-$SelectPdf::VERSION\n"; + + my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf"; + my $test_pdf = "Input.pdf"; + my $local_file = "Result.pdf"; + my $apiKey = "Your API key here"; + + eval { + my $client = new SelectPdf::PdfMergeClient($apiKey); + + # set parameters - see full list at https://selectpdf.com/pdf-merge-api/ + $client + # specify the pdf files that will be merged (order will be preserved in the final pdf) + + ->addFile($test_pdf) # add PDF from local file + ->addUrlFile($test_url) # add PDF From public url + #->addFileWithPassword($test_pdf, "pdf_password") # add PDF (that requires a password) from local file + #->addUrlFileWithPassword($test_url, "pdf_password") # add PDF (that requires a password) from public url + ; + + print "Starting pdf merge ...\n"; + + # merge pdfs to local file + $client->saveToFile($local_file); + + # merge pdfs to memory + # my $pdf = $client->save(); + + print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n"; + + # get API usage + my $usageClient = new SelectPdf::UsageClient($apiKey); + my $usage = $usageClient->getUsage(0); + print("Usage: " . encode_json($usage) . "\n"); + print("Conversions remained this month: ". 
$usage->{"available"}); + }; + + if ($@) { + print "An error occurred: $@\n"; + } + +Extract text from PDF + + use JSON; + use SelectPdf; + + print "This is SelectPdf-$SelectPdf::VERSION.\n"; + + my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf"; + my $test_pdf = "Input.pdf"; + my $local_file = "Test.txt"; + my $apiKey = "Your API key here"; + + eval { + my $client = new SelectPdf::PdfToTextClient($apiKey); + + print "Starting pdf to text ...\n"; + + # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/ + $client + ->setStartPage(1) # start page (processing starts from here) + ->setEndPage(0) # end page (set 0 to process file til the end) + ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html) + ; + + # convert local pdf to local text file + $client->getTextFromFileToFile($test_pdf, $local_file); + + # extract text from local pdf to memory + # my $text = $client->getTextFromFile($test_pdf); + # print $text; + + # convert pdf from public url to local text file + # $client->getTextFromUrlToFile($test_url, $local_file); + + # extract text from pdf from public url to memory + # my $text = $client->getTextFromUrl($test_url); + # print $text; + + print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n"; + + # get API usage + my $usageClient = new SelectPdf::UsageClient($apiKey); + my $usage = $usageClient->getUsage(0); + print("Usage: " . encode_json($usage) . "\n"); + print("Conversions remained this month: ". 
$usage->{"available"}); + }; + + if ($@) { + print "An error occurred: $@\n"; + } + +Search PDF + + use JSON; + use SelectPdf; + + print "This is SelectPdf-$SelectPdf::VERSION.\n"; + + my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf"; + my $test_pdf = "Input.pdf"; + my $apiKey = "Your API key here"; + + eval { + my $client = new SelectPdf::PdfToTextClient($apiKey); + + print "Starting search pdf ...\n"; + + # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/ + $client + ->setStartPage(1) # start page (processing starts from here) + ->setEndPage(0) # end page (set 0 to process file til the end) + ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html) + ; + + # search local pdf + my $results = $client->searchFile($test_pdf, "pdf", "True", "True"); + + # search pdf from public url + # my $results = $client->searchUrl($test_url, "pdf", "True", "True"); + + my $count = scalar @{$results}; + print("Number of search results: " . $count . "\n"); + print("Results: " . encode_json($results) . "\n"); + + print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n"; + + # get API usage + my $usageClient = new SelectPdf::UsageClient($apiKey); + my $usage = $usageClient->getUsage(0); + print("Usage: " . encode_json($usage) . "\n"); + print("Conversions remained this month: ". $usage->{"available"}); + }; + + if ($@) { + print "An error occurred: $@\n"; + } + +=head1 DESCRIPTION + +SelectPdf HTML To PDF Online REST API is a professional solution that lets you create PDF from web pages and raw HTML code in your applications. +The API is easy to use and the integration takes only a few lines of code. The generated PDFs are perfect. +That makes SelectPdf API the best html to pdf online service that can be used. + +For more details and full list of parameters see L<https://selectpdf.com/html-to-pdf-api/>. 
+
+=cut
+1;
diff --git a/lib/SelectPdf/ApiClient.pm b/lib/SelectPdf/ApiClient.pm
new file mode 100644
index 0000000..a072eeb
--- /dev/null
+++ b/lib/SelectPdf/ApiClient.pm
@@ -0,0 +1,302 @@
+package SelectPdf::ApiClient;
+
+use strict;
+
+use LWP::UserAgent;
+use HTTP::Request::Common;
+use HTTP::Status qw(:constants :is status_message);
+
+use constant MULTIPART_FORM_DATA_BOUNDARY => "------------SelectPdf_Api_Boundry_\$";
+use constant NEW_LINE => "\r\n";
+
+our $VERSION = '1.4.0';
+
+=head1 NAME
+
+SelectPdf::ApiClient - Base class for API clients. Do not use this directly.
+
+=head1 METHODS
+
+=head2 new
+
+ApiClient Constructor. Do not use this directly.
+=cut
+sub new {
+    my ($class) = @_;
+
+    # Every client starts from the same defaults; subclasses override the
+    # endpoint and fill in parameters after calling this constructor.
+    my %state = (
+        # Endpoints for synchronous calls, async job polling and web elements.
+        apiEndpoint            => "https://selectpdf.com/api2/convert/",
+        apiAsyncEndpoint       => "https://selectpdf.com/api2/asyncjob/",
+        apiWebElementsEndpoint => "https://selectpdf.com/api2/webelements/",
+
+        # Request payload: form parameters, HTTP headers, files and binary data.
+        parameters => {},
+        headers    => {},
+        files      => {},
+        binaryData => {},
+
+        # Results of the most recent call.
+        numberOfPages => 0,
+        jobId         => "",
+        lastHTTPCode  => "",
+
+        # Polling policy for asynchronous calls: seconds between pings and
+        # the maximum number of pings.
+        AsyncCallsPingInterval => 3,
+        AsyncCallsMaxPings     => 1000,
+    );
+
+    return bless { %state }, $class;
+}
+
+=head2 setApiEndpoint( $apiEndpoint )
+
+Set a custom SelectPdf API endpoint. Do not use this method unless advised by SelectPdf.
+
+    $client->setApiEndpoint($apiEndpoint);
+
+Parameters:
+
+- $apiEndpoint API endpoint.
+
+Returns:
+
+- The API endpoint currently in use.
+=cut
+sub setApiEndpoint {
+    my ($self, $apiEndpoint) = @_;
+
+    # An undefined argument leaves the stored endpoint untouched.
+    if (defined $apiEndpoint) {
+        $self->{apiEndpoint} = $apiEndpoint;
+    }
+
+    return $self->{apiEndpoint};
+}
+
+=head2 setApiAsyncEndpoint( $apiAsyncEndpoint )
+
+Set a custom SelectPdf API endpoint for async jobs. Do not use this method unless advised by SelectPdf.
+
+    $client->setApiAsyncEndpoint($apiAsyncEndpoint);
+
+Parameters:
+
+- $apiAsyncEndpoint API async jobs endpoint.
+
+Returns:
+
+- The API async jobs endpoint currently in use.
+=cut
+sub setApiAsyncEndpoint {
+    my ($self, $apiAsyncEndpoint) = @_;
+
+    # An undefined argument leaves the stored endpoint untouched.
+    if (defined $apiAsyncEndpoint) {
+        $self->{apiAsyncEndpoint} = $apiAsyncEndpoint;
+    }
+
+    return $self->{apiAsyncEndpoint};
+}
+
+=head2 setApiWebElementsEndpoint( $apiWebElementsEndpoint )
+
+Set a custom SelectPdf API endpoint for web elements. Do not use this method unless advised by SelectPdf.
+
+    $client->setApiWebElementsEndpoint($apiWebElementsEndpoint);
+
+Parameters:
+
+- $apiWebElementsEndpoint API web elements endpoint.
+
+Returns:
+
+- The API web elements endpoint currently in use.
+=cut
+sub setApiWebElementsEndpoint {
+    my ($self, $apiWebElementsEndpoint) = @_;
+
+    # An undefined argument leaves the stored endpoint untouched.
+    if (defined $apiWebElementsEndpoint) {
+        $self->{apiWebElementsEndpoint} = $apiWebElementsEndpoint;
+    }
+
+    return $self->{apiWebElementsEndpoint};
+}
+
+# Create a POST request.
+#
+# @returns Response content.
+sub performPost {
+    my ($self) = @_;
+
+    # Forget the outcome of any previous call before issuing a new request.
+    @{$self}{qw(numberOfPages jobId lastHTTPCode)} = (0, "", "");
+
+    my $agent = LWP::UserAgent->new;
+    $agent->timeout(6000);    # 6,000 seconds = 100 min
+
+    # These two headers are stored on the client, then every stored header is
+    # installed as a default header of the user agent.
+    $self->{headers}{"Content-type"} = "application/x-www-form-urlencoded";
+    $self->{headers}{"selectpdf-api-client"} = "perl-$]-$VERSION";
+    for my $name (keys %{ $self->{headers} }) {
+        $agent->default_header($name => $self->{headers}{$name});
+    }
+
+    # Send the stored parameters as a form POST to the configured endpoint.
+    my $reply  = $agent->request(POST $self->{apiEndpoint}, $self->{parameters});
+    my $status = $reply->code;
+    $self->{lastHTTPCode} = $status;
+
+    # 202: nothing to return yet, only the job ID header is recorded.
+    if ($status == HTTP_ACCEPTED) {
+        $self->{jobId} = $reply->header("selectpdf-api-jobid");
+        return undef;
+    }
+
+    # Any other non-200 status is an error; prefer the response body as the
+    # message and fall back to the HTTP status line.
+    if ($status != HTTP_OK) {
+        my $message = $reply->decoded_content || $reply->message;
+        die "($status) $message";
+    }
+
+    # 200: record page count and job ID from the headers, return the body.
+    $self->{numberOfPages} = int($reply->header("selectpdf-api-pages"));
+    $self->{jobId} = $reply->header("selectpdf-api-jobid");
+
+    return $reply->decoded_content;
+}
+
+# Create a POST request.
+#
+# @returns Response content.
+sub performPostAsMultipartFormData {
+    my($self) = @_;
+
+    # reset results
+    $self->{numberOfPages} = 0;
+    $self->{jobId} = "";
+    $self->{lastHTTPCode} = "";
+
+    # prepare request
+    my $ua = LWP::UserAgent->new;
+    $ua->timeout(6000); # 6,000 seconds = 100 min
+
+    # set headers
+    $self->{headers}{"selectpdf-api-client"} = "perl-$]-$VERSION";
+
+    foreach my $k (keys(%{ $self->{headers} })) {
+        $ua->default_header($k => $self->{headers}{$k});
+    }
+
+    # Merge parameters and files into a fresh hash. The shallow copy matters:
+    # assigning $self->{parameters} directly would alias it, so every file
+    # entry would be written into the stored parameters and sent again by any
+    # later request made with this client.
+    my $alldata = { %{ $self->{parameters} } };
+    foreach my $k (keys(%{ $self->{files} })) {
+        # An array reference value is how the file entry is passed to POST.
+        $alldata->{$k} = [$self->{files}{$k}];
+    }
+
+    # call the API
+    my $response = $ua->request(POST $self->{apiEndpoint}, Content_Type => 'form-data', Content => $alldata);
+
+    # get response
+    my $code = $response->code;
+    $self->{lastHTTPCode} = $code;
+
+    if ($response->code == HTTP_OK) {
+        # Finished synchronously: record page count and job ID, return the body.
+        $self->{numberOfPages} = int($response->header("selectpdf-api-pages"));
+        $self->{jobId} = $response->header("selectpdf-api-jobid");
+
+        return $response->decoded_content;
+    }
+    elsif ($response->code == HTTP_ACCEPTED) {
+        # 202: only the job ID is available; there is no content to return.
+        $self->{jobId} = $response->header("selectpdf-api-jobid");
+        return undef;
+    }
+    else {
+        # Prefer the response body as the error text, else the status line.
+        my $message = $response->message;
+        if ($response->decoded_content) {
+            $message = $response->decoded_content;
+        }
+
+        die "($code) $message";
+    }
+
+}
+
+# Start an asynchronous job.
+#
+# @returns Asynchronous job ID.
+sub startAsyncJob {
+    my($self) = @_;
+
+    # Flag the request as asynchronous; the job ID is stored by performPost.
+    $self->{parameters}{"async"} = "True";
+    $self->performPost();
+
+    return $self->{jobId};
+}
+
+# Start an asynchronous job that requires multipart form data.
+#
+# @returns Asynchronous job ID.
+sub startAsyncJobMultipartFormData {
+    my $self = shift;
+
+    # Flag the request as asynchronous, then send it as multipart form data;
+    # the job ID is stored on the client by that call.
+    $self->{parameters}{"async"} = "True";
+    $self->performPostAsMultipartFormData();
+
+    return $self->{jobId};
+}
+
+=head2 getNumberOfPages
+
+Get the number of pages of the PDF document resulted from the API call.
+
+    $pages = $client->getNumberOfPages();
+
+Returns:
+
+- Number of pages of the PDF document.
+=cut
+sub getNumberOfPages {
+    my $self = shift;
+    return $self->{numberOfPages};
+}
+
+# Serialize boolean values as "True" or "False" for the API.
+#
+# Undefined values, the literal string 'undef', and (case-insensitively,
+# ignoring surrounding whitespace) 'false', 'no', '0' and 'off' are false.
+# Anything else follows Perl's own truthiness.
+#
+# @returns Serialized value.
+sub serializeBoolean {
+    my ($self, $value) = @_;
+
+    return 'False' if !defined($value) || $value eq 'undef';
+
+    # Normalize case and strip surrounding whitespace before comparing.
+    (my $normalized = lc $value) =~ s/^\s+|\s+$//g;
+
+    my %is_false_word = map { $_ => 1 } qw(false no 0 off);
+    return 'False' if $is_false_word{$normalized};
+
+    return $normalized ? 'True' : 'False';
+}
+
+1;
\ No newline at end of file
diff --git a/lib/SelectPdf/AsyncJobClient.pm b/lib/SelectPdf/AsyncJobClient.pm
new file mode 100644
index 0000000..169f68c
--- /dev/null
+++ b/lib/SelectPdf/AsyncJobClient.pm
@@ -0,0 +1,79 @@
+package SelectPdf::AsyncJobClient;
+
+use SelectPdf::ApiClient;
+use strict;
+our @ISA = qw(SelectPdf::ApiClient);
+
+=head1 NAME
+
+SelectPdf::AsyncJobClient - Get the result of an asynchronous call.
+
+=head1 METHODS
+
+=head2 new( $apiKey, $jobId )
+
+Construct the async job client.
+
+    my $client = SelectPdf::AsyncJobClient->new($apiKey, $jobId);
+
+Parameters:
+
+- $apiKey API Key.
+
+- $jobId Job ID.
+=cut
+sub new {
+    my ($class, $apiKey, $jobId) = @_;
+
+    # Start from the base client defaults, then point at the async endpoint.
+    my $self = $class->SUPER::new;
+    $self->{apiEndpoint} = "https://selectpdf.com/api2/asyncjob/";
+
+    # The API key and the job to look up are sent as request parameters.
+    $self->{parameters}{"key"} = $apiKey;
+    $self->{parameters}{"job_id"} = $jobId;
+
+    return bless $self, $class;
+}
+
+=head2 getResult
+
+Get result of the asynchronous job.
+
+Returns:
+
+- Byte array containing the resulted file if the job is finished.
Returns 'undef' if the job is still running.
+=cut
+sub getResult {
+    my($self) = @_;
+
+    # Poll the async endpoint; performPost stores the HTTP code and job ID.
+    my $result = $self->SUPER::performPost();
+
+    # A job ID set by that call is treated as "still running": no result yet.
+    if ($self->{jobId}) {
+        return undef;
+    }
+    else {
+        return $result;
+    }
+}
+
+=head2 finished
+
+Check if asynchronous job is finished.
+
+Returns:
+
+- True if job finished.
+=cut
+sub finished {
+    my($self) = @_;
+
+    # Finished means the last poll answered with HTTP 200.
+    if ($self->{lastHTTPCode} eq 200) {
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+
+1;
\ No newline at end of file
diff --git a/lib/SelectPdf/HtmlToPdfClient.pm b/lib/SelectPdf/HtmlToPdfClient.pm
new file mode 100644
index 0000000..741377a
--- /dev/null
+++ b/lib/SelectPdf/HtmlToPdfClient.pm
@@ -0,0 +1,2082 @@
+package SelectPdf::HtmlToPdfClient;
+
+use SelectPdf::ApiClient;
+use SelectPdf::AsyncJobClient;
+use SelectPdf::WebElementsClient;
+use strict;
+our @ISA = qw(SelectPdf::ApiClient);
+
+=head1 NAME
+
+SelectPdf::HtmlToPdfClient - Html To Pdf Conversion with SelectPdf Online API.
+
+=head1 SYNOPSIS
+
+Convert URL to PDF and save result into a file on disk.
+
+    use SelectPdf;
+    print "This is SelectPdf-$SelectPdf::VERSION\n";
+
+    my $url = "https://selectpdf.com/";
+    my $local_file = "Test.pdf";
+    my $apiKey = "Your API key here";
+
+    eval {
+        my $client = SelectPdf::HtmlToPdfClient->new($apiKey);
+
+        $client
+            ->setPageSize("A4")
+            ->setMargins(0)
+            ->setShowPageNumbers('False')
+            ->setPageBreaksEnhancedAlgorithm('True')
+        ;
+
+        $client->convertUrlToFile($url, $local_file);
+    };
+
+    if ($@) {
+        print "An error occurred: $@\n";
+    }
+
+Convert raw HTML string to PDF and save result into a file on disk.
+
+    use SelectPdf;
+    print "This is SelectPdf-$SelectPdf::VERSION\n";
+
+    my $html = "This is a test HTML.";
+    my $local_file = "Test.pdf";
+    my $apiKey = "Your API key here";
+
+    eval {
+        my $client = SelectPdf::HtmlToPdfClient->new($apiKey);
+
+        $client
+            ->setPageSize("A4")
+            ->setMargins(0)
+            ->setShowPageNumbers('False')
+            ->setPageBreaksEnhancedAlgorithm('True')
+        ;
+
+        $client->convertHtmlStringToFile($html, $local_file);
+    };
+
+    if ($@) {
+        print "An error occurred: $@\n";
+    }
+
+For more details and full list of parameters see L<https://selectpdf.com/html-to-pdf-api/>.
+
+=head1 METHODS
+
+=head2 new( $apiKey )
+
+Construct the Html To Pdf Client.
+
+    my $client = SelectPdf::HtmlToPdfClient->new($apiKey);
+
+Parameters:
+
+- $apiKey API Key.
+=cut
+sub new {
+    my $type = shift;
+    my $self = $type->SUPER::new;
+
+    # API endpoint
+    $self->{apiEndpoint} = "https://selectpdf.com/api2/convert/";
+
+    # The API key is sent with every request as the "key" parameter.
+    $self->{parameters}{"key"} = shift;
+
+    bless $self, $type;
+    return $self;
+}
+
+=head2 convertUrl( $url )
+
+Convert the specified url to PDF.
+SelectPdf online API can convert http:// and https:// publicly available urls.
+
+    $content = $client->convertUrl($url);
+
+Parameters:
+
+- $url Address of the web page being converted.
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub convertUrl {
+    my($self, $url) = @_;
+
+    # Synchronous url conversion: clear any html/base_url left on the client
+    # by a previous html string conversion.
+    $self->{parameters}{"url"} = $url;
+    $self->{parameters}{"async"} = "False";
+    $self->{parameters}{"html"} = "";
+    $self->{parameters}{"base_url"} = "";
+
+    return $self->SUPER::performPost();
+}
+
+=head2 convertUrlToFile( $url, $filePath )
+
+Convert the specified url to PDF and writes the resulted PDF to a local file.
+SelectPdf online API can convert http:// and https:// publicly available urls.
+
+    $client->convertUrlToFile($url, $filePath);
+
+Parameters:
+
+- $url Address of the web page being converted.
+
+- $filePath Local file including path if necessary.
+=cut
+sub convertUrlToFile {
+    my($self, $url, $filePath) = @_;
+
+    my $content = $self->convertUrl($url);
+
+    # This package never loads IO::File itself, so load it on demand.
+    require IO::File;
+
+    # Write the PDF bytes in binary mode; a failed write or close is fatal so
+    # that a truncated file is not silently left behind.
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $content ) or die "Unable to write output file - $!\n";
+    $file->close or die "Unable to close output file - $!\n";
+}
+
+=head2 convertUrlAsync( $url )
+
+Convert the specified url to PDF using an asynchronous call.
+SelectPdf online API can convert http:// and https:// publicly available urls.
+
+    $content = $client->convertUrlAsync($url);
+
+Parameters:
+
+- $url Address of the web page being converted.
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub convertUrlAsync {
+    my($self, $url) = @_;
+
+    # Url conversion: clear any html/base_url left on the client.
+    $self->{parameters}{"url"} = $url;
+    $self->{parameters}{"html"} = "";
+    $self->{parameters}{"base_url"} = "";
+
+    my $JobID = $self->SUPER::startAsyncJob() or die "An error occurred launching the asynchronous call.";
+
+    my $noPings = 0;
+
+    # Poll the job until it finishes, at most AsyncCallsMaxPings times
+    # (always at least once).
+    do
+    {
+        $noPings++;
+
+        # sleep for a few seconds before next ping
+        sleep($self->{AsyncCallsPingInterval});
+
+        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
+        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});
+
+        my $result = $asyncJobClient->getResult();
+
+        if ($asyncJobClient->finished)
+        {
+            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();
+
+            return $result;
+        }
+
+    } while ($noPings < $self->{AsyncCallsMaxPings});
+
+    die "Asynchronous call did not finish in expected timeframe.";
+
+}
+
+=head2 convertUrlToFileAsync( $url, $filePath )
+
+Convert the specified url to PDF using an asynchronous call and writes the resulted PDF to a local file.
+SelectPdf online API can convert http:// and https:// publicly available urls.
+
+    $client->convertUrlToFileAsync($url, $filePath);
+
+Parameters:
+
+- $url Address of the web page being converted.
+
+- $filePath Local file including path if necessary.
+=cut
+sub convertUrlToFileAsync {
+    my($self, $url, $filePath) = @_;
+
+    my $content = $self->convertUrlAsync($url);
+
+    # This package never loads IO::File itself, so load it on demand.
+    require IO::File;
+
+    # Write the PDF bytes in binary mode; a failed write or close is fatal so
+    # that a truncated file is not silently left behind.
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $content ) or die "Unable to write output file - $!\n";
+    $file->close or die "Unable to close output file - $!\n";
+}
+
+=head2 convertHtmlStringWithBaseUrl( $htmlString, $baseUrl )
+
+Convert the specified HTML string to PDF. Use a base url to resolve relative paths to resources.
+
+    $content = $client->convertHtmlStringWithBaseUrl($htmlString, $baseUrl);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+- $baseUrl Base url used to resolve relative paths to resources (css, images, javascript, etc). Must be a http:// or https:// publicly available url.
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub convertHtmlStringWithBaseUrl {
+    my($self, $htmlString, $baseUrl) = @_;
+
+    # Synchronous html conversion: clear any url left on the client by a
+    # previous url conversion.
+    $self->{parameters}{"url"} = "";
+    $self->{parameters}{"async"} = "False";
+    $self->{parameters}{"html"} = $htmlString;
+    $self->{parameters}{"base_url"} = $baseUrl;
+
+    return $self->SUPER::performPost();
+}
+
+=head2 convertHtmlStringWithBaseUrlToFile( $htmlString, $baseUrl, $filePath )
+
+Convert the specified HTML string to PDF and writes the resulted PDF to a local file. Use a base url to resolve relative paths to resources.
+
+    $client->convertHtmlStringWithBaseUrlToFile($htmlString, $baseUrl, $filePath);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+- $baseUrl Base url used to resolve relative paths to resources (css, images, javascript, etc). Must be a http:// or https:// publicly available url.
+
+- $filePath: Local file including path if necessary.
+
+=cut
+sub convertHtmlStringWithBaseUrlToFile {
+    my($self, $htmlString, $baseUrl, $filePath) = @_;
+
+    my $content = $self->convertHtmlStringWithBaseUrl($htmlString, $baseUrl);
+
+    # This package never loads IO::File itself, so load it on demand.
+    require IO::File;
+
+    # Write the PDF bytes in binary mode; a failed write or close is fatal so
+    # that a truncated file is not silently left behind.
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $content ) or die "Unable to write output file - $!\n";
+    $file->close or die "Unable to close output file - $!\n";
+}
+
+=head2 convertHtmlStringWithBaseUrlAsync( $htmlString, $baseUrl )
+
+Convert the specified HTML string to PDF with an asynchronous call. Use a base url to resolve relative paths to resources.
+
+    $content = $client->convertHtmlStringWithBaseUrlAsync($htmlString, $baseUrl);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+- $baseUrl Base url used to resolve relative paths to resources (css, images, javascript, etc). Must be a http:// or https:// publicly available url.
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub convertHtmlStringWithBaseUrlAsync {
+    my($self, $htmlString, $baseUrl) = @_;
+
+    # Html conversion: clear any url left on the client. The "async" flag is
+    # not set here because startAsyncJob sets it to "True" itself.
+    $self->{parameters}{"url"} = "";
+    $self->{parameters}{"html"} = $htmlString;
+    $self->{parameters}{"base_url"} = $baseUrl;
+
+    my $JobID = $self->SUPER::startAsyncJob() or die "An error occurred launching the asynchronous call.";
+
+    my $noPings = 0;
+
+    # Poll the job until it finishes, at most AsyncCallsMaxPings times
+    # (always at least once).
+    do
+    {
+        $noPings++;
+
+        # sleep for a few seconds before next ping
+        sleep($self->{AsyncCallsPingInterval});
+
+        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
+        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});
+
+        my $result = $asyncJobClient->getResult();
+
+        if ($asyncJobClient->finished)
+        {
+            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();
+
+            return $result;
+        }
+
+    } while ($noPings < $self->{AsyncCallsMaxPings});
+
+    die "Asynchronous call did not finish in expected timeframe.";
+
+}
+
+=head2 convertHtmlStringWithBaseUrlToFileAsync( $htmlString, $baseUrl, $filePath )
+
+Convert the specified HTML string to PDF
with an asynchronous call and writes the resulted PDF to a local file. Use a base url to resolve relative paths to resources.
+
+    $client->convertHtmlStringWithBaseUrlToFileAsync($htmlString, $baseUrl, $filePath);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+- $baseUrl Base url used to resolve relative paths to resources (css, images, javascript, etc). Must be a http:// or https:// publicly available url.
+
+- $filePath: Local file including path if necessary.
+
+=cut
+sub convertHtmlStringWithBaseUrlToFileAsync {
+    my($self, $htmlString, $baseUrl, $filePath) = @_;
+
+    my $content = $self->convertHtmlStringWithBaseUrlAsync($htmlString, $baseUrl);
+
+    # This package never loads IO::File itself, so load it on demand.
+    require IO::File;
+
+    # Write the PDF bytes in binary mode; a failed write or close is fatal so
+    # that a truncated file is not silently left behind.
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $content ) or die "Unable to write output file - $!\n";
+    $file->close or die "Unable to close output file - $!\n";
+}
+
+=head2 convertHtmlString( $htmlString )
+
+Convert the specified HTML string to PDF.
+
+    $content = $client->convertHtmlString($htmlString);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub convertHtmlString {
+    my($self, $htmlString) = @_;
+
+    # Same conversion as the base url variant, with an empty base url.
+    return $self->convertHtmlStringWithBaseUrl($htmlString, "");
+}
+
+=head2 convertHtmlStringToFile( $htmlString, $filePath )
+
+Convert the specified HTML string to PDF and writes the resulted PDF to a local file.
+
+    $client->convertHtmlStringToFile($htmlString, $filePath);
+
+Parameters:
+
+- $htmlString HTML string with the content being converted.
+
+- $filePath: Local file including path if necessary.
+
+=cut
+sub convertHtmlStringToFile {
+    my($self, $htmlString, $filePath) = @_;
+
+    # Same conversion as the base url variant, with an empty base url.
+    $self->convertHtmlStringWithBaseUrlToFile($htmlString, "", $filePath);
+}
+
+=head2 convertHtmlStringAsync( $htmlString )
+
+Convert the specified HTML string to PDF with an asynchronous call.
+ + $content = $client->convertHtmlStringAsync($htmlString); + +Parameters: + +- $htmlString HTML string with the content being converted. + +Returns: + +- Byte array containing the resulted PDF. +=cut +sub convertHtmlStringAsync($) { + my($self, $htmlString) = @_; + + return $self->convertHtmlStringWithBaseUrlAsync($htmlString, ""); +} + +=head2 convertHtmlStringToFileAsync( $htmlString, $filePath ) + +Convert the specified HTML string to PDF with an asynchronous call and writes the resulted PDF to a local file. + + $client->convertHtmlStringToFileAsync($htmlString, $filePath); + +Parameters: + +- $htmlString HTML string with the content being converted. + +- $filePath: Local file including path if necessary. + +=cut +sub convertHtmlStringToFileAsync($,$) { + my($self, $htmlString, $filePath) = @_; + + $self->convertHtmlStringWithBaseUrlToFileAsync($htmlString, "", $filePath); +} + +=head2 setPageSize( $pageSize ) + +Set PDF page size. Default value is A4. If page size is set to Custom, use setPageWidth and setPageHeight methods to set the custom width/height of the PDF pages. + +Parameters: + +- $pageSize: PDF page size. Possible values: Custom, A0, A1, A2, A3, A4, A5, A6, A7, A8, Letter, HalfLetter, Ledger, Legal. + +Returns: + +- Reference to the current object. +=cut +sub setPageSize($) { + my($self, $pageSize) = @_; + + if ($pageSize !~ m/^(Custom|A0|A1|A2|A3|A4|A5|A6|A7|A8|Letter|HalfLetter|Ledger|Legal)$/i) { + die ("Allowed values for Page Size: Custom, A0, A1, A2, A3, A4, A5, A6, A7, A8, Letter, HalfLetter, Ledger, Legal."); + } + + $self->{parameters}{"page_size"} = $pageSize; + return $self; +} + +=head2 setPageWidth( $pageWidth ) + +Set PDF page width in points. Default value is 595pt (A4 page width in points). 1pt = 1/72 inch. +This is taken into account only if page size is set to Custom using setPageSize method. + +Parameters: + +- $pageWidth: Page width in points. + +Returns: + +- Reference to the current object. 
=cut

sub setPageWidth($) {
    my ($self, $pageWidth) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}{"page_width"} = $pageWidth;
    return $self;
}

=head2 setPageHeight( $pageHeight )

Set PDF page height in points. Default value is 842pt (A4 page height in points). 1pt = 1/72 inch.
This is taken into account only if page size is set to Custom using setPageSize method.

Parameters:

- $pageHeight: Page height in points.

Returns:

- Reference to the current object.

=cut

sub setPageHeight($) {
    my ($self, $pageHeight) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}{"page_height"} = $pageHeight;
    return $self;
}

=head2 setPageOrientation( $pageOrientation )

Set PDF page orientation. Default value is Portrait.

Parameters:

- $pageOrientation: PDF page orientation. Possible values: Portrait, Landscape.

Returns:

- Reference to the current object.

=cut

sub setPageOrientation($) {
    my ($self, $pageOrientation) = @_;

    # \A and \z anchor the whole string. With ^...$ a value carrying a trailing
    # newline passed validation and was stored as-is. Still case-insensitive.
    if ($pageOrientation !~ m/\A(?:Portrait|Landscape)\z/i) {
        die ("Allowed values for Page Orientation: Portrait, Landscape.");
    }

    $self->{parameters}{"page_orientation"} = $pageOrientation;
    return $self;
}

=head2 setMarginTop( $marginTop )

Set top margin of the PDF pages. Default value is 5pt.

Parameters:

- $marginTop: Margin value in points. 1pt = 1/72 inch.

Returns:

- Reference to the current object.

=cut

sub setMarginTop($) {
    my ($self, $marginTop) = @_;

    $self->{parameters}{"margin_top"} = $marginTop;
    return $self;
}

=head2 setMarginRight( $marginRight )

Set right margin of the PDF pages. Default value is 5pt.

Parameters:

- $marginRight: Margin value in points. 1pt = 1/72 inch.

Returns:

- Reference to the current object.

=cut

sub setMarginRight($) {
    my ($self, $marginRight) = @_;

    $self->{parameters}{"margin_right"} = $marginRight;
    return $self;
}

=head2 setMarginBottom( $marginBottom )

Set bottom margin of the PDF pages. Default value is 5pt.

Parameters:

- $marginBottom: Margin value in points. 1pt = 1/72 inch.

Returns:

- Reference to the current object.

=cut

sub setMarginBottom($) {
    my $self = shift;
    my ($bottom) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'margin_bottom'} = $bottom;

    return $self;
}

=head2 setMarginLeft( $marginLeft )

Set left margin of the PDF pages. Default value is 5pt.

Parameters:

- $marginLeft: Margin value in points. 1pt = 1/72 inch.

Returns:

- Reference to the current object.

=cut

sub setMarginLeft($) {
    my $self = shift;
    my ($left) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'margin_left'} = $left;

    return $self;
}

=head2 setMargins( $margin )

Set all margins of the PDF pages to the same value. Default value is 5pt.

Parameters:

- $margin: Margin value in points. 1pt = 1/72 inch.

Returns:

- Reference to the current object.

=cut

sub setMargins($) {
    my $self = shift;
    my ($value) = @_;

    # Each setter is invoked on whatever the previous one returned, in the
    # order top, right, bottom, left.
    my $after_top    = $self->setMarginTop($value);
    my $after_right  = $after_top->setMarginRight($value);
    my $after_bottom = $after_right->setMarginBottom($value);

    return $after_bottom->setMarginLeft($value);
}

=head2 setPdfName( $pdfName )

Specify the name of the pdf document that will be created. The default value is Document.pdf.

Parameters:

- $pdfName: Name of the generated PDF document.

Returns:

- Reference to the current object.

=cut

sub setPdfName($) {
    my $self = shift;
    my ($name) = @_;

    $self->{parameters}->{'pdf_name'} = $name;

    return $self;
}

=head2 setRenderingEngine( $renderingEngine )

Set the rendering engine used for the HTML to PDF conversion. Default value is WebKit.

Parameters:

- $renderingEngine: HTML rendering engine. Possible values: WebKit, Restricted, Blink.

Returns:

- Reference to the current object.
=cut

sub setRenderingEngine($) {
    my ($self, $renderingEngine) = @_;

    # \A and \z anchor the whole string. With ^...$ a value carrying a trailing
    # newline passed validation and was stored as-is. Still case-insensitive.
    if ($renderingEngine !~ m/\A(?:WebKit|Restricted|Blink)\z/i) {
        die ("Allowed values for Rendering Engine: WebKit, Restricted, Blink.");
    }

    $self->{parameters}{"engine"} = $renderingEngine;
    return $self;
}

=head2 setUserPassword( $userPassword )

Set PDF user password.

Parameters:

- $userPassword: PDF user password.

Returns:

- Reference to the current object.

=cut

sub setUserPassword($) {
    my ($self, $userPassword) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}{"user_password"} = $userPassword;
    return $self;
}

=head2 setOwnerPassword( $ownerPassword )

Set PDF owner password.

Parameters:

- $ownerPassword: PDF owner password.

Returns:

- Reference to the current object.

=cut

sub setOwnerPassword($) {
    my ($self, $ownerPassword) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}{"owner_password"} = $ownerPassword;
    return $self;
}

=head2 setWebPageWidth( $webPageWidth )

Set the width used by the converter's internal browser window in pixels. The default value is 1024px.

Parameters:

- $webPageWidth: Browser window width in pixels.

Returns:

- Reference to the current object.

=cut

sub setWebPageWidth($) {
    my ($self, $webPageWidth) = @_;

    $self->{parameters}{"web_page_width"} = $webPageWidth;
    return $self;
}

=head2 setWebPageHeight( $webPageHeight )

Set the height used by the converter's internal browser window in pixels.
The default value is 0px and it means that the page height is automatically calculated by the converter.

Parameters:

- $webPageHeight: Browser window height in pixels. Set it to 0px to automatically calculate page height.

Returns:

- Reference to the current object.
=cut

sub setWebPageHeight($) {
    my $self = shift;
    my ($height) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'web_page_height'} = $height;

    return $self;
}

=head2 setMinLoadTime( $minLoadTime )

Introduce a delay (in seconds) before the actual conversion to allow the web page to fully load. This method is an alias for setConversionDelay.
The default value is 1 second. Use a larger value if the web page has content that takes time to render when it is displayed in the browser.

Parameters:

- $minLoadTime: Delay in seconds.

Returns:

- Reference to the current object.

=cut

sub setMinLoadTime($) {
    my $self = shift;
    my ($seconds) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'min_load_time'} = $seconds;

    return $self;
}

=head2 setConversionDelay( $delay )

Introduce a delay (in seconds) before the actual conversion to allow the web page to fully load. This method is an alias for setMinLoadTime.
The default value is 1 second. Use a larger value if the web page has content that takes time to render when it is displayed in the browser.

Parameters:

- $delay: Delay in seconds.

Returns:

- Reference to the current object.

=cut

sub setConversionDelay($) {
    my $self = shift;
    my ($seconds) = @_;

    # Pure alias: setMinLoadTime does the work.
    return $self->setMinLoadTime($seconds);
}

=head2 setMaxLoadTime( $maxLoadTime )

Set the maximum amount of time (in seconds) that the converter will wait for the page to load. This method is an alias for setNavigationTimeout.
A timeout error is displayed when this time elapses. The default value is 30 seconds.
Use a larger value (up to 120 seconds allowed) for pages that take a long time to load.

Parameters:

- $maxLoadTime: Timeout in seconds.

Returns:

- Reference to the current object.

=cut

sub setMaxLoadTime($) {
    my $self = shift;
    my ($seconds) = @_;

    # Stored as given; the documented 120 second limit is not enforced here.
    $self->{parameters}->{'max_load_time'} = $seconds;

    return $self;
}

=head2 setNavigationTimeout( $timeout )

Set the maximum amount of time (in seconds) that the converter will wait for the page to load.
This method is an alias for setMaxLoadTime.
A timeout error is displayed when this time elapses. The default value is 30 seconds.
Use a larger value (up to 120 seconds allowed) for pages that take a long time to load.

Parameters:

- $timeout: Timeout in seconds.

Returns:

- Reference to the current object.

=cut

sub setNavigationTimeout($) {
    my $self = shift;
    my ($seconds) = @_;

    # Pure alias: setMaxLoadTime does the work.
    return $self->setMaxLoadTime($seconds);
}

=head2 setSecureProtocol( $secureProtocol )

Set the protocol used for secure (HTTPS) connections. Set this only if you have an older server that only works with older SSL connections.

Parameters:

- $secureProtocol: Secure protocol. Possible values: 0 (TLS 1.1 or newer), 1 (TLS 1.0), 2 (SSL v3 only).

Returns:

- Reference to the current object.

=cut

sub setSecureProtocol($) {
    my $self = shift;
    my ($protocol) = @_;

    # The value is compared as a string, so "1.0" is not accepted as 1.
    my $is_allowed = grep { $protocol eq $_ } 0, 1, 2;
    die ("Allowed values for Secure Protocol: 0 (TLS 1.1 or newer), 1 (TLS 1.0), 2 (SSL v3 only).")
        unless $is_allowed;

    $self->{parameters}->{'protocol'} = $protocol;

    return $self;
}

=head2 setUseCssPrint( $useCssPrint )

Specify if the CSS Print media type is used instead of the Screen media type. The default value is False.

Parameters:

- $useCssPrint: Use CSS Print media or not.

Returns:

- Reference to the current object.

=cut

sub setUseCssPrint($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'use_css_print'} = $serialized;

    return $self;
}

=head2 setBackgroundColor( $backgroundColor )

Specify the background color of the PDF page in RGB html format. The default is #FFFFFF.

Parameters:

- $backgroundColor: Background color in #RRGGBB format.

Returns:

- Reference to the current object.
=cut

sub setBackgroundColor($) {
    my ($self, $backgroundColor) = @_;

    # \A and \z anchor the whole string. With ^...$ a value such as "#FFFFFF\n"
    # passed validation and was stored with the newline. The leading '#'
    # remains optional, as before.
    if ($backgroundColor !~ m/\A#?[0-9a-fA-F]{6}\z/) {
        die ("Color value must be in #RRGGBB format.");
    }

    $self->{parameters}{"background_color"} = $backgroundColor;
    return $self;
}

=head2 setDrawHtmlBackground( $drawHtmlBackground )

Set a flag indicating if the web page background is rendered in PDF. The default value is True.

Parameters:

- $drawHtmlBackground: Draw the HTML background or not.

Returns:

- Reference to the current object.

=cut

sub setDrawHtmlBackground($) {
    my ($self, $drawHtmlBackground) = @_;

    # The parent class turns the flag into the value that is stored.
    $self->{parameters}{"draw_html_background"} = $self->SUPER::serializeBoolean($drawHtmlBackground);
    return $self;
}

=head2 setDisableJavascript( $disableJavascript )

Do not run JavaScript in web pages. The default value is False and javascript is executed.

Parameters:

- $disableJavascript: Disable javascript or not.

Returns:

- Reference to the current object.

=cut

sub setDisableJavascript($) {
    my ($self, $disableJavascript) = @_;

    $self->{parameters}{"disable_javascript"} = $self->SUPER::serializeBoolean($disableJavascript);
    return $self;
}

=head2 setDisableInternalLinks( $disableInternalLinks )

Do not create internal links in the PDF. The default value is False and internal links are created.

Parameters:

- $disableInternalLinks: Disable internal links or not.

Returns:

- Reference to the current object.

=cut

sub setDisableInternalLinks($) {
    my ($self, $disableInternalLinks) = @_;

    $self->{parameters}{"disable_internal_links"} = $self->SUPER::serializeBoolean($disableInternalLinks);
    return $self;
}

=head2 setDisableExternalLinks( $disableExternalLinks )

Do not create external links in the PDF. The default value is False and external links are created.

Parameters:

- $disableExternalLinks: Disable external links or not.

Returns:

- Reference to the current object.
=cut

sub setDisableExternalLinks($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'disable_external_links'} = $serialized;

    return $self;
}

=head2 setRenderOnTimeout( $renderOnTimeout )

Try to render the PDF even in case of the web page loading timeout. The default value is False and an exception is raised in case of web page navigation timeout.

Parameters:

- $renderOnTimeout: Render in case of timeout or not.

Returns:

- Reference to the current object.

=cut

sub setRenderOnTimeout($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'render_on_timeout'} = $serialized;

    return $self;
}

=head2 setKeepImagesTogether( $keepImagesTogether )

Avoid breaking images between PDF pages. The default value is False and images are split between pages if larger.

Parameters:

- $keepImagesTogether: Try to keep images on same page or not.

Returns:

- Reference to the current object.

=cut

sub setKeepImagesTogether($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'keep_images_together'} = $serialized;

    return $self;
}

=head2 setDocTitle( $docTitle )

Set the PDF document title.

Parameters:

- $docTitle: Document title.

Returns:

- Reference to the current object.

=cut

sub setDocTitle($) {
    my $self = shift;
    my ($title) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'doc_title'} = $title;

    return $self;
}

=head2 setDocSubject( $docSubject )

Set the PDF document subject.

Parameters:

- $docSubject: Document subject.

Returns:

- Reference to the current object.

=cut

sub setDocSubject($) {
    my $self = shift;
    my ($subject) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'doc_subject'} = $subject;

    return $self;
}

=head2 setDocKeywords( $docKeywords )

Set the PDF document keywords.

Parameters:

- $docKeywords: Document keywords.

Returns:

- Reference to the current object.

=cut

sub setDocKeywords($) {
    my $self = shift;
    my ($keywords) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'doc_keywords'} = $keywords;

    return $self;
}

=head2 setDocAuthor( $docAuthor )

Set the PDF document author.

Parameters:

- $docAuthor: Document author.

Returns:

- Reference to the current object.

=cut

sub setDocAuthor($) {
    my $self = shift;
    my ($author) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'doc_author'} = $author;

    return $self;
}

=head2 setDocAddCreationDate( $docAddCreationDate )

Add the date and time when the PDF document was created to the PDF document information. The default value is False.

Parameters:

- $docAddCreationDate: Add creation date to the document metadata or not.

Returns:

- Reference to the current object.

=cut

sub setDocAddCreationDate($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'doc_add_creation_date'} = $serialized;

    return $self;
}

=head2 setViewerPageLayout( $pageLayout )

Set the page layout to be used when the document is opened in a PDF viewer. The default value is 1 - OneColumn.

Parameters:

- $pageLayout: Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).

Returns:

- Reference to the current object.

=cut

sub setViewerPageLayout($) {
    my $self = shift;
    my ($layout) = @_;

    # The value is compared as a string against the four accepted codes.
    my $is_allowed = grep { $layout eq $_ } 0, 1, 2, 3;
    die ("Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).")
        unless $is_allowed;

    $self->{parameters}->{'viewer_page_layout'} = $layout;

    return $self;
}

=head2 setViewerPageMode( $pageMode )

Set the document page mode when the pdf document is opened in a PDF viewer. The default value is 0 - UseNone.

Parameters:

- $pageMode: Page mode.
Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).

Returns:

- Reference to the current object.

=cut

sub setViewerPageMode($) {
    my $self = shift;
    my ($mode) = @_;

    # The value is compared as a string against the six accepted codes.
    my $is_allowed = grep { $mode eq $_ } 0, 1, 2, 3, 4, 5;
    die ("Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).")
        unless $is_allowed;

    $self->{parameters}->{'viewer_page_mode'} = $mode;

    return $self;
}

=head2 setViewerCenterWindow( $viewerCenterWindow )

Set a flag specifying whether to position the document's window in the center of the screen. The default value is False.

Parameters:

- $viewerCenterWindow: Center window or not.

Returns:

- Reference to the current object.

=cut

sub setViewerCenterWindow($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_center_window'} = $serialized;

    return $self;
}

=head2 setViewerDisplayDocTitle( $viewerDisplayDocTitle )

Set a flag specifying whether the window's title bar should display the document title taken from document information. The default value is False.

Parameters:

- $viewerDisplayDocTitle: Display title or not.

Returns:

- Reference to the current object.

=cut

sub setViewerDisplayDocTitle($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_display_doc_title'} = $serialized;

    return $self;
}

=head2 setViewerFitWindow( $viewerFitWindow )

Set a flag specifying whether to resize the document's window to fit the size of the first displayed page. The default value is False.

Parameters:

- $viewerFitWindow: Fit window or not.

Returns:

- Reference to the current object.
=cut

sub setViewerFitWindow($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_fit_window'} = $serialized;

    return $self;
}

=head2 setViewerHideMenuBar( $viewerHideMenuBar )

Set a flag specifying whether to hide the pdf viewer application's menu bar when the document is active. The default value is False.

Parameters:

- $viewerHideMenuBar: Hide menu bar or not.

Returns:

- Reference to the current object.

=cut

sub setViewerHideMenuBar($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_hide_menu_bar'} = $serialized;

    return $self;
}

=head2 setViewerHideToolbar( $viewerHideToolbar )

Set a flag specifying whether to hide the pdf viewer application's tool bars when the document is active. The default value is False.

Parameters:

- $viewerHideToolbar: Hide tool bars or not.

Returns:

- Reference to the current object.

=cut

sub setViewerHideToolbar($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_hide_toolbar'} = $serialized;

    return $self;
}

=head2 setViewerHideWindowUI( $viewerHideWindowUI )

Set a flag specifying whether to hide user interface elements in the document's window (such as scroll bars and navigation controls),
leaving only the document's contents displayed.

Parameters:

- $viewerHideWindowUI: Hide window UI or not.

Returns:

- Reference to the current object.

=cut

sub setViewerHideWindowUI($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'viewer_hide_window_ui'} = $serialized;

    return $self;
}

=head2 setShowHeader( $showHeader )

Control if a custom header is displayed in the generated PDF document. The default value is False.

Parameters:

- $showHeader: Show header or not.

Returns:

- Reference to the current object.
=cut

sub setShowHeader($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'show_header'} = $serialized;

    return $self;
}

=head2 setHeaderHeight( $height )

The height of the pdf document header. This height is specified in points. 1 point is 1/72 inch. The default value is 50.

Parameters:

- $height: Header height.

Returns:

- Reference to the current object.

=cut

sub setHeaderHeight($) {
    my $self = shift;
    my ($points) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'header_height'} = $points;

    return $self;
}

=head2 setHeaderUrl( $url )

Set the url of the web page that is converted and rendered in the PDF document header.

Parameters:

- $url: The url of the web page that is converted and rendered in the pdf document header.

Returns:

- Reference to the current object.

=cut

sub setHeaderUrl($) {
    my $self = shift;
    my ($address) = @_;

    # Stored as given; the url is not validated here.
    $self->{parameters}->{'header_url'} = $address;

    return $self;
}

=head2 setHeaderHtml( $html )

Set the raw html that is converted and rendered in the pdf document header.

Parameters:

- $html: The raw html that is converted and rendered in the pdf document header.

Returns:

- Reference to the current object.

=cut

sub setHeaderHtml($) {
    my $self = shift;
    my ($markup) = @_;

    $self->{parameters}->{'header_html'} = $markup;

    return $self;
}

=head2 setHeaderBaseUrl( $baseUrl )

Set an optional base url that can be used together with the header HTML to resolve relative paths from the html string.

Parameters:

- $baseUrl: Header base url.

Returns:

- Reference to the current object.

=cut

sub setHeaderBaseUrl($) {
    my $self = shift;
    my ($address) = @_;

    $self->{parameters}->{'header_base_url'} = $address;

    return $self;
}

=head2 setHeaderDisplayOnFirstPage( $displayOnFirstPage )

Control the visibility of the header on the first page of the generated pdf document. The default value is True.

Parameters:

- $displayOnFirstPage: Display header on the first page or not.

Returns:

- Reference to the current object.

=cut

sub setHeaderDisplayOnFirstPage($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'header_display_on_first_page'} = $serialized;

    return $self;
}

=head2 setHeaderDisplayOnOddPages( $displayOnOddPages )

Control the visibility of the header on the odd numbered pages of the generated pdf document. The default value is True.

Parameters:

- $displayOnOddPages: Display header on odd pages or not.

Returns:

- Reference to the current object.

=cut

sub setHeaderDisplayOnOddPages($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'header_display_on_odd_pages'} = $serialized;

    return $self;
}

=head2 setHeaderDisplayOnEvenPages( $displayOnEvenPages )

Control the visibility of the header on the even numbered pages of the generated pdf document. The default value is True.

Parameters:

- $displayOnEvenPages: Display header on even pages or not.

Returns:

- Reference to the current object.

=cut

sub setHeaderDisplayOnEvenPages($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'header_display_on_even_pages'} = $serialized;

    return $self;
}

=head2 setHeaderWebPageWidth( $headerWebPageWidth )

Set the width in pixels used by the converter's internal browser window during the conversion of the header content. The default value is 1024px.

Parameters:

- $headerWebPageWidth: Browser window width in pixels.

Returns:

- Reference to the current object.

=cut

sub setHeaderWebPageWidth($) {
    my $self = shift;
    my ($pixels) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'header_web_page_width'} = $pixels;

    return $self;
}

=head2 setHeaderWebPageHeight( $headerWebPageHeight )

Set the height in pixels used by the converter's internal browser window during the conversion of the header content.
The default value is 0px and it means that the page height is automatically calculated by the converter.

Parameters:

- $headerWebPageHeight: Browser window height in pixels. Set it to 0px to automatically calculate page height.

Returns:

- Reference to the current object.

=cut

sub setHeaderWebPageHeight($) {
    my $self = shift;
    my ($pixels) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'header_web_page_height'} = $pixels;

    return $self;
}



=head2 setShowFooter( $showFooter )

Control if a custom footer is displayed in the generated PDF document. The default value is False.

Parameters:

- $showFooter: Show footer or not.

Returns:

- Reference to the current object.

=cut

sub setShowFooter($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'show_footer'} = $serialized;

    return $self;
}

=head2 setFooterHeight( $height )

The height of the pdf document footer. This height is specified in points. 1 point is 1/72 inch. The default value is 50.

Parameters:

- $height: Footer height.

Returns:

- Reference to the current object.

=cut

sub setFooterHeight($) {
    my $self = shift;
    my ($points) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'footer_height'} = $points;

    return $self;
}

=head2 setFooterUrl( $url )

Set the url of the web page that is converted and rendered in the PDF document footer.

Parameters:

- $url: The url of the web page that is converted and rendered in the pdf document footer.

Returns:

- Reference to the current object.

=cut

sub setFooterUrl($) {
    my $self = shift;
    my ($address) = @_;

    # Stored as given; the url is not validated here.
    $self->{parameters}->{'footer_url'} = $address;

    return $self;
}

=head2 setFooterHtml( $html )

Set the raw html that is converted and rendered in the pdf document footer.

Parameters:

- $html: The raw html that is converted and rendered in the pdf document footer.

Returns:

- Reference to the current object.
=cut

sub setFooterHtml($) {
    my $self = shift;
    my ($markup) = @_;

    # Stored as given; the markup is not validated here.
    $self->{parameters}->{'footer_html'} = $markup;

    return $self;
}

=head2 setFooterBaseUrl( $baseUrl )

Set an optional base url that can be used together with the footer HTML to resolve relative paths from the html string.

Parameters:

- $baseUrl: Footer base url.

Returns:

- Reference to the current object.

=cut

sub setFooterBaseUrl($) {
    my $self = shift;
    my ($address) = @_;

    # Stored as given; the url is not validated here.
    $self->{parameters}->{'footer_base_url'} = $address;

    return $self;
}

=head2 setFooterDisplayOnFirstPage( $displayOnFirstPage )

Control the visibility of the footer on the first page of the generated pdf document. The default value is True.

Parameters:

- $displayOnFirstPage: Display footer on the first page or not.

Returns:

- Reference to the current object.

=cut

sub setFooterDisplayOnFirstPage($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'footer_display_on_first_page'} = $serialized;

    return $self;
}

=head2 setFooterDisplayOnOddPages( $displayOnOddPages )

Control the visibility of the footer on the odd numbered pages of the generated pdf document. The default value is True.

Parameters:

- $displayOnOddPages: Display footer on odd pages or not.

Returns:

- Reference to the current object.

=cut

sub setFooterDisplayOnOddPages($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'footer_display_on_odd_pages'} = $serialized;

    return $self;
}

=head2 setFooterDisplayOnEvenPages( $displayOnEvenPages )

Control the visibility of the footer on the even numbered pages of the generated pdf document. The default value is True.

Parameters:

- $displayOnEvenPages: Display footer on even pages or not.

Returns:

- Reference to the current object.
=cut

sub setFooterDisplayOnEvenPages($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'footer_display_on_even_pages'} = $serialized;

    return $self;
}

=head2 setFooterDisplayOnLastPage( $displayOnLastPage )

Add a special footer on the last page of the generated pdf document only. The default value is False.
Use setFooterUrl or setFooterHtml and setFooterBaseUrl to specify the content of the last page footer.
Use setFooterHeight to specify the height of the special last page footer.

Parameters:

- $displayOnLastPage: Display special footer on the last page or not.

Returns:

- Reference to the current object.

=cut

sub setFooterDisplayOnLastPage($) {
    my $self = shift;
    my ($flag) = @_;

    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'footer_display_on_last_page'} = $serialized;

    return $self;
}

=head2 setFooterWebPageWidth( $footerWebPageWidth )

Set the width in pixels used by the converter's internal browser window during the conversion of the footer content. The default value is 1024px.

Parameters:

- $footerWebPageWidth: Browser window width in pixels.

Returns:

- Reference to the current object.

=cut

sub setFooterWebPageWidth($) {
    my $self = shift;
    my ($pixels) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'footer_web_page_width'} = $pixels;

    return $self;
}

=head2 setFooterWebPageHeight( $footerWebPageHeight )

Set the height in pixels used by the converter's internal browser window during the conversion of the footer content.
The default value is 0px and it means that the page height is automatically calculated by the converter.

Parameters:

- $footerWebPageHeight: Browser window height in pixels. Set it to 0px to automatically calculate page height.

Returns:

- Reference to the current object.
=cut

sub setFooterWebPageHeight($) {
    my $self = shift;
    my ($pixels) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'footer_web_page_height'} = $pixels;

    return $self;
}


=head2 setShowPageNumbers( $showPageNumbers )

Show page numbers. Default value is True. Page numbers will be displayed in the footer of the PDF document.

Parameters:

- $showPageNumbers: Show page numbers or not.

Returns:

- Reference to the current object.

=cut

sub setShowPageNumbers($) {
    my $self = shift;
    my ($flag) = @_;

    # The parent class turns the flag into the value that is stored.
    my $serialized = $self->SUPER::serializeBoolean($flag);
    $self->{parameters}->{'page_numbers'} = $serialized;

    return $self;
}

=head2 setPageNumbersFirst( $firstPageNumber )

Control the page number for the first page being rendered. The default value is 1.

Parameters:

- $firstPageNumber: First page number.

Returns:

- Reference to the current object.

=cut

sub setPageNumbersFirst($) {
    my $self = shift;
    my ($first) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'page_numbers_first'} = $first;

    return $self;
}

=head2 setPageNumbersOffset( $totalPagesOffset )

Control the total number of pages offset in the generated pdf document. The default value is 0.

Parameters:

- $totalPagesOffset: Offset for the total number of pages in the generated pdf document.

Returns:

- Reference to the current object.

=cut

sub setPageNumbersOffset($) {
    my $self = shift;
    my ($offset) = @_;

    # Stored as given; the value is not validated here.
    $self->{parameters}->{'page_numbers_offset'} = $offset;

    return $self;
}

=head2 setPageNumbersTemplate( $template )

Set the text that is used to display the page numbers. It can contain the placeholder {page_number} for the current page number and {total_pages}
for the total number of pages. The default value is "Page: {page_number} of {total_pages}".

Parameters:

- $template: Page numbers template.

Returns:

- Reference to the current object.
+=cut
+sub setPageNumbersTemplate {
+    my ($self, $template) = @_;
+
+    $self->{parameters}{"page_numbers_template"} = $template;
+    return $self;
+}
+
+=head2 setPageNumbersFontName( $fontName )
+
+Set the font used to display the page numbers text. The default value is "Helvetica".
+
+Parameters:
+
+- $fontName: The font used to display the page numbers text.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPageNumbersFontName {
+    my ($self, $fontName) = @_;
+
+    $self->{parameters}{"page_numbers_font_name"} = $fontName;
+    return $self;
+}
+
+=head2 setPageNumbersFontSize( $fontSize )
+
+Set the size of the font used to display the page numbers. The default value is 10 points.
+
+Parameters:
+
+- $fontSize: The size in points of the font used to display the page numbers.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPageNumbersFontSize {
+    my ($self, $fontSize) = @_;
+
+    $self->{parameters}{"page_numbers_font_size"} = $fontSize;
+    return $self;
+}
+
+=head2 setPageNumbersAlignment( $alignment )
+
+Set the alignment of the page numbers text. The default value is 3 (Right).
+
+Parameters:
+
+- $alignment: The alignment of the page numbers text. Possible values: 1 (Left), 2 (Center), 3 (Right).
+
+Returns:
+
+- Reference to the current object.
+
+Dies if $alignment is not one of the allowed values.
+=cut
+sub setPageNumbersAlignment {
+    my ($self, $alignment) = @_;
+
+    # Exactly "1", "2" or "3" is accepted; anything else (including undef) is rejected.
+    unless (defined $alignment and $alignment =~ /\A[123]\z/) {
+        die ("Allowed values for Page Numbers Alignment: 1 (Left), 2 (Center), 3 (Right).");
+    }
+
+    $self->{parameters}{"page_numbers_alignment"} = $alignment;
+    return $self;
+}
+
+=head2 setPageNumbersColor( $color )
+
+Specify the color of the page numbers text in #RRGGBB html format. The default value is #333333.
+
+Parameters:
+
+- $color: Page numbers color.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPageNumbersColor {
+    my ($self, $color) = @_;
+
+    # \A and \z anchor the whole string; a plain $ would also accept "#333333\n".
+    unless (defined $color and $color =~ m/\A#?[0-9a-fA-F]{6}\z/) {
+        die ("Color value must be in #RRGGBB format.");
+    }
+
+    $self->{parameters}{"page_numbers_color"} = $color;
+    return $self;
+}
+
+=head2 setPageNumbersVerticalPosition( $position )
+
+Specify the position in points on the vertical where the page numbers text is displayed in the footer. The default value is 10 points.
+
+Parameters:
+
+- $position: Page numbers Y position in points.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPageNumbersVerticalPosition {
+    my ($self, $position) = @_;
+
+    $self->{parameters}{"page_numbers_pos_y"} = $position;
+    return $self;
+}
+
+=head2 setPdfBookmarksSelectors( $selectors )
+
+Generate automatic bookmarks in pdf. The elements that will be bookmarked are defined using CSS selectors.
+For example, the selector for all the H1 elements is "H1", the selector for all the elements with the CSS class name 'myclass' is "*.myclass" and
+the selector for the elements with the id 'myid' is "*#myid".
+Read more about CSS selectors here.
+
+Parameters:
+
+- $selectors: CSS selectors used to identify HTML elements, comma separated.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPdfBookmarksSelectors {
+    my ($self, $selectors) = @_;
+
+    $self->{parameters}{"pdf_bookmarks_selectors"} = $selectors;
+    return $self;
+}
+
+=head2 setPdfHideElements( $selectors )
+
+Exclude page elements from the conversion. The elements that will be excluded are defined using CSS selectors.
+For example, the selector for all the H1 elements is "H1", the selector for all the elements with the CSS class name 'myclass' is "*.myclass" and
+the selector for the elements with the id 'myid' is "*#myid".
+Read more about CSS selectors here.
+
+Parameters:
+
+- $selectors: CSS selectors used to identify HTML elements, comma separated.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPdfHideElements {
+    my ($self, $selectors) = @_;
+
+    $self->{parameters}{"pdf_hide_elements"} = $selectors;
+    return $self;
+}
+
+=head2 setPdfShowOnlyElementID( $elementID )
+
+Convert only a specific section of the web page to pdf. The section that will be converted to pdf is specified by the html element ID.
+The element can be anything (image, table, table row, div, text, etc).
+
+Parameters:
+
+- $elementID: HTML element ID.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPdfShowOnlyElementID {
+    my ($self, $elementID) = @_;
+
+    $self->{parameters}{"pdf_show_only_element_id"} = $elementID;
+    return $self;
+}
+
+=head2 setPdfWebElementsSelectors( $selectors )
+
+Get the locations of page elements from the conversion. The elements that will have their locations retrieved are defined using CSS selectors.
+For example, the selector for all the H1 elements is "H1", the selector for all the elements with the CSS class name 'myclass' is "*.myclass" and
+the selector for the elements with the id 'myid' is "*#myid".
+Read more about CSS selectors here.
+
+Parameters:
+
+- $selectors: CSS selectors used to identify HTML elements, comma separated.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPdfWebElementsSelectors {
+    my ($self, $selectors) = @_;
+
+    $self->{parameters}{"pdf_web_elements_selectors"} = $selectors;
+    return $self;
+}
+
+=head2 setStartupMode( $startupMode )
+
+Set converter startup mode. The default value is Automatic: the conversion is started as soon as the page loads
+(and the conversion delay set with setConversionDelay elapses).
+If set to Manual, the conversion is started only by a javascript call to SelectPdf.startConversion() from within the web page.
+
+Parameters:
+
+- $startupMode: Converter startup mode. Possible values: Automatic, Manual.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setStartupMode {
+    my ($self, $startupMode) = @_;
+
+    # Case-insensitive match on the whole string; \z (unlike $) rejects a trailing newline.
+    unless (defined $startupMode and $startupMode =~ m/\A(?:Automatic|Manual)\z/i) {
+        die ("Allowed values for Startup Mode: Automatic, Manual.");
+    }
+
+    # The value is stored exactly as supplied (its case is not normalized).
+    $self->{parameters}{"startup_mode"} = $startupMode;
+    return $self;
+}
+
+=head2 setSkipDecoding( $skipDecoding )
+
+Internal use only.
+
+Parameters:
+
+- $skipDecoding: The default value is True.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setSkipDecoding {
+    my ($self, $skipDecoding) = @_;
+
+    # The flag is passed through the parent class's serializeBoolean before being stored.
+    $self->{parameters}{"skip_decoding"} = $self->SUPER::serializeBoolean($skipDecoding);
+    return $self;
+}
+
+=head2 setScaleImages( $scaleImages )
+
+Set a flag indicating if the images from the page are scaled during the conversion process. The default value is False and images are not scaled.
+
+Parameters:
+
+- $scaleImages: Scale images or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setScaleImages {
+    my ($self, $scaleImages) = @_;
+
+    $self->{parameters}{"scale_images"} = $self->SUPER::serializeBoolean($scaleImages);
+    return $self;
+}
+
+=head2 setSinglePagePdf( $generateSinglePagePdf )
+
+Generate a single page PDF. The converter will automatically resize the PDF page to fit all the content in a single page.
+The default value of this property is False and the PDF will contain several pages if the content is large.
+
+Parameters:
+
+- $generateSinglePagePdf: Generate a single page PDF or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setSinglePagePdf {
+    my ($self, $generateSinglePagePdf) = @_;
+
+    $self->{parameters}{"single_page_pdf"} = $self->SUPER::serializeBoolean($generateSinglePagePdf);
+    return $self;
+}
+
+=head2 setPageBreaksEnhancedAlgorithm( $enableEnhancedPageBreaksAlgorithm )
+
+Set a flag indicating if an enhanced custom page breaks algorithm is used.
+The enhanced algorithm is a little bit slower but it will prevent the appearance of hidden text in the PDF when custom page breaks are used.
+The default value for this property is False.
+
+Parameters:
+
+- $enableEnhancedPageBreaksAlgorithm: Enable enhanced page breaks algorithm or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setPageBreaksEnhancedAlgorithm {
+    my ($self, $enableEnhancedPageBreaksAlgorithm) = @_;
+
+    # The flag is passed through the parent class's serializeBoolean before being stored.
+    $self->{parameters}{"page_breaks_enhanced_algorithm"} = $self->SUPER::serializeBoolean($enableEnhancedPageBreaksAlgorithm);
+    return $self;
+}
+
+=head2 setCookies( $cookies )
+
+Set HTTP cookies for the web page being converted.
+
+Parameters:
+
+- $cookies: Hash reference (name => value) with HTTP cookies that will be sent to the page being converted.
+
+Returns:
+
+- Reference to the current object.
+
+Dies if $cookies is not a reference.
+=cut
+sub setCookies {
+    my ($self, $cookies) = @_;
+
+    # Fail with a clear message instead of a strict-refs error from the dereference below.
+    if (!ref($cookies)) {
+        die ("Cookies must be provided as a hash reference.");
+    }
+
+    # URI's query_form builds the URL-encoded "name=value&..." string sent to the API.
+    my $url = URI->new('', 'http');
+    $url->query_form(%$cookies);
+    my $cookiesString = $url->query;
+
+    $self->{parameters}{"cookies_string"} = $cookiesString;
+    return $self;
+}
+
+=head2 setCustomParameter( $parameterName, $parameterValue )
+
+Set a custom parameter. Do not use this method unless advised by SelectPdf.
+
+Parameters:
+
+- $parameterName: Parameter name.
+
+- $parameterValue: Parameter value.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setCustomParameter {
+    my ($self, $parameterName, $parameterValue) = @_;
+
+    # Written straight into the request parameters; an existing entry with the same name is overwritten.
+    $self->{parameters}{$parameterName} = $parameterValue;
+    return $self;
+}
+
+=head2 getWebElements
+
+Get the locations of certain web elements. This is retrieved if pdf_web_elements_selectors parameter is set and elements were found to match the selectors.
+
+Returns:
+
+- Json with web elements locations.
+=cut
+sub getWebElements {
+    my($self) = @_;
+
+    # A dedicated client is built from this object's API key and job id, and pointed at the web elements endpoint.
+    my $webElementsClient = SelectPdf::WebElementsClient->new($self->{parameters}{"key"}, $self->{jobId});
+    $webElementsClient->setApiEndpoint($self->{apiWebElementsEndpoint});
+
+    return $webElementsClient->getWebElements();
+}
+
+
+
+1;
\ No newline at end of file
diff --git a/lib/SelectPdf/PdfMergeClient.pm b/lib/SelectPdf/PdfMergeClient.pm
new file mode 100644
index 0000000..b8a65da
--- /dev/null
+++ b/lib/SelectPdf/PdfMergeClient.pm
@@ -0,0 +1,673 @@
+package SelectPdf::PdfMergeClient;
+
+use JSON;
+use SelectPdf::ApiClient;
+use SelectPdf::AsyncJobClient;
+use strict;
+our @ISA = qw(SelectPdf::ApiClient);
+
+=head1 NAME
+
+SelectPdf::PdfMergeClient - Pdf Merge with SelectPdf Online API.
+
+=head1 SYNOPSIS
+
+Merge PDFs from local disk or public url and save result into a file on disk.
+
+    use JSON;
+    use SelectPdf;
+    print "This is SelectPdf-$SelectPdf::VERSION\n";
+
+    my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
+    my $test_pdf = "Input.pdf";
+    my $local_file = "Result.pdf";
+    my $apiKey = "Your API key here";
+
+    eval {
+        my $client = SelectPdf::PdfMergeClient->new($apiKey);
+
+        # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
+        $client
+            # specify the pdf files that will be merged (order will be preserved in the final pdf)
+
+            ->addFile($test_pdf) # add PDF from local file
+            ->addUrlFile($test_url) # add PDF from public url
+            #->addFileWithPassword($test_pdf, "pdf_password") # add PDF (that requires a password) from local file
+            #->addUrlFileWithPassword($test_url, "pdf_password") # add PDF (that requires a password) from public url
+        ;
+
+        print "Starting pdf merge ...\n";
+
+        # merge pdfs to local file
+        $client->saveToFile($local_file);
+
+        # merge pdfs to memory
+        # my $pdf = $client->save();
+
+        print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n";
+
+        # get API usage
+        my $usageClient = SelectPdf::UsageClient->new($apiKey);
+        my $usage = $usageClient->getUsage(0);
+        print("Usage: " . encode_json($usage) . "\n");
+        print("Conversions remained this month: ". $usage->{"available"});
+    };
+
+    if ($@) {
+        print "An error occurred: $@\n";
+    }
+
+For more details and full list of parameters see L<https://selectpdf.com/pdf-merge-api/>.
+
+=head1 METHODS
+
+=head2 new( $apiKey )
+
+Construct the Pdf Merge Client.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+
+Parameters:
+
+- $apiKey: API Key.
+=cut
+sub new {
+    my $type = shift;
+    my $self = $type->SUPER::new;
+
+    # API endpoint
+    $self->{apiEndpoint} = "https://selectpdf.com/api2/pdfmerge/";
+
+    # Number of input documents registered so far; also the index of the last one.
+    $self->{fileIdx} = 0;
+
+    $self->{parameters}{"key"} = shift;
+
+    bless $self, $type;
+    return $self;
+}
+
+# Private helper: register one input document under the next 1-based index.
+# Every input gets a file_N entry in {files} plus url_N and password_N
+# parameters; the slots that do not apply are passed in as empty strings.
+sub _addInput {
+    my ($self, $inputPdf, $inputUrl, $userPassword) = @_;
+
+    $self->{fileIdx} = $self->{fileIdx} + 1;
+    my $idx = $self->{fileIdx};
+
+    $self->{files}{"file_" . $idx} = $inputPdf;
+    $self->{parameters}{"url_" . $idx} = $inputUrl;
+    $self->{parameters}{"password_" . $idx} = $userPassword;
+
+    return $self;
+}
+
+=head2 addFile( $inputPdf )
+
+Add local PDF document to the list of input files.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFile($inputPdf);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub addFile {
+    my ($self, $inputPdf) = @_;
+
+    return _addInput($self, $inputPdf, "", "");
+}
+
+=head2 addFileWithPassword( $inputPdf, $userPassword )
+
+Add local, password protected PDF document to the list of input files.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFileWithPassword($inputPdf, $userPassword);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+- $userPassword: User password for the PDF document.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub addFileWithPassword {
+    my ($self, $inputPdf, $userPassword) = @_;
+
+    return _addInput($self, $inputPdf, "", $userPassword);
+}
+
+=head2 addUrlFile( $inputUrl )
+
+Add remote PDF document to the list of input files.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addUrlFile($inputUrl);
+
+Parameters:
+
+- $inputUrl: Url of a remote PDF file.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub addUrlFile {
+    my ($self, $inputUrl) = @_;
+
+    return _addInput($self, "", $inputUrl, "");
+}
+
+=head2 addUrlFileWithPassword( $inputUrl, $userPassword )
+
+Add remote, password protected PDF document to the list of input files.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addUrlFileWithPassword($inputUrl, $userPassword);
+
+Parameters:
+
+- $inputUrl: Url of a remote PDF file.
+
+- $userPassword: User password for the PDF document.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub addUrlFileWithPassword {
+    my ($self, $inputUrl, $userPassword) = @_;
+
+    return _addInput($self, "", $inputUrl, $userPassword);
+}
+
+=head2 save
+
+Merge all specified input pdfs and return the resulted PDF.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFile($inputPdf1);
+    $client->addFile($inputPdf2);
+    $content = $client->save();
+
+Returns:
+
+- Byte array containing the resulted PDF.
+=cut
+sub save {
+    my ($self) = @_;
+
+    $self->{parameters}{"async"} = "False";
+    $self->{parameters}{"files_no"} = $self->{fileIdx};
+
+    my $result = $self->SUPER::performPostAsMultipartFormData();
+
+    # Clear the input list so the same client can be reused for another merge.
+    $self->{fileIdx} = 0;
+    $self->{files} = {};
+
+    return $result;
+}
+
+=head2 saveToFile( $filePath )
+
+Merge all specified input pdfs and writes the resulted PDF to a local file.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFile($inputPdf1);
+    $client->addFile($inputPdf2);
+    $client->saveToFile($filePath);
+
+Parameters:
+
+- $filePath: Local file including path if necessary.
+
+Returns:
+
+- A true value on success. Dies if the output file cannot be opened, written or closed.
+=cut
+sub saveToFile {
+    my ($self, $filePath) = @_;
+
+    my $result = $self->save();
+
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $result ) or die "Unable to write output file - $!\n";
+    # Buffered write errors may only surface at close, so its status is checked as well.
+    $file->close or die "Unable to close output file - $!\n";
+
+    return 1;
+}
+
+=head2 saveAsync
+
+Merge all specified input pdfs and return the resulted PDF. An asynchronous call is used.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFile($inputPdf1);
+    $client->addFile($inputPdf2);
+    $content = $client->saveAsync();
+
+Returns:
+
+- Byte array containing the resulted PDF. An asynchronous call is used.
+=cut
+sub saveAsync {
+    my ($self) = @_;
+
+    $self->{parameters}{"files_no"} = $self->{fileIdx};
+
+    my $JobID = $self->SUPER::startAsyncJobMultipartFormData() or die "An error occurred launching the asynchronous call.";
+
+    my $noPings = 0;
+
+    # Poll the job until it reports completion or the ping budget is used up.
+    do
+    {
+        $noPings++;
+
+        # sleep for a few seconds before next ping
+        sleep($self->{AsyncCallsPingInterval});
+
+        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
+        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});
+
+        my $result = $asyncJobClient->getResult();
+
+        if ($asyncJobClient->finished)
+        {
+            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();
+
+            # Clear the input list so the same client can be reused for another merge.
+            $self->{fileIdx} = 0;
+            $self->{files} = {};
+
+            return $result;
+        }
+
+    } while ($noPings <= $self->{AsyncCallsMaxPings});
+
+    # The input list is cleared on timeout as well, before the failure is reported.
+    $self->{fileIdx} = 0;
+    $self->{files} = {};
+
+    die "Asynchronous call did not finish in expected timeframe.";
+
+}
+
+=head2 saveToFileAsync( $filePath )
+
+Merge all specified input pdfs and writes the resulted PDF to a local file. An asynchronous call is used.
+
+    my $client = SelectPdf::PdfMergeClient->new($apiKey);
+    $client->addFile($inputPdf1);
+    $client->addFile($inputPdf2);
+    $client->saveToFileAsync($filePath);
+
+Parameters:
+
+- $filePath: Local file including path if necessary.
+
+Returns:
+
+- A true value on success. Dies if the output file cannot be opened, written or closed.
+=cut
+sub saveToFileAsync {
+    my ($self, $filePath) = @_;
+
+    my $result = $self->saveAsync();
+
+    my $file = IO::File->new( $filePath, '>' ) or die "Unable to open output file - $!\n";
+    $file->binmode;
+    $file->print( $result ) or die "Unable to write output file - $!\n";
+    # Buffered write errors may only surface at close, so its status is checked as well.
+    $file->close or die "Unable to close output file - $!\n";
+
+    return 1;
+}
+
+
+=head2 setDocTitle( $docTitle )
+
+Set the PDF document title.
+
+Parameters:
+
+- $docTitle: Document title.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setDocTitle {
+    my ($self, $docTitle) = @_;
+
+    $self->{parameters}{"doc_title"} = $docTitle;
+    return $self;
+}
+
+=head2 setDocSubject( $docSubject )
+
+Set the PDF document subject.
+
+Parameters:
+
+- $docSubject: Document subject.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setDocSubject {
+    my ($self, $docSubject) = @_;
+
+    $self->{parameters}{"doc_subject"} = $docSubject;
+    return $self;
+}
+
+=head2 setDocKeywords( $docKeywords )
+
+Set the PDF document keywords.
+
+Parameters:
+
+- $docKeywords: Document keywords.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setDocKeywords {
+    my ($self, $docKeywords) = @_;
+
+    $self->{parameters}{"doc_keywords"} = $docKeywords;
+    return $self;
+}
+
+=head2 setDocAuthor( $docAuthor )
+
+Set the PDF document author.
+
+Parameters:
+
+- $docAuthor: Document author.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setDocAuthor {
+    my ($self, $docAuthor) = @_;
+
+    $self->{parameters}{"doc_author"} = $docAuthor;
+    return $self;
+}
+
+=head2 setDocAddCreationDate( $docAddCreationDate )
+
+Add the date and time when the PDF document was created to the PDF document information. The default value is False.
+
+Parameters:
+
+- $docAddCreationDate: Add creation date to the document metadata or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setDocAddCreationDate {
+    my ($self, $docAddCreationDate) = @_;
+
+    # The flag is passed through the parent class's serializeBoolean before being stored.
+    $self->{parameters}{"doc_add_creation_date"} = $self->SUPER::serializeBoolean($docAddCreationDate);
+    return $self;
+}
+
+=head2 setViewerPageLayout( $pageLayout )
+
+Set the page layout to be used when the document is opened in a PDF viewer. The default value is 1 - OneColumn.
+
+Parameters:
+
+- $pageLayout: Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerPageLayout {
+    my ($self, $pageLayout) = @_;
+
+    # Exactly one digit in the range 0-3 is accepted; anything else (including undef) is rejected.
+    unless (defined $pageLayout and $pageLayout =~ /\A[0-3]\z/) {
+        die ("Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).");
+    }
+
+    $self->{parameters}{"viewer_page_layout"} = $pageLayout;
+    return $self;
+}
+
+=head2 setViewerPageMode( $pageMode )
+
+Set the document page mode when the pdf document is opened in a PDF viewer. The default value is 0 - UseNone.
+
+Parameters:
+
+- $pageMode: Page mode. Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerPageMode {
+    my ($self, $pageMode) = @_;
+
+    # Exactly one digit in the range 0-5 is accepted; anything else (including undef) is rejected.
+    unless (defined $pageMode and $pageMode =~ /\A[0-5]\z/) {
+        die ("Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).");
+    }
+
+    $self->{parameters}{"viewer_page_mode"} = $pageMode;
+    return $self;
+}
+
+=head2 setViewerCenterWindow( $viewerCenterWindow )
+
+Set a flag specifying whether to position the document's window in the center of the screen. The default value is False.
+
+Parameters:
+
+- $viewerCenterWindow: Center window or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerCenterWindow {
+    my ($self, $viewerCenterWindow) = @_;
+
+    # The flag is passed through the parent class's serializeBoolean before being stored.
+    $self->{parameters}{"viewer_center_window"} = $self->SUPER::serializeBoolean($viewerCenterWindow);
+    return $self;
+}
+
+=head2 setViewerDisplayDocTitle( $viewerDisplayDocTitle )
+
+Set a flag specifying whether the window's title bar should display the document title taken from document information. The default value is False.
+
+Parameters:
+
+- $viewerDisplayDocTitle: Display title or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerDisplayDocTitle {
+    my ($self, $viewerDisplayDocTitle) = @_;
+
+    # Like the other viewer flags below, the value is passed through the parent class's serializeBoolean.
+    $self->{parameters}{"viewer_display_doc_title"} = $self->SUPER::serializeBoolean($viewerDisplayDocTitle);
+    return $self;
+}
+
+=head2 setViewerFitWindow( $viewerFitWindow )
+
+Set a flag specifying whether to resize the document's window to fit the size of the first displayed page. The default value is False.
+
+Parameters:
+
+- $viewerFitWindow: Fit window or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerFitWindow {
+    my ($self, $viewerFitWindow) = @_;
+
+    $self->{parameters}{"viewer_fit_window"} = $self->SUPER::serializeBoolean($viewerFitWindow);
+    return $self;
+}
+
+=head2 setViewerHideMenuBar( $viewerHideMenuBar )
+
+Set a flag specifying whether to hide the pdf viewer application's menu bar when the document is active. The default value is False.
+
+Parameters:
+
+- $viewerHideMenuBar: Hide menu bar or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerHideMenuBar {
+    my ($self, $viewerHideMenuBar) = @_;
+
+    $self->{parameters}{"viewer_hide_menu_bar"} = $self->SUPER::serializeBoolean($viewerHideMenuBar);
+    return $self;
+}
+
+=head2 setViewerHideToolbar( $viewerHideToolbar )
+
+Set a flag specifying whether to hide the pdf viewer application's tool bars when the document is active. The default value is False.
+
+Parameters:
+
+- $viewerHideToolbar: Hide tool bars or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerHideToolbar {
+    my ($self, $viewerHideToolbar) = @_;
+
+    $self->{parameters}{"viewer_hide_toolbar"} = $self->SUPER::serializeBoolean($viewerHideToolbar);
+    return $self;
+}
+
+=head2 setViewerHideWindowUI( $viewerHideWindowUI )
+
+Set a flag specifying whether to hide user interface elements in the document's window (such as scroll bars and navigation controls),
+leaving only the document's contents displayed.
+
+Parameters:
+
+- $viewerHideWindowUI: Hide window UI or not.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setViewerHideWindowUI {
+    my ($self, $viewerHideWindowUI) = @_;
+
+    # The flag is passed through the parent class's serializeBoolean before being stored.
+    $self->{parameters}{"viewer_hide_window_ui"} = $self->SUPER::serializeBoolean($viewerHideWindowUI);
+    return $self;
+}
+
+=head2 setUserPassword( $userPassword )
+
+Set PDF user password.
+
+Parameters:
+
+- $userPassword: PDF user password.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setUserPassword {
+    my ($self, $userPassword) = @_;
+
+    $self->{parameters}{"user_password"} = $userPassword;
+    return $self;
+}
+
+=head2 setOwnerPassword( $ownerPassword )
+
+Set PDF owner password.
+
+Parameters:
+
+- $ownerPassword: PDF owner password.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setOwnerPassword {
+    my ($self, $ownerPassword) = @_;
+
+    $self->{parameters}{"owner_password"} = $ownerPassword;
+    return $self;
+}
+
+=head2 setCustomParameter( $parameterName, $parameterValue )
+
+Set a custom parameter. Do not use this method unless advised by SelectPdf.
+
+Parameters:
+
+- $parameterName: Parameter name.
+
+- $parameterValue: Parameter value.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setCustomParameter {
+    my ($self, $parameterName, $parameterValue) = @_;
+
+    # Written straight into the request parameters; an existing entry with the same name is overwritten.
+    $self->{parameters}{$parameterName} = $parameterValue;
+    return $self;
+}
+
+=head2 setTimeout( $timeout )
+
+Set the maximum amount of time (in seconds) for this job.
+The default value is 30 seconds.
+Use a larger value (up to 120 seconds allowed) for documents that take a long time to process.
+
+Parameters:
+
+- $timeout: Timeout in seconds.
+
+Returns:
+
+- Reference to the current object.
+=cut
+sub setTimeout {
+    my ($self, $timeout) = @_;
+
+    # Stored as given; the documented limits are not checked on the client side.
+    $self->{parameters}{"timeout"} = $timeout;
+    return $self;
+}
+
+1;
\ No newline at end of file
diff --git a/lib/SelectPdf/PdfToTextClient.pm b/lib/SelectPdf/PdfToTextClient.pm
new file mode 100644
index 0000000..9f8a215
--- /dev/null
+++ b/lib/SelectPdf/PdfToTextClient.pm
@@ -0,0 +1,801 @@
+package SelectPdf::PdfToTextClient;
+
+use JSON;
+use SelectPdf::ApiClient;
+use SelectPdf::AsyncJobClient;
+use strict;
+our @ISA = qw(SelectPdf::ApiClient);
+
+=head1 NAME
+
+SelectPdf::PdfToTextClient - Pdf To Text Conversion with SelectPdf Online API. Extract text from PDF. Search PDF.
+
+=head1 SYNOPSIS
+
+Extract text from PDF
+
+    use JSON;
+    use SelectPdf;
+
+    print "This is SelectPdf-$SelectPdf::VERSION.\n";
+
+    my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
+    my $test_pdf = "Input.pdf";
+    my $local_file = "Test.txt";
+    my $apiKey = "Your API key here";
+
+    eval {
+        my $client = SelectPdf::PdfToTextClient->new($apiKey);
+
+        print "Starting pdf to text ...\n";
+
+        # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
+        $client
+            ->setStartPage(1) # start page (processing starts from here)
+            ->setEndPage(0) # end page (set 0 to process file until the end)
+            ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html)
+        ;
+
+        # convert local pdf to local text file
+        $client->getTextFromFileToFile($test_pdf, $local_file);
+
+        # extract text from local pdf to memory
+        # my $text = $client->getTextFromFile($test_pdf);
+        # print $text;
+
+        # convert pdf from public url to local text file
+        # $client->getTextFromUrlToFile($test_url, $local_file);
+
+        # extract text from pdf from public url to memory
+        # my $text = $client->getTextFromUrl($test_url);
+        # print $text;
+
+        print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n";
+
+        # get API usage
+        my $usageClient = SelectPdf::UsageClient->new($apiKey);
+        my $usage = $usageClient->getUsage(0);
+        print("Usage: " . encode_json($usage) . "\n");
+        print("Conversions remained this month: ". $usage->{"available"});
+    };
+
+    if ($@) {
+        print "An error occurred: $@\n";
+    }
+
+Search PDF
+
+    use JSON;
+    use SelectPdf;
+
+    print "This is SelectPdf-$SelectPdf::VERSION.\n";
+
+    my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
+    my $test_pdf = "Input.pdf";
+    my $apiKey = "Your API key here";
+
+    eval {
+        my $client = SelectPdf::PdfToTextClient->new($apiKey);
+
+        print "Starting search pdf ...\n";
+
+        # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
+        $client
+            ->setStartPage(1) # start page (processing starts from here)
+            ->setEndPage(0) # end page (set 0 to process file until the end)
+            ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html)
+        ;
+
+        # search local pdf
+        my $results = $client->searchFile($test_pdf, "pdf", "True", "True");
+
+        # search pdf from public url
+        # my $results = $client->searchUrl($test_url, "pdf", "True", "True");
+
+        my $count = scalar @{$results};
+        print("Number of search results: " . $count . "\n");
+        print("Results: " . encode_json($results) . "\n");
+
+        print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n";
+
+        # get API usage
+        my $usageClient = SelectPdf::UsageClient->new($apiKey);
+        my $usage = $usageClient->getUsage(0);
+        print("Usage: " . encode_json($usage) . "\n");
+        print("Conversions remained this month: ". $usage->{"available"});
+    };
+
+    if ($@) {
+        print "An error occurred: $@\n";
+    }
+
+For more details and full list of parameters see L<https://selectpdf.com/pdf-to-text-api/>.
+
+=head1 METHODS
+
+=head2 new( $apiKey )
+
+Construct the Pdf To Text Client.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+
+Parameters:
+
+- $apiKey: API Key.
+=cut
+sub new {
+    my $type = shift;
+    my $self = $type->SUPER::new;
+
+    # API endpoint
+    $self->{apiEndpoint} = "https://selectpdf.com/api2/pdftotext/";
+
+    $self->{fileIdx} = 0;
+
+    $self->{parameters}{"key"} = shift;
+
+    bless $self, $type;
+    return $self;
+}
+
+=head2 getTextFromFile( $inputPdf )
+
+Get the text from the specified pdf.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+    $text = $client->getTextFromFile($inputPdf);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+Returns:
+
+- Extracted text.
+=cut
+sub getTextFromFile {
+    my ($self, $inputPdf) = @_;
+
+    $self->{parameters}{"async"} = "False";
+    $self->{parameters}{"action"} = "Convert";
+    # The document is uploaded, so the url parameter is blanked.
+    $self->{parameters}{"url"} = "";
+
+    $self->{files} = {};
+    $self->{files}{"inputPdf"} = $inputPdf;
+
+    my $text = $self->SUPER::performPostAsMultipartFormData();
+    # Carriage returns are stripped from the response.
+    $text =~ s/\r//g;
+
+    return $text;
+}
+
+=head2 getTextFromFileToFile( $inputPdf, $outputFilePath )
+
+Get the text from the specified pdf and write it to the specified text file.
+Dies if the output file cannot be opened, written or closed.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+    $client->getTextFromFileToFile($inputPdf, $outputFilePath);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+- $outputFilePath: The output file where the resulted text will be written.
+
+=cut
+sub getTextFromFileToFile {
+    my ($self, $inputPdf, $outputFilePath) = @_;
+
+    my $result = $self->getTextFromFile($inputPdf);
+
+    my $file = IO::File->new( $outputFilePath, '>:encoding(UTF-8)' ) or die "Unable to open output file - $!\n";
+    $file->print( $result ) or die "Unable to write output file - $!\n";
+    # Buffered write errors may only surface at close, so its status is checked as well.
+    $file->close or die "Unable to close output file - $!\n";
+
+    return 1;
+}
+
+=head2 getTextFromFileAsync( $inputPdf )
+
+Get the text from the specified pdf with an asynchronous call.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+    $text = $client->getTextFromFileAsync($inputPdf);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+Returns:
+
+- Extracted text.
+=cut
+sub getTextFromFileAsync {
+    my ($self, $inputPdf) = @_;
+
+    $self->{parameters}{"action"} = "Convert";
+    # The document is uploaded, so the url parameter is blanked.
+    $self->{parameters}{"url"} = "";
+
+    $self->{files} = {};
+    $self->{files}{"inputPdf"} = $inputPdf;
+
+    my $JobID = $self->SUPER::startAsyncJobMultipartFormData() or die "An error occurred launching the asynchronous call.";
+
+    my $noPings = 0;
+
+    # Poll the job until it reports completion or the ping budget is used up.
+    do
+    {
+        $noPings++;
+
+        # sleep for a few seconds before next ping
+        sleep($self->{AsyncCallsPingInterval});
+
+        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
+        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});
+
+        my $text = $asyncJobClient->getResult();
+
+        if ($asyncJobClient->finished)
+        {
+            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();
+
+            # Carriage returns are stripped from the response.
+            $text =~ s/\r//g;
+            return $text;
+        }
+
+    } while ($noPings <= $self->{AsyncCallsMaxPings});
+
+    die "Asynchronous call did not finish in expected timeframe.";
+}
+
+=head2 getTextFromFileToFileAsync( $inputPdf, $outputFilePath )
+
+Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
+Dies if the output file cannot be opened, written or closed.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+    $client->getTextFromFileToFileAsync($inputPdf, $outputFilePath);
+
+Parameters:
+
+- $inputPdf: Path to a local PDF file.
+
+- $outputFilePath: The output file where the resulted text will be written.
+
+=cut
+sub getTextFromFileToFileAsync {
+    my ($self, $inputPdf, $outputFilePath) = @_;
+
+    my $result = $self->getTextFromFileAsync($inputPdf);
+
+    my $file = IO::File->new( $outputFilePath, '>:encoding(UTF-8)' ) or die "Unable to open output file - $!\n";
+    $file->print( $result ) or die "Unable to write output file - $!\n";
+    # Buffered write errors may only surface at close, so its status is checked as well.
+    $file->close or die "Unable to close output file - $!\n";
+
+    return 1;
+}
+
+
+=head2 getTextFromUrl( $url )
+
+Get the text from the specified pdf.
+
+    my $client = SelectPdf::PdfToTextClient->new($apiKey);
+    $text = $client->getTextFromUrl($url);
+
+Parameters:
+
+- $url: Address of the PDF file.
+
+Returns:
+
+- Extracted text.
=cut
sub getTextFromUrl {
    my ($self, $url) = @_;

    # Synchronous text extraction of a remote document.
    $self->{parameters}{"async"}  = "False";
    $self->{parameters}{"action"} = "Convert";
    $self->{parameters}{"url"}    = $url;

    # No file upload: the document is identified by its url.
    $self->{files} = {};

    my $text = $self->SUPER::performPostAsMultipartFormData();

    # Strip every carriage return from the response.
    $text =~ s/\r//g;

    return $text;
}

=head2 getTextFromUrlToFile( $url, $outputFilePath )

Get the text from the specified pdf and write it to the specified text file.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $client->getTextFromUrlToFile($url, $outputFilePath);

Parameters:

- $url: Address of the PDF file.

- $outputFilePath: The output file where the resulting text will be written.

Returns:

- A true value on success. Dies if the output file cannot be opened, written or closed.

=cut

sub getTextFromUrlToFile {
    my ($self, $url, $outputFilePath) = @_;

    my $result = $self->getTextFromUrl($url);

    # NOTE(review): the :encoding(UTF-8) layer assumes $result holds decoded
    # characters rather than raw UTF-8 bytes - confirm against the parent
    # class's response handling.
    my $file = IO::File->new( $outputFilePath, '>:encoding(UTF-8)' )
        or die "Unable to open output file - $!\n";

    # Buffered write errors may only surface at close, so check both steps.
    $file->print( $result ) or die "Unable to write output file - $!\n";
    $file->close            or die "Unable to close output file - $!\n";

    return 1;
}

=head2 getTextFromUrlAsync( $url )

Get the text from the specified pdf with an asynchronous call.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $text = $client->getTextFromUrlAsync($url);

Parameters:

- $url: Address of the PDF file.

Returns:

- Extracted text.
=cut
sub getTextFromUrlAsync {
    my ($self, $url) = @_;

    $self->{parameters}{"action"} = "Convert";
    $self->{parameters}{"url"}    = $url;

    # No file upload: the document is identified by its url.
    $self->{files} = {};

    my $JobID = $self->SUPER::startAsyncJobMultipartFormData()
        or die "An error occurred launching the asynchronous call.";

    my $noPings = 0;

    # The counter is incremented before it is compared, so the job is polled
    # at most AsyncCallsMaxPings + 1 times.
    do {
        $noPings++;

        # sleep for a few seconds before next ping
        sleep($self->{AsyncCallsPingInterval});

        # Explicit Class->new: this package defines its own new(), so the
        # indirect-object form "new Class(...)" can be parsed as a call to
        # the local sub instead of a constructor call.
        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});

        my $text = $asyncJobClient->getResult();

        if ($asyncJobClient->finished) {
            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();

            # Strip every carriage return from the response.
            $text =~ s/\r//g;
            return $text;
        }

    } while ($noPings <= $self->{AsyncCallsMaxPings});

    die "Asynchronous call did not finish in expected timeframe.";
}

=head2 getTextFromUrlToFileAsync( $url, $outputFilePath )

Get the text from the specified pdf with an asynchronous call and write it to the specified text file.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $client->getTextFromUrlToFileAsync($url, $outputFilePath);

Parameters:

- $url: Address of the PDF file.

- $outputFilePath: The output file where the resulting text will be written.

Returns:

- A true value on success. Dies if the output file cannot be opened, written or closed.

=cut

sub getTextFromUrlToFileAsync {
    my ($self, $url, $outputFilePath) = @_;

    my $result = $self->getTextFromUrlAsync($url);

    # NOTE(review): the :encoding(UTF-8) layer assumes $result holds decoded
    # characters rather than raw UTF-8 bytes - confirm against the parent
    # class's response handling.
    my $file = IO::File->new( $outputFilePath, '>:encoding(UTF-8)' )
        or die "Unable to open output file - $!\n";

    # Buffered write errors may only surface at close, so check both steps.
    $file->print( $result ) or die "Unable to write output file - $!\n";
    $file->close            or die "Unable to close output file - $!\n";

    return 1;
}


=head2 searchFile( $inputPdf, $textToSearch, $caseSensitive, $wholeWordsOnly )

Search for a specific text in a PDF document.
Pages that participate in this operation are specified by the setStartPage() and setEndPage() methods.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $results = $client->searchFile($inputPdf, $textToSearch);

Parameters:

- $inputPdf: Path to a local PDF file.

- $textToSearch: Text to search. Must be defined and non-empty; the string "0" is accepted.

- $caseSensitive: If the search is case sensitive or not.

- $wholeWordsOnly: If the search works on whole words or not.

Returns:

- List with text positions in the current PDF document (an empty list reference when the response is empty).

=cut

sub searchFile {
    my ($self, $inputPdf, $textToSearch, $caseSensitive, $wholeWordsOnly) = @_;

    # Test for "no text" explicitly: a plain truth test would also reject
    # the perfectly valid search string "0".
    if (!defined $textToSearch || $textToSearch eq '') {
        die ("Search text cannot be empty.");
    }

    $self->{parameters}{"async"}            = "False";
    $self->{parameters}{"action"}           = "Search";
    $self->{parameters}{"url"}              = "";
    $self->{parameters}{"search_text"}      = $textToSearch;
    $self->{parameters}{"case_sensitive"}   = $self->SUPER::serializeBoolean($caseSensitive);
    $self->{parameters}{"whole_words_only"} = $self->SUPER::serializeBoolean($wholeWordsOnly);

    # Upload exactly one file: the PDF to search.
    $self->{files} = {};
    $self->{files}{"inputPdf"} = $inputPdf;

    # Ask for a JSON response; this header is not reset by this method.
    $self->{headers}{"Accept"} = "text/json";

    my $result = $self->SUPER::performPostAsMultipartFormData();

    if ($result) {
        return decode_json($result);
    }
    else {
        return [];
    }
}

=head2 searchFileAsync( $inputPdf, $textToSearch, $caseSensitive, $wholeWordsOnly )

Search for a specific text in a PDF document with an asynchronous call.
Pages that participate in this operation are specified by the setStartPage() and setEndPage() methods.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $results = $client->searchFileAsync($inputPdf, $textToSearch);

Parameters:

- $inputPdf: Path to a local PDF file.

- $textToSearch: Text to search.

- $caseSensitive: If the search is case sensitive or not.

- $wholeWordsOnly: If the search works on whole words or not.

Returns:

- List with text positions in the current PDF document.
=cut
sub searchFileAsync {
    my ($self, $inputPdf, $textToSearch, $caseSensitive, $wholeWordsOnly) = @_;

    # Test for "no text" explicitly: a plain truth test would also reject
    # the perfectly valid search string "0".
    if (!defined $textToSearch || $textToSearch eq '') {
        die ("Search text cannot be empty.");
    }

    $self->{parameters}{"action"}           = "Search";
    $self->{parameters}{"url"}              = "";
    $self->{parameters}{"search_text"}      = $textToSearch;
    $self->{parameters}{"case_sensitive"}   = $self->SUPER::serializeBoolean($caseSensitive);
    $self->{parameters}{"whole_words_only"} = $self->SUPER::serializeBoolean($wholeWordsOnly);

    # Upload exactly one file: the PDF to search.
    $self->{files} = {};
    $self->{files}{"inputPdf"} = $inputPdf;

    # Ask for a JSON response; this header is not reset by this method.
    $self->{headers}{"Accept"} = "text/json";

    my $JobID = $self->SUPER::startAsyncJobMultipartFormData()
        or die "An error occurred launching the asynchronous call.";

    my $noPings = 0;

    # The counter is incremented before it is compared, so the job is polled
    # at most AsyncCallsMaxPings + 1 times.
    do {
        $noPings++;

        # sleep for a few seconds before next ping
        sleep($self->{AsyncCallsPingInterval});

        # Explicit Class->new: this package defines its own new(), so the
        # indirect-object form "new Class(...)" can be parsed as a call to
        # the local sub instead of a constructor call.
        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});

        my $result = $asyncJobClient->getResult();

        if ($asyncJobClient->finished) {
            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();

            if ($result) {
                return decode_json($result);
            }
            else {
                return [];
            }
        }

    } while ($noPings <= $self->{AsyncCallsMaxPings});

    die "Asynchronous call did not finish in expected timeframe.";

}

=head2 searchUrl( $url, $textToSearch, $caseSensitive, $wholeWordsOnly )

Search for a specific text in a PDF document.
Pages that participate in this operation are specified by the setStartPage() and setEndPage() methods.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $results = $client->searchUrl($url, $textToSearch);

Parameters:

- $url: Address of the PDF file.

- $textToSearch: Text to search. Must be defined and non-empty; the string "0" is accepted.

- $caseSensitive: If the search is case sensitive or not.

- $wholeWordsOnly: If the search works on whole words or not.

Returns:

- List with text positions in the current PDF document (an empty list reference when the response is empty).

=cut

sub searchUrl {
    my ($self, $url, $textToSearch, $caseSensitive, $wholeWordsOnly) = @_;

    # Test for "no text" explicitly: a plain truth test would also reject
    # the perfectly valid search string "0".
    if (!defined $textToSearch || $textToSearch eq '') {
        die ("Search text cannot be empty.");
    }

    $self->{parameters}{"async"}            = "False";
    $self->{parameters}{"action"}           = "Search";
    $self->{parameters}{"search_text"}      = $textToSearch;
    $self->{parameters}{"case_sensitive"}   = $self->SUPER::serializeBoolean($caseSensitive);
    $self->{parameters}{"whole_words_only"} = $self->SUPER::serializeBoolean($wholeWordsOnly);

    # No file upload: the document is identified by its url.
    $self->{files} = {};
    $self->{parameters}{"url"} = $url;

    # Ask for a JSON response; this header is not reset by this method.
    $self->{headers}{"Accept"} = "text/json";

    my $result = $self->SUPER::performPostAsMultipartFormData();

    if ($result) {
        return decode_json($result);
    }
    else {
        return [];
    }
}

=head2 searchUrlAsync( $url, $textToSearch, $caseSensitive, $wholeWordsOnly )

Search for a specific text in a PDF document with an asynchronous call.
Pages that participate in this operation are specified by the setStartPage() and setEndPage() methods.

    my $client = SelectPdf::PdfToTextClient->new($apiKey);
    $results = $client->searchUrlAsync($url, $textToSearch);

Parameters:

- $url: Address of the PDF file.

- $textToSearch: Text to search.

- $caseSensitive: If the search is case sensitive or not.

- $wholeWordsOnly: If the search works on whole words or not.

Returns:

- List with text positions in the current PDF document.
=cut
sub searchUrlAsync {
    my ($self, $url, $textToSearch, $caseSensitive, $wholeWordsOnly) = @_;

    # Test for "no text" explicitly: a plain truth test would also reject
    # the perfectly valid search string "0".
    if (!defined $textToSearch || $textToSearch eq '') {
        die ("Search text cannot be empty.");
    }

    $self->{parameters}{"action"}           = "Search";
    $self->{parameters}{"search_text"}      = $textToSearch;
    $self->{parameters}{"case_sensitive"}   = $self->SUPER::serializeBoolean($caseSensitive);
    $self->{parameters}{"whole_words_only"} = $self->SUPER::serializeBoolean($wholeWordsOnly);

    # No file upload: the document is identified by its url.
    $self->{files} = {};
    $self->{parameters}{"url"} = $url;

    # Ask for a JSON response; this header is not reset by this method.
    $self->{headers}{"Accept"} = "text/json";

    my $JobID = $self->SUPER::startAsyncJobMultipartFormData()
        or die "An error occurred launching the asynchronous call.";

    my $noPings = 0;

    # The counter is incremented before it is compared, so the job is polled
    # at most AsyncCallsMaxPings + 1 times.
    do {
        $noPings++;

        # sleep for a few seconds before next ping
        sleep($self->{AsyncCallsPingInterval});

        # Explicit Class->new: this package defines its own new(), so the
        # indirect-object form "new Class(...)" can be parsed as a call to
        # the local sub instead of a constructor call.
        my $asyncJobClient = SelectPdf::AsyncJobClient->new($self->{parameters}{"key"}, $JobID);
        $asyncJobClient->setApiEndpoint($self->{apiAsyncEndpoint});

        my $result = $asyncJobClient->getResult();

        if ($asyncJobClient->finished) {
            $self->{numberOfPages} = $asyncJobClient->getNumberOfPages();

            if ($result) {
                return decode_json($result);
            }
            else {
                return [];
            }
        }

    } while ($noPings <= $self->{AsyncCallsMaxPings});

    die "Asynchronous call did not finish in expected timeframe.";

}


=head2 setCustomParameter( $parameterName, $parameterValue )

Set a custom parameter. Do not use this method unless advised by SelectPdf.

Parameters:

- $parameterName: Parameter name.

- $parameterValue: Parameter value.

Returns:

- Reference to the current object.

=cut

sub setCustomParameter {
    my ($self, $parameterName, $parameterValue) = @_;

    # Stored verbatim under the caller-supplied name; no validation is done.
    $self->{parameters}{$parameterName} = $parameterValue;
    return $self;
}

=head2 setTimeout( $timeout )

Set the maximum amount of time (in seconds) for this job.
The default value is 30 seconds.
Use a larger value (up to 120 seconds allowed) for pages that take a long time to load.

Parameters:

- $timeout: Timeout in seconds.

Returns:

- Reference to the current object.

=cut

sub setTimeout {
    my ($self, $timeout) = @_;

    $self->{parameters}{"timeout"} = $timeout;
    return $self;
}

=head2 setStartPage( $startPage )

Set Start Page number. Default value is 1 (first page of the document).

Parameters:

- $startPage: Start page number (1-based).

Returns:

- Reference to the current object.

=cut

sub setStartPage {
    my ($self, $startPage) = @_;

    $self->{parameters}{"start_page"} = $startPage;
    return $self;
}

=head2 setEndPage( $endPage )

Set End Page number. Default value is 0 (process till the last page of the document).

Parameters:

- $endPage: End page number (1-based).

Returns:

- Reference to the current object.

=cut

sub setEndPage {
    my ($self, $endPage) = @_;

    $self->{parameters}{"end_page"} = $endPage;
    return $self;
}

=head2 setUserPassword( $userPassword )

Set PDF user password.

Parameters:

- $userPassword: PDF user password.

Returns:

- Reference to the current object.

=cut

sub setUserPassword {
    my ($self, $userPassword) = @_;

    $self->{parameters}{"user_password"} = $userPassword;
    return $self;
}


=head2 setTextLayout( $textLayout )

Set the text layout. The default value is 0 (Original).

Parameters:

- $textLayout: The text layout. Possible values: 0 (Original), 1 (Reading).

Returns:

- Reference to the current object. Dies if the value is neither 0 nor 1.

=cut

sub setTextLayout {
    my ($self, $textLayout) = @_;

    # String comparison: only the exact values 0 and 1 are accepted.
    if ($textLayout ne 0 and $textLayout ne 1) {
        die ("Allowed values for Text Layout: 0 (Original), 1 (Reading).");
    }

    $self->{parameters}{"text_layout"} = $textLayout;
    return $self;
}

=head2 setOutputFormat( $outputFormat )

Set the output format. The default value is 0 (Text).

Parameters:

- $outputFormat: The output format.
Possible values: 0 (Text), 1 (Html).

Returns:

- Reference to the current object. Dies if the value is neither 0 nor 1.

=cut

sub setOutputFormat {
    my ($self, $outputFormat) = @_;

    # String comparison: only the exact values 0 and 1 are accepted.
    if ($outputFormat ne 0 and $outputFormat ne 1) {
        die ("Allowed values for Output Format: 0 (Text), 1 (Html).");
    }

    $self->{parameters}{"output_format"} = $outputFormat;
    return $self;
}


1;
# \ No newline at end of file
# diff --git a/lib/SelectPdf/UsageClient.pm b/lib/SelectPdf/UsageClient.pm
# new file mode 100644
# index 0000000..6b68bf7
# --- /dev/null
# +++ b/lib/SelectPdf/UsageClient.pm
# @@ -0,0 +1,72 @@
package SelectPdf::UsageClient;

use JSON;
use SelectPdf::ApiClient;
use strict;
our @ISA = qw(SelectPdf::ApiClient);

=head1 NAME

SelectPdf::UsageClient - Get usage details for SelectPdf Online API.

=head1 METHODS

=head2 new( $apiKey )

Construct the Usage client.

    my $client = SelectPdf::UsageClient->new($apiKey);

Parameters:

- $apiKey API Key.

=cut

sub new {
    my ($type, $apiKey) = @_;

    # Start from whatever the parent constructor builds.
    my $self = $type->SUPER::new;

    # API endpoint
    $self->{apiEndpoint} = "https://selectpdf.com/api2/usage/";

    $self->{parameters}{"key"} = $apiKey;

    # Re-bless so the object is an instance of $type, whichever class the
    # parent constructor blessed it into.
    bless $self, $type;
    return $self;
}

=head2 getUsage( $getHistory )

Get API usage information with history, if specified.

    my $client = SelectPdf::UsageClient->new($apiKey);
    $usageInfo = $client->getUsage($getHistory);
    print("Conversions remained this month: ". $usageInfo->{"available"});

Parameters:

- $getHistory Get history or not.

Returns:

- Usage information.
=cut
sub getUsage {
    my ($self, $getHistory) = @_;

    # Ask for a JSON response.
    $self->{headers}{"Accept"} = "text/json";

    if ($getHistory) {
        $self->{parameters}{"get_history"} = "True";
    }
    else {
        # Drop the flag so an earlier getUsage(1) on this client does not
        # make this call request the history as well.
        delete $self->{parameters}{"get_history"};
    }

    my $result = $self->SUPER::performPost();

    # An empty response is reported as an empty hash reference.
    if ($result) {
        return decode_json($result);
    }
    else {
        return {};
    }
}

1;
# \ No newline at end of file
# diff --git a/lib/SelectPdf/WebElementsClient.pm b/lib/SelectPdf/WebElementsClient.pm
# new file mode 100644
# index 0000000..ab66e87
# --- /dev/null
# +++ b/lib/SelectPdf/WebElementsClient.pm
# @@ -0,0 +1,66 @@
package SelectPdf::WebElementsClient;

use JSON;
use SelectPdf::ApiClient;
use strict;
our @ISA = qw(SelectPdf::ApiClient);

=head1 NAME

SelectPdf::WebElementsClient - Get the locations of certain web elements.
This is retrieved if pdf_web_elements_selectors parameter was set during the initial conversion call and elements were found to match the selectors.

=head1 METHODS

=head2 new( $apiKey, $jobId )

Construct the web elements client.

    my $client = SelectPdf::WebElementsClient->new($apiKey, $jobId);

Parameters:

- $apiKey API Key.

- $jobId Job ID.

=cut

sub new {
    my ($type, $apiKey, $jobId) = @_;

    # Start from whatever the parent constructor builds.
    my $self = $type->SUPER::new;

    # API endpoint
    $self->{apiEndpoint} = "https://selectpdf.com/api2/webelements/";

    $self->{parameters}{"key"}    = $apiKey;
    $self->{parameters}{"job_id"} = $jobId;

    # Re-bless so the object is an instance of $type, whichever class the
    # parent constructor blessed it into.
    bless $self, $type;
    return $self;
}

=head2 getWebElements

Get the locations of certain web elements. This is retrieved if pdf_web_elements_selectors parameter is set and elements were found to match the selectors.

    my $client = SelectPdf::WebElementsClient->new($apiKey, $jobId);
    $elements = $client->getWebElements();

Returns:

- List of web elements locations.
=cut
sub getWebElements {
    my ($self) = @_;

    # Ask for a JSON response.
    $self->{headers}{"Accept"} = "text/json";

    my $result = $self->SUPER::performPost();

    # An empty response is reported as an empty list reference.
    if ($result) {
        return decode_json($result);
    }
    else {
        return [];
    }
}

1;
# \ No newline at end of file
# diff --git a/samples/html-to-pdf-headers-and-footers.pl b/samples/html-to-pdf-headers-and-footers.pl
# new file mode 100644
# index 0000000..c4c8781
# --- /dev/null
# +++ b/samples/html-to-pdf-headers-and-footers.pl
# @@ -0,0 +1,66 @@

# Sample: convert a web page to PDF with a custom header, footer and page numbers.

# unbuffered STDOUT so progress messages appear immediately
$| = 1;

use strict;
use warnings;
use JSON;
use SelectPdf;

print "This is SelectPdf-$SelectPdf::VERSION.\n";

my $url = "https://selectpdf.com/";
my $local_file = "Test.pdf";
my $apiKey = "Your API key here";

eval {
    my $client = SelectPdf::HtmlToPdfClient->new($apiKey);

    # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
    $client
        ->setMargins(0) # PDF page margins
        ->setPageBreaksEnhancedAlgorithm('True') # enhanced page break algorithm

        # header properties
        ->setShowHeader('True') # display header
        #->setHeaderHeight(50) # header height
        #->setHeaderUrl($url) # header url
        ->setHeaderHtml("This is the HEADER!!!!") # header html

        # footer properties
        ->setShowFooter('True') # display footer
        #->setFooterHeight(60) # footer height
        #->setFooterUrl($url) # footer url
        ->setFooterHtml("This is the Footer!!!!") # footer html

        # footer page numbers
        ->setShowPageNumbers('True') # show page numbers in footer
        ->setPageNumbersTemplate('{page_number} / {total_pages}') # page numbers template
        ->setPageNumbersFontName('Verdana') # page numbers font name
        ->setPageNumbersFontSize(12) # page numbers font size
        ->setPageNumbersAlignment(2) # page numbers alignment 2 = Center
    ;

    print "Starting conversion ...\n";

    # convert url to file
    $client->convertUrlToFile($url, $local_file);

    # convert url to memory
    # my $pdf = $client->convertUrl($url);

    # convert html string to file
    # $client->convertHtmlStringToFile("This is some html.", $local_file);

    # convert html string to memory
    # my $pdf = $client->convertHtmlString("This is some html.");

    print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n";

    # get API usage
    my $usageClient = SelectPdf::UsageClient->new($apiKey);
    my $usage = $usageClient->getUsage();
    print("Usage: " . encode_json($usage) . "\n");
    print("Conversions remained this month: ". $usage->{"available"});
};

# anything that died inside the eval block is reported here
if ($@) {
    print "An error occurred: $@\n";
}
# \ No newline at end of file
# diff --git a/samples/html-to-pdf-main.pl b/samples/html-to-pdf-main.pl
# new file mode 100644
# index 0000000..5c150d5
# --- /dev/null
# +++ b/samples/html-to-pdf-main.pl
# @@ -0,0 +1,71 @@

# Sample: convert a web page to PDF using the main conversion options.

# unbuffered STDOUT so progress messages appear immediately
$| = 1;

use strict;
use warnings;
use JSON;
use SelectPdf;

print "This is SelectPdf-$SelectPdf::VERSION.\n";

my $url = "https://selectpdf.com/";
my $local_file = "Test.pdf";
my $apiKey = "Your API key here";

eval {
    my $client = SelectPdf::HtmlToPdfClient->new($apiKey);

    # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
    $client
        # main properties

        ->setPageSize("A4") # PDF page size
        ->setPageOrientation("Portrait") # PDF page orientation
        ->setMargins(0) # PDF page margins
        ->setRenderingEngine('WebKit') # rendering engine
        ->setConversionDelay(1) # conversion delay
        ->setNavigationTimeout(30) # navigation timeout
        ->setShowPageNumbers('False') # page numbers
        ->setPageBreaksEnhancedAlgorithm('True') # enhanced page break algorithm

        # additional properties

        #->setUseCssPrint('True') # enable CSS media print
        #->setDisableJavascript('True') # disable javascript
        #->setDisableInternalLinks('True') # disable internal links
        #->setDisableExternalLinks('True') # disable external links
        #->setKeepImagesTogether('True') # keep images together
        #->setScaleImages('True') # scale images to create smaller pdfs
        #->setSinglePagePdf('True') # generate a single page PDF
        #->setUserPassword('password') # secure the PDF with a password

        # generate automatic bookmarks

        #->setPdfBookmarksSelectors("H1, H2") # create outlines (bookmarks) for the specified elements
        #->setViewerPageMode(1) # 1 (Use Outlines) - display outlines (bookmarks) in viewer
    ;

    print "Starting conversion ...\n";

    # convert url to file
    $client->convertUrlToFile($url, $local_file);

    # convert url to memory
    # my $pdf = $client->convertUrl($url);

    # convert html string to file
    # $client->convertHtmlStringToFile("This is some html.", $local_file);

    # convert html string to memory
    # my $pdf = $client->convertHtmlString("This is some html.");

    print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n";

    # get API usage
    my $usageClient = SelectPdf::UsageClient->new($apiKey);
    my $usage = $usageClient->getUsage();
    print("Usage: " . encode_json($usage) . "\n");
    print("Conversions remained this month: ". $usage->{"available"});
};

# anything that died inside the eval block is reported here
if ($@) {
    print "An error occurred: $@\n";
}
# \ No newline at end of file
# diff --git a/samples/pdf-merge.pl b/samples/pdf-merge.pl
# new file mode 100644
# index 0000000..5ff0000
# --- /dev/null
# +++ b/samples/pdf-merge.pl
# @@ -0,0 +1,46 @@

# Sample: merge a local PDF and a PDF from a public url into one document.

# unbuffered STDOUT so progress messages appear immediately
$| = 1;

use strict;
use warnings;
use JSON;
use SelectPdf;

print "This is SelectPdf-$SelectPdf::VERSION\n";

my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
my $test_pdf = "Input.pdf";
my $local_file = "Result.pdf";
my $apiKey = "Your API key here";

eval {
    my $client = SelectPdf::PdfMergeClient->new($apiKey);

    # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
    $client
        # specify the pdf files that will be merged (order will be preserved in the final pdf)

        ->addFile($test_pdf) # add PDF from local file
        ->addUrlFile($test_url) # add PDF from public url
        #->addFileWithPassword($test_pdf, "pdf_password") # add PDF (that requires a password) from local file
        #->addUrlFileWithPassword($test_url, "pdf_password") # add PDF (that requires a password) from public url
    ;

    print "Starting pdf merge ...\n";

    # merge pdfs to local file
    $client->saveToFile($local_file);

    # merge pdfs to memory
    # my $pdf = $client->save();

    print "Finished! Number of pages: " . $client->getNumberOfPages() . ".\n";

    # get API usage
    my $usageClient = SelectPdf::UsageClient->new($apiKey);
    my $usage = $usageClient->getUsage(0);
    print("Usage: " . encode_json($usage) . "\n");
    print("Conversions remained this month: ". $usage->{"available"});
};

# anything that died inside the eval block is reported here
if ($@) {
    print "An error occurred: $@\n";
}
# \ No newline at end of file
# diff --git a/samples/pdf-to-text.pl b/samples/pdf-to-text.pl
# new file mode 100644
# index 0000000..bc6c67d
# --- /dev/null
# +++ b/samples/pdf-to-text.pl
# @@ -0,0 +1,51 @@

# Sample: extract the text of a PDF into a local text file.

# unbuffered STDOUT so progress messages appear immediately
$| = 1;

use strict;
use warnings;
use JSON;
use SelectPdf;

print "This is SelectPdf-$SelectPdf::VERSION.\n";

my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
my $test_pdf = "Input.pdf";
my $local_file = "Test.txt";
my $apiKey = "Your API key here";

eval {
    my $client = SelectPdf::PdfToTextClient->new($apiKey);

    print "Starting pdf to text ...\n";

    # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
    $client
        ->setStartPage(1) # start page (processing starts from here)
        ->setEndPage(0) # end page (set 0 to process file till the end)
        ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html)
    ;

    # convert local pdf to local text file
    $client->getTextFromFileToFile($test_pdf, $local_file);

    # extract text from local pdf to memory
    # my $text = $client->getTextFromFile($test_pdf);
    # print $text;

    # convert pdf from public url to local text file
    # $client->getTextFromUrlToFile($test_url, $local_file);

    # extract text from pdf from public url to memory
    # my $text = $client->getTextFromUrl($test_url);
    # print $text;

    print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n";

    # get API usage
    my $usageClient = SelectPdf::UsageClient->new($apiKey);
    my $usage = $usageClient->getUsage(0);
    print("Usage: " . encode_json($usage) . "\n");
    print("Conversions remained this month: ". $usage->{"available"});
};

# anything that died inside the eval block is reported here
if ($@) {
    print "An error occurred: $@\n";
}
# \ No newline at end of file
# diff --git a/samples/search-pdf.pl b/samples/search-pdf.pl
# new file mode 100644
# index 0000000..c20c1a3
# --- /dev/null
# +++ b/samples/search-pdf.pl
# @@ -0,0 +1,46 @@

# Sample: search a PDF for a piece of text and print the matches.

# unbuffered STDOUT so progress messages appear immediately
$| = 1;

use strict;
use warnings;
use JSON;
use SelectPdf;

print "This is SelectPdf-$SelectPdf::VERSION.\n";

my $test_url = "https://selectpdf.com/demo/files/selectpdf.pdf";
my $test_pdf = "Input.pdf";
my $apiKey = "Your API key here";

eval {
    my $client = SelectPdf::PdfToTextClient->new($apiKey);

    print "Starting search pdf ...\n";

    # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
    $client
        ->setStartPage(1) # start page (processing starts from here)
        ->setEndPage(0) # end page (set 0 to process file till the end)
        ->setOutputFormat(0) # set output format - 0 (Text), 1 (Html)
    ;

    # search local pdf
    my $results = $client->searchFile($test_pdf, "pdf", "True", "True");

    # search pdf from public url
    # my $results = $client->searchUrl($test_url, "pdf", "True", "True");

    # number of matches = number of elements in the returned list
    my $count = scalar @{$results};
    print("Number of search results: " . $count . "\n");
    print("Results: " . encode_json($results) . "\n");

    print "Finished! Number of pages processed: " . $client->getNumberOfPages() . ".\n";

    # get API usage
    my $usageClient = SelectPdf::UsageClient->new($apiKey);
    my $usage = $usageClient->getUsage(0);
    print("Usage: " . encode_json($usage) . "\n");
    print("Conversions remained this month: ". $usage->{"available"});
};

# anything that died inside the eval block is reported here
if ($@) {
    print "An error occurred: $@\n";
}
# \ No newline at end of file
# diff --git a/t/SelectPdf.t b/t/SelectPdf.t
# new file mode 100644
# index 0000000..4c207e4
# --- /dev/null
# +++ b/t/SelectPdf.t
# @@ -0,0 +1,18 @@
# Before 'make install' is performed this script should be runnable with
# 'make test'. After 'make install' it should work as 'perl SelectPdf.t'

#########################

# The plan is derived from the module list below; extend the list (or the
# plan) when adding tests.

use strict;
use warnings;

use Test::More;

# Every module shipped in the distribution (see MANIFEST) must at least load.
my @modules = qw(
    SelectPdf
    SelectPdf::ApiClient
    SelectPdf::AsyncJobClient
    SelectPdf::HtmlToPdfClient
    SelectPdf::UsageClient
    SelectPdf::WebElementsClient
    SelectPdf::PdfMergeClient
    SelectPdf::PdfToTextClient
);

plan tests => scalar @modules;

for my $module (@modules) {
    use_ok($module);
}

#########################

# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.
