Sampanna Rimal 9cd05ef3cb commitall
2024-07-10 18:28:19 +05:45

2235 lines
79 KiB
PHP
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/*******************************************************************************
* FPDM *
* *
*@file fpdm.php *
*@name A free PDF form filling tool *
*@package fpdftk *
*@version 2.9 *
*@date 2017-05-11 *
*@author 0livier *
*@todo in the importance order, natively by fpdm *
* -stream inline support (content change,repack,offset/size calculations) *
* -pdf inline protection *
* -flatten support *
* -extends filling to another form fields types (checkboxes,combos..) *
*@note *
* V2.9 (11.05.2017) fixed an issue with some PDFs *
* V2.8 (31.12.2011) added UTF-8 support *
* V2.7 (29.12.2011) compatibility with PDFs generated by pdftk *
* V2.6 (25.12.2010) OpenOffice 3 compatibility issues for Florian *
* see tracking issue here: http://www.fpdf.org/?go=forum&i=53697&t=53697 *
* V2.5 (06.12.2010) pdftk support for flatten mode and more...special *
* christmas release to the fpdf fanclub even if the red guy is busy *
* V2.4 (01.12.2010) Hack for malformed stream definitions, new parsing and *
* stream core with advanced verbose output. Fix() bonus for corrupted pdfs. *
* V2.3 (28.11.2010) stream type was lost when /length defined after /Filter *
* V2.2 (27.11.2010) Stream filter improved:decode now handles multi filters! * *
* V2.1 (25.11.2010) Only filter support for streams, trailer detection was *
* too restrictive. fixes FDF error occurring when empty array data is given. *
* V2.0 (05.11.2010) Load support for inline text fields datas or FDF content *
* V1.1 (04.11.2010) Works now under php4 for backward compat. *
* V1.0 (03.11.2010) First working release *
*******************************************************************************/
//Both tables are bound to the global scope explicitly, otherwise autoloading will be incomplete
//(the file may be included from inside a function).
global $FPDM_FILTERS, $FPDM_REGEXPS;
//Regular expressions keyed by the PDF token they capture.
$FPDM_REGEXPS = array(
    //FIX: parse checkbox definition
    "/AS" => "/\/AS\s+\/(\w+)$/",
    "name" => "/\/(\w+)/",
    // "/AP_D_SingleLine"=>"/\/D\s+\/(\w+)\s+\d+\s+\d+\s+R\s+\/(\w+)$/",
    //ENDFIX
    "/Type" => "/\/Type\s+\/(\w+)$/",
    "/Subtype" => "/^\/Subtype\s+\/(\w+)$/"
);
//Names of the supported stream filters (mostly inherited from FPDI); the list stays empty when FPDM_DIRECT is defined.
$FPDM_FILTERS = defined('FPDM_DIRECT')
    ? array()
    : array("LZWDecode", "ASCIIHexDecode", "ASCII85Decode", "FlateDecode", "Standard" );
// require_once("filters/FilterASCIIHex.php");
// require_once("filters/FilterASCII85.php");
// require_once("filters/FilterFlate.php");
// require_once("filters/FilterLZW.php");
// require_once("filters/FilterStandard.php");
//Argument list for class_exists(): the class name alone on PHP < 5, the class name plus false
//(second argument of class_exists, i.e. do not trigger autoloading) on PHP 5 and later.
$__tmp = version_compare(phpversion(), "5") == -1 ? array('FPDM') : array('FPDM', false);
//The constants and the class below are only declared when no class named FPDM exists yet.
if (!call_user_func_array('class_exists', $__tmp)) {
define('FPDM_VERSION',2.9); //Version number, reported by FPDM::Info() for the native reader
//The four values below are compared with the number of arguments given to FPDM::__construct()
define('FPDM_INVALID',0); //no argument: invalid instantiation
define('FPDM_STATIC',1); //one argument: static marker or pdf source only
define('FPDM_COMMON',2); //two arguments: pdf source and fdf source
define('FPDM_VERBOSE',3); //three arguments: pdf source, fdf source and verbose flag
define('FPDM_CACHE',dirname(__FILE__).'/export/cache/'); //cache directory for fdf temporary files needed by pdftk.
define('FPDM_PASSWORD_MAX_LEN',15); //Security to prevent shell overflow.
class FPDM {
//@@@@@@@@@
var $useCheckboxParser = false; //boolean: allows activation of custom checkbox parser (not available in original FPDM source)
var $pdf_source = ''; //string: full pathname to the input pdf, a form file
var $fdf_source = ''; //string: full pathname to the input fdf, a form data file
var $pdf_output = ''; //string: full pathname to the resulting filled pdf
var $n = 0; //integer: reset to 0 by the constructor; NOTE(review): usage is outside the visible part of the file, presumably an object counter - confirm
var $pdf_entries = array(); //array: holds the content of the pdf file as an array of lines
var $fdf_content = ''; //string: holds the content of the fdf file
var $fdf_parse_needed = false;//boolean: false will use $fields data, true will extract the data from the fdf content
var $value_entries = array(); //array: a map of values to facilitate access and changes
var $positions = array(); //array: stores what object id is at a given position n ($positions[n]=<obj_id>)
var $offsets = array(); //array: offsets of the objects, index is the object's id, starting at 1
var $pointer = 0; //integer: current line position in the pdf file during the parsing
var $shifts = array(); //array: shifts of objects in the order positions they appear in the pdf, starting at 0.
var $shift = 0; //integer: global shift of the file size due to object values size changes
var $streams = ''; //Holds streams configuration found during parsing
var $streams_filter = ''; //Regexp to decode filter streams
var $safe_mode = false; //boolean: if set, ignore previous offsets, do no calculations for the new xref table, seek pos directly in file
var $check_mode = false; //boolean: use this to track offset calculation errors, in corrupted pdf files for example
var $halt_mode = false; //boolean: if true, stops when an offset error is encountered
var $info = array(); //array: holds the info properties
var $fields = array(); //array: holds the fields data parsed from FDF
var $verbose = false; //boolean: a debug flag to decide whether or not to show internal process
var $verbose_level = 1; //integer: default is 1 and if greater than 3, shows internal parsing as well
var $support = ''; //string: set to 'native' for fpdm or 'pdftk' for pdf toolkit
var $flatten_mode = false; //boolean: if true, flatten field data as text and remove form fields (NOT YET SUPPORTED BY FPDM)
var $compress_mode = false; //boolean: pdftk feature only, to compress streams
var $uncompress_mode = false; //boolean: pdftk feature only, to uncompress streams
var $security = array(); //array: holds the security settings
//(owner and user passwords, encrypt (set to 40 or 128 or 0), allow <permissions>), see pdftk help
var $needAppearancesTrue = false; //boolean, indicates if /NeedAppearances is already set to true
var $isUTF8 = false; //boolean (true for UTF-8, false for ISO-8859-1)
/**
 * Constructor, dispatching on the number of arguments received.
 *
 *  - no argument: reported through Error();
 *  - one argument equal to '[_STATIC_]': bare instance, nothing is initialised or loaded;
 *  - one argument: the pdf source, field data being supplied later with Load();
 *  - two arguments: the pdf source and the fdf source;
 *  - three arguments or more: the pdf source, the fdf source and the verbose flag.
 *
 *@param string $pdf_source Source-Filename of the blank pdf form
 *@param string $fdf_source optional, Source-Filename of the fdf data
 *@param boolean|int $verbose optional, false per default; an integer is also used as verbose level
 */
function __construct() {
    $argv=func_get_args();
    $argc=func_num_args();
    if($argc==FPDM_INVALID) {
        $this->Error("Invalid instantiation of FPDM, requires at least one param");
        return;
    }
    //Static use: keep the declared property defaults and load nothing
    if(($argc==FPDM_STATIC)&&($argv[0] =='[_STATIC_]')) return;
    $with_fdf=($argc>=FPDM_COMMON);
    $verbose=($argc>=FPDM_VERBOSE) ? $argv[2] : false;
    //Sources
    $this->pdf_source = $argv[0];
    if($with_fdf) {
        $this->fdf_source = $argv[1];
        $this->fdf_parse_needed=true;
    }
    //Offset bookkeeping starts from scratch
    $this->offsets=array();
    $this->pointer=0;
    $this->shift=0;
    $this->shifts=array();
    $this->n=0;
    //Regexp matching any of the known stream filter names
    $filters=$this->getFilters("|");
    $this->streams_filter="/(\/($filters))+/";
    $this->info=array();
    //Debug modes: a truthy integer flag doubles as the verbose level
    $this->verbose=$verbose;
    $this->verbose_level=($verbose&&is_int($verbose)) ? $verbose : 1;
    $this->safe_mode=false;
    $this->check_mode=false;
    $this->halt_mode=true;
    //Native support unless a pdftk-only feature is requested later
    $this->support='native';
    $this->security=array('password'=>array('owner'=>null,'user'=>null),'encrypt'=>0,'allow'=>array());
    $this->load_file('PDF');
    if($with_fdf) $this->load_file('FDF');
}
/**
 * Supplies the form data to merge into the pdf.
 *
 *@note this overrides the fdf input source if it was previously defined
 *@access public
 *@param string|array $data the content of a FDF file, or an array of values indexed by field name
 *@param boolean $isUTF8 true when the values are UTF-8 encoded, false (default) for ISO-8859-1
 **/
function Load($data,$isUTF8=false) {
    //The encoding flag is stored first, then the data goes through the FDF loader
    $this->isUTF8 = $isUTF8;
    $this->load_file('FDF',$data);
}
/**
 * Loads the pdf form or the form data, according to the requested type.
 *
 *@access private
 *@param string $type 'PDF' or 'FDF'
 *@param string|array $content FDF only: the FDF content as a string or the field values as an array;
 *                             when NULL the FDF is read from the fdf source file
 **/
function load_file($type,$content=NULL) {
    if($type=="PDF") {
        //A pdf is always read from $pdf_source, inline content is refused
        if($content) {
            $this->Error("load_file do not accept PDF content, only FDF content sorry");
        } else {
            $this->pdf_entries = $this->getEntries($this->pdf_source,'PDF');
        }
        return;
    }
    if($type!="FDF") {
        $this->Error("Invalid file type $type");
        return;
    }
    if(is_null($content)) {
        //No inline data: read the fdf source file, it will have to be parsed
        $this->fdf_content = $this->getContent($this->fdf_source,'FDF');
        $this->fdf_parse_needed=true;
        return;
    }
    if(is_array($content)) {
        //Field values given directly, no parsing required
        $this->fields=$content;
        $this->fdf_parse_needed=false;
    } else if(is_string($content)) {
        //Raw FDF content (TODO: check content)
        $this->fdf_content = $content;
        $this->fdf_parse_needed=true;
    } else {
        $this->Error('Invalid content type for this FDF file!');
    }
}
/**
 * Sets one of the working modes.
 *
 *@access public
 *@param string $mode one of 'safe', 'check', 'flatten', 'compress_mode', 'uncompress_mode',
 *                    'verbose', 'halt' or 'verbose_level'; anything else is reported through Error()
 *@param mixed $value the new value, an integer for 'verbose_level'
 **/
function set_modes($mode,$value) {
    if($mode=='safe') {
        $this->safe_mode=$value;
    } elseif($mode=='check') {
        $this->check_mode=$value;
    } elseif($mode=='flatten') {
        $this->flatten_mode=$value;
    } elseif($mode=='compress_mode') {
        //compress and uncompress exclude each other
        $this->compress_mode=$value;
        if($value) $this->uncompress_mode=false;
    } elseif($mode=='uncompress_mode') {
        $this->uncompress_mode=$value;
        if($value) $this->compress_mode=false;
    } elseif($mode=='verbose') {
        $this->verbose=$value;
    } elseif($mode=='halt') {
        $this->halt_mode=$value;
    } elseif($mode=='verbose_level') {
        $this->verbose_level=$value;
    } else {
        $this->Error("set_modes error, Invalid mode '<i>$mode</i>'");
    }
}
/**
 * Reports the info properties, the reader in use, the fields and the current modes.
 *
 *@access public
 *@param boolean $asArray true to get the report as an array, false (default) to dump it
 *@return array|null the report when $asArray is set, nothing otherwise
 **/
function Info($asArray=false) {
    $report=$this->info;
    if($this->support == "native") {
        $report["Reader"]='FPDF-Merge '.FPDM_VERSION;
    } else {
        $report["Reader"]=$this->support;
    }
    $report["Fields"]=$this->fields;
    //The on/off modes are reported as Yes/No, the remaining ones with their raw value
    $modes=array();
    $switches=array(
        'safe'=>$this->safe_mode,
        'check'=>$this->check_mode,
        'flatten'=>$this->flatten_mode,
        'compress_mode'=>$this->compress_mode,
        'uncompress_mode'=>$this->uncompress_mode
    );
    foreach($switches as $label=>$enabled) {
        $modes[$label]=$enabled ? 'Yes' : 'No';
    }
    $modes['verbose']=$this->verbose;
    $modes['verbose_level']=$this->verbose_level;
    $modes['halt']=$this->halt_mode;
    $report["Modes"]=$modes;
    if($asArray) return $report;
    $this->dumpEntries($report);
}
/**
 * Selects the support used to produce the output.
 *
 *@access public
 *@param string $cool 'pdftk' to use the external pdf toolkit, any other value selects the native support
 **/
function Plays($cool) {
    $this->support=($cool=='pdftk') ? 'pdftk' : 'native';
}
/**
 * Prepares the repair of a corrupted PDF file.
 *
 *@access public
 *@note only the modes are set here, the real work is made by Merge that should be launched after
 **/
function Fix() {
    //Default: no field data
    if(empty($this->fields)) {
        $this->fields=array();
    }
    //Compare the xref table offsets with the objects offsets found in the pdf file..
    $this->set_modes('check',true);
    //..and do not stop on errors, so the fix is applied during the merge process
    $this->set_modes('halt',false);
}
//######## pdftk's output configuration #######
/**
 * Decides to use the compress filter to restore compression (pdftk feature).
 *
 *@note This is only useful when you want to repack PDF that was previously edited in a text editor like vim or emacs.
 **/
function Compress() {
    //set_modes() names this mode 'compress_mode'; the former 'compress' key fell into its
    //error branch and left the flag unset.
    $this->set_modes('compress_mode',true);
    $this->support="pdftk";
}
/**
 * Decides to remove PDF page stream compression by applying the uncompress filter (pdftk feature).
 *
 *@note This is only useful when you want to edit PDF code in a text editor like vim or emacs.
 **/
function Uncompress() {
    //set_modes() names this mode 'uncompress_mode'; the former 'uncompress' key fell into its
    //error branch and left the flag unset.
    $this->set_modes('uncompress_mode',true);
    $this->support="pdftk";
}
/**
 * Requests a flattened output: the form is removed from the pdf file, the field datas are kept.
 *
 *@note switches the support to pdftk
 **/
function Flatten() {
    $this->support="pdftk";
    $this->set_modes('flatten',true);
}
/**
 * Defines a password of the given type and switches the support to pdftk.
 *
 *@param string $type 'owner' or 'user', any other type is reported through Error()
 *@param string $code the password
 **/
function Password($type,$code) {
    $known=(($type=='owner')||($type=='user'));
    if($known) {
        $this->security["password"]["$type"]=$code;
    } else {
        $this->Error("Unsupported password type ($type), specify 'owner' or 'user' instead.");
    }
    //Passwords are a pdftk feature, whatever the outcome above
    $this->support="pdftk";
}
/**
 * Defines the encryption strength and switches the support to pdftk.
 *
 *@param integer $bits 0, 40 or 128, any other value is reported through Error()
 **/
function Encrypt($bits) {
    //Loose comparison on purpose, as numeric strings are accepted as well
    if(in_array($bits,array(0,40,128))) {
        $this->security["encrypt"]=$bits;
    } else {
        $this->Error("Unsupported encrypt value of $bits, only 0, 40 and 128 are supported");
    }
    $this->support="pdftk";
}
/**
 * Defines the permissions allowed on the output pdf.
 *
 * Permissions are applied to the output PDF only if an encryption strength is specified
 * or an owner or user password is given. If permissions are not specified, they default
 * to 'none', which means all of the following features are disabled:
 *
 *   Printing           Top Quality Printing
 *   DegradedPrinting   Lower Quality Printing
 *   ModifyContents     Also allows Assembly
 *   Assembly
 *   CopyContents       Also allows ScreenReaders
 *   ScreenReaders
 *   ModifyAnnotations  Also allows FillIn
 *   FillIn
 *   AllFeatures        Allows the user to perform all of the above, and top quality printing.
 *
 *@param array|string $permissions one permission name or an array of them, unknown names are dropped;
 *                                 if no arg is given, the help is printed and nothing is changed
 **/
function Allow($permissions=null) {
    $perms_help=array(
        'Printing'=>'Top Quality Printing',
        'DegradedPrinting'=>'Lower Quality Printing',
        'ModifyContents' =>'Also allows Assembly',
        'Assembly' => '',
        'CopyContents' => 'Also allows ScreenReaders',
        'ScreenReaders' => '',
        'ModifyAnnotations'=>'Also allows FillIn',
        'FillIn'=>'',
        'AllFeatures'=> "All above"
    );
    //Help mode
    if(is_null($permissions)) {
        echo '<br>Info Allow permissions:<br>';
        print_r($perms_help);
        return;
    }
    $requested=is_string($permissions) ? array($permissions) : $permissions;
    //Only the documented permission names are retained
    $this->security["allow"]=array_intersect($requested, array_keys($perms_help));
    $this->support="pdftk";
}
//#############################
/**
 * Merges the field data with the pdf form.
 *
 *@access public
 *@note the files have been provided during the instantiation of this class or with Load()
 *@internal with the pdftk support nothing is done here, the job is made in Output
 *@param boolean $flatten optional, false by default, if true calls Flatten() which switches to pdftk
 **/
function Merge($flatten=false) {
    if($flatten) $this->Flatten();
    //pdftk's job is done in Output, not here
    if($this->support != "native") return;
    //Field values come either from the FDF content or from the array given to Load()
    $fields=$this->fdf_parse_needed ? $this->parseFDFContent() : $this->fields;
    $count_fields=count($fields);
    if($this->verbose&&($count_fields==0))
        $this->dumpContent("The FDF content has either no field data or parsing may failed","FDF parser: ");
    //Locate the value definition lines of the fields inside the pdf
    $definition_lines=array();
    $count_entries=$this->parsePDFEntries($definition_lines);
    if(!$count_entries) {
        $this->Error("PDF file is empty!");
        return;
    }
    $this->value_entries=$definition_lines;
    if($this->verbose) {
        $this->dumpContent("$count_entries Field entry values found for $count_fields field values to fill","Merge info: ");
    }
    //Change the values; only the current value of each field is set
    if($count_fields) {
        foreach($fields as $name => $value) {
            $this->set_field_value("current",$name,$value);
        }
    }
    //Update the cross reference table to match the object size changes..
    $this->fix_xref_table();
    //..then the pointer to the cross reference table
    $this->fix_xref_start();
}
/**
 * Reports a conflict between the verbose mode and an output destination.
 *
 *@access private
 *@param string $dest the output destination that conflicts
 **/
function Close($dest) {
    $reason="Output: Verbose mode should be desactivated, it is incompatible with this output mode $dest";
    $this->Error($reason);
}
/**
 * Gets the pdf content as a string (without any offset fixes).
 *
 *@access private
 *@param string $pdf_file optional, when given the content of this file is returned
 *                        instead of the pdf entries held in memory
 *@return string the pdf content
 **/
function get_buffer($pdf_file=''){
    if($pdf_file != '') {
        return $this->getContent($pdf_file,'PDF');
    }
    return implode("\n",$this->pdf_entries);
}
/**
 * Outputs the PDF to some destination.
 *
 * With the pdftk support, the merge is delegated to pdftk first (a temporary fdf file is
 * generated when no fdf source file was given) and the file it produced is what gets output.
 * With the native support, the pdf entries held in memory are output.
 *
 *@access public
 *@note reproduces the fpdf's behavior
 *@param string $dest the destination: 'I' inline to the browser, 'D' forced download,
 *                    'F' local file, 'S' returned as a string. When empty, 'F' is used if a
 *                    name is given, else 'I' with the name 'doc.pdf'
 *@param string $name the filename
 *@return string the pdf content for 'S', an empty string otherwise
 **/
function Output($dest='', $name=''){
    $pdf_file='';
    if($this->support == "pdftk") {
        //As PDFTK can only merge FDF files not data directly,
        require_once("lib/url.php"); //we will need a url support because relative urls for pdf inside fdf files are not supported by PDFTK...
        require_once("export/fdf/fdf.php"); //...conjointly with the patched/bridged forge_fdf that provides fdf file generation support from array data.
        require_once("export/pdf/pdftk.php");//the bridge to PDFTK
        $tmp_file=false;
        $pdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->pdf_source)); //string: full pathname to the input pdf , a form file
        if($this->fdf_source) { //FDF file provided
            $fdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->fdf_source));
        }else {
            $pdf_url=getUrlfromDir($pdf_file); //Normaly http scheme not local file
            if($this->fdf_parse_needed) { //fdf source was provided
                $pdf_data=$this->parseFDFContent();
            }else { //fields data was provided as an array, we have to generate the fdf file
                $pdf_data=$this->fields;
            }
            //NOTE(review): rnunid() is not defined in the visible part of this file, presumably
            //it comes from one of the helper files required above - confirm.
            $fdf_file=fix_path(FPDM_CACHE)."fields".rnunid().".fdf";
            $tmp_file=true;
            $ret=output_fdf($pdf_url,$pdf_data,$fdf_file);
            if(!$ret["success"])
                $this->Error("Output failed as something goes wrong (Pdf was $pdf_url) <br> during internal FDF generation of file $fdf_file, <br>Reason is given by {$ret['return']}");
        }
        //Serializes security options (not deeply tested)
        $security='';
        if(!is_null($this->security["password"]["owner"])) $security.=' owner_pw "'.substr($this->security["password"]["owner"],0,FPDM_PASSWORD_MAX_LEN).'"';
        if(!is_null($this->security["password"]["user"])) $security.=' user_pw "'.substr($this->security["password"]["user"],0,FPDM_PASSWORD_MAX_LEN).'"';
        if($this->security["encrypt"]!=0) $security.=' encrypt_'.$this->security["encrypt"].'bit';
        if(count($this->security["allow"])>0) {
            $permissions=$this->security["allow"];
            $security.=' allow ';
            foreach($permissions as $permission)
                $security.=' '.$permission;
        }
        //Serialize output modes
        $output_modes='';
        if($this->flatten_mode) $output_modes.=' flatten';
        if($this->compress_mode) $output_modes.=' compress';
        if($this->uncompress_mode) $output_modes.=' uncompress';
        $ret=pdftk($pdf_file,$fdf_file,array("security"=>$security,"output_modes"=>$output_modes));
        if($tmp_file) @unlink($fdf_file); //Clear cache
        if($ret["success"]) {
            $pdf_file=$ret["return"];
        }else {
            $this->Error($ret["return"]);
            //No pdftk result: fall back on the pdf entries held in memory
            $pdf_file='';
        }
    }
    $dest=strtoupper($dest);
    if($dest=='')
    {
        if($name=='')
        {
            $name='doc.pdf';
            $dest='I';
        }
        else
            $dest='F';
    }
    //Abort to avoid to polluate output
    if($this->verbose&&(($dest=='I')||($dest=='D'))) {
        $this->Close($dest);
    }
    //The content is built once: the file produced by pdftk when there is one (its path was
    //previously computed but never read, so the unfilled pdf was sent), the entries in memory otherwise.
    $buffer=$this->get_buffer($pdf_file);
    switch($dest)
    {
        case 'I':
            //Send to standard output
            if(ob_get_length())
                $this->Error('Some data has already been output, can\'t send PDF file');
            if(php_sapi_name()!='cli')
            {
                //We send to a browser
                header('Content-Type: application/pdf');
                if(headers_sent())
                    $this->Error('Some data has already been output, can\'t send PDF file');
                header('Content-Length: '.strlen($buffer));
                header('Content-Disposition: inline; filename="'.$name.'"');
                header('Cache-Control: private, max-age=0, must-revalidate');
                header('Pragma: public');
                ini_set('zlib.output_compression','0');
            }
            echo $buffer;
            break;
        case 'D':
            //Download file
            if(ob_get_length())
                $this->Error('Some data has already been output, can\'t send PDF file');
            header('Content-Type: application/x-download');
            if(headers_sent())
                $this->Error('Some data has already been output, can\'t send PDF file');
            header('Content-Length: '.strlen($buffer));
            header('Content-Disposition: attachment; filename="'.$name.'"');
            header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past
            header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT"); // always modified
            header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0"); // HTTP/1.1
            header("Cache-Control: post-check=0, pre-check=0", false);
            //header("Pragma: "); // HTTP/1.0
            header('Cache-Control: private, max-age=0, must-revalidate');
            header('Pragma: public,no-cache');
            ini_set('zlib.output_compression','0');
            echo $buffer;
            break;
        case 'F':
            //Save to local file
            if($this->verbose) $this->dumpContent("Write file $name","Output");
            $f=fopen($name,'wb');
            if(!$f) {
                $this->Error('Unable to create output file: '.$name.' (currently opened under Acrobat Reader?)');
            } else {
                //Only write when the file could really be opened
                fwrite($f,$buffer,strlen($buffer));
                fclose($f);
            }
            break;
        case 'S':
            //Return as a string
            return $buffer;
        default:
            $this->Error('Incorrect output destination: '.$dest);
    }
    return '';
}
/**
 * Decodes a hexified field value, delegating to _hex2bin through static_method_call.
 *
 *@param string $value the hexified string
 *@return string the binary string
 **/
function pdf_decode_field_value($value) {
    return $this->static_method_call('_hex2bin',$value);
}
/**
 * Encodes a binary field value, delegating to _bin2hex through static_method_call.
 *
 *@param string $value the binary string
 *@return string the hexified string
 **/
function pdf_encode_field_value($value) {
    return $this->static_method_call('_bin2hex',$value);
}
/**
 * Calls a method of this object by its name, forwarding the extra arguments.
 *
 *@param string $method the name of a method belonging to this class
 *@param mixed ... the arguments given to the method
 *@return mixed the return value of the called method
 **/
function static_method_call($method) {
    //Everything after the method name is forwarded as is
    $forwarded=array_slice(func_get_args(),1);
    return call_user_func_array(array($this,$method),$forwarded);
}
/**
 * Rebuilds a value definition with a new value, that can be in hex <> or binary form ().
 *
 *@param array $matches the regexp matches of the line that contains the value to change:
 *                      whole match, type code, opening delimiter, old value, closing delimiter
 *@param string $value the new value for the field property
 *@return string the rebuilt definition
 **/
function replace_value($matches,$value) {
    //Drop the whole match: 0 is now the type code (should be V, DV or TU), 1 the opening delimiter, 2 the value
    $parts=array_slice($matches,1);
    $hexify=(($value!='')&&($parts[1]=="<"));
    if($hexify) {
        //A value between <> must be hexified
        $value=$this->pdf_encode_field_value($value);
    }
    $parts[2]=$value;
    $parts[0]="/".$parts[0]." ";
    return implode("",$parts);
}
/**
 * Core to change the value of a field property, inline.
 *
 * The line must start with a definition such as /V (text) or /TU <hex>; when it does not,
 * the line is left untouched (a warning is printed when the verbose level is above 1).
 *
 *@access private
 *@param int $line the line where the field property value is defined in the pdf file
 *@param string $value the new value to set
 *@return int $shift the size change of the field property value
 **/
function _set_field_value($line,$value) {
    $verbose_set=($this->verbose&&($this->verbose_level>1));
    //get the line content
    $CurLine=$this->pdf_entries[$line];
    $OldLen=strlen($CurLine);
    $field_regexp='/^\/(\w+)\s?(\<|\()([^\)\>]*)(\)|\>)/';
    if(preg_match($field_regexp,$CurLine)) {
        //A closure replaces the former create_function() callback: create_function() no longer
        //exists since PHP 8.0, and it spliced the value into evaluated PHP source, so any quote,
        //dollar sign or backslash in a field value broke or injected code.
        $self=$this; //explicit capture, $this is not bound inside closures before PHP 5.4
        $new_value=(string)$value; //the former callback always received the value as a string
        $CurLine=preg_replace_callback(
            $field_regexp,
            function($matches) use ($self,$new_value) {
                return $self->replace_value($matches,$new_value);
            },
            $CurLine
        );
    }else {
        if($verbose_set) echo("<br>WARNING:".htmlentities("Can not access to the value: $CurLine using regexp $field_regexp"));
    }
    $NewLen=strlen($CurLine);
    $Shift=$NewLen-$OldLen;
    //The global shift accumulates every size change
    $this->shift=$this->shift+$Shift;
    //Saves
    $this->pdf_entries[$line]=$CurLine;
    return $Shift;
}
/**
 * Hexifies a field value; UTF-8 values are converted to UTF-16BE with a leading FE FF first.
 *
 *@param string $str the value, read as UTF-8 only when $isUTF8 is set
 *@return string the result of _bin2hex
 **/
function _encode_value($str) {
    $raw=$this->isUTF8 ? "\xFE\xFF".iconv('UTF-8','UTF-16BE',$str) : $str;
    return $this->_bin2hex($raw);
}
/**
 * Writes the /V value of a field on the given line, always in hex form.
 *
 *@param int $line the line of the pdf entries to change
 *@param string $value the new value to set
 *@param boolean $append true to append a new /V definition at the end of the line,
 *                       false to replace the existing one (Error() is called if there is none)
 *@return int the size change of the line
 **/
function _set_field_value2($line,$value,$append) {
    $before=$this->pdf_entries[$line];
    if($append) {
        $after=$before.' /V <'.$this->_encode_value($value).'>';
    } elseif(preg_match('#/V\s?[<(]([^>)]*)[>)]#', $before, $found, PREG_OFFSET_CAPTURE)) {
        //Offsets of the old value itself, the delimiters sit one byte before and one byte after
        $inner_start=$found[1][1];
        $inner_end=$inner_start+strlen($found[1][0]);
        $after=substr($before,0,$inner_start-1).'<'.$this->_encode_value($value).'>'.substr($before,$inner_end+1);
    } else {
        $this->Error('/V not found');
        $after=$before;
    }
    $delta=strlen($after)-strlen($before);
    $this->shift=$this->shift+$delta;
    $this->pdf_entries[$line]=$after;
    return $delta;
}
/**
 * Changes the value of a field, inline, then shifts the offsets of the objects that follow.
 *
 *@note besides 'tooltip', the type is not taken into account: the current value is always
 *      the one written (or appended after the field name when the field has none yet)
 *@param string $type 'default', 'current' or 'tooltip'
 *@param string $name name of the field annotation to change the value
 *@param string $value the new value to set
 **/
function set_field_value($type,$name,$value) {
    if(!isset($this->value_entries["$name"])) {
        $this->Error("field $name not found");
        return;
    }
    $object_id=$this->value_entries["$name"]["infos"]["object"];
    if($type=="tooltip") {
        $offset_shift=$this->set_field_tooltip($name,$value);
    } elseif ($this->useCheckboxParser && isset($this->value_entries["$name"]['infos']['checkbox_state'])) {
        //FIX: set checkbox value
        $offset_shift=$this->set_field_checkbox($name, $value);
        //ENDFIX
    } elseif(isset($this->value_entries[$name]["values"]["current"])) {
        //A current value is defined: replace it
        $offset_shift=$this->_set_field_value2($this->value_entries[$name]["values"]["current"],$value,false);
    } else {
        //No current value yet: append one to the line holding the field name
        $offset_shift=$this->_set_field_value2($this->value_entries[$name]["infos"]["name_line"],$value,true);
    }
    //offset size shift will affect the next objects offsets taking into account the order they appear in the file
    $this->apply_offset_shift_from_object($object_id,$offset_shift);
}
/**
 * Changes the tooltip value of a field, inline.
 *
 *@param string $name name of the field annotation to change the value
 *@param string $value the new value to set
 *@return int the size variation, 0 when nothing was changed
 **/
function set_field_tooltip($name,$value) {
    if(!isset($this->value_entries["$name"])) {
        $this->Error("set_field_tooltip failed as the field $name does not exist");
        return 0;
    }
    $verbose_set=($this->verbose&&($this->verbose_level>1));
    $field_tooltip_line=$this->value_entries["$name"]["infos"]["tooltip"];
    if(!$field_tooltip_line) {
        //Nothing to change without a tooltip definition line
        if($verbose_set) echo "<br>Change toolpip value aborted, the field $name has no tooltip definition.";
        return 0;
    }
    if($verbose_set) echo "<br>Change tooltip of the field $name at line $field_tooltip_line to value [$value]";
    return $this->_set_field_value($field_tooltip_line,$value);
}
//FIX: parse checkbox definition
/**
 * Changes the checkbox state, by rewriting its /AS line.
 *
 *@param string $name name of the field to change the state
 *@param mixed $value a truthy value selects the parsed 'yes' state, a falsy one the 'no' state
 *@return int the size variation, 0 when nothing was changed
 **/
public function set_field_checkbox($name, $value)
{
    if (!isset($this->value_entries["$name"])) {
        $this->Error("set_field_checkbox failed as the field $name does not exist");
        return 0;
    }
    $verbose_set=($this->verbose&&($this->verbose_level>1));
    $infos=$this->value_entries["$name"]["infos"];
    //The state line and both state names must have been parsed
    if (!isset($infos["checkbox_state_line"]) || !isset($infos["checkbox_no"]) || !isset($infos["checkbox_yes"])) {
        if ($verbose_set) {
            echo "<br>Change checkbox value aborted, the field $name has no checkbox definition.";
        }
        return 0;
    }
    $field_checkbox_line=$infos["checkbox_state_line"];
    if (!$field_checkbox_line) {
        if ($verbose_set) {
            echo "<br>Change checkbox value aborted, parsed checkbox definition incomplete.";
        }
        return 0;
    }
    if ($verbose_set) {
        echo "<br>Change checkbox of the field $name at line $field_checkbox_line to value [$value]";
    }
    $state=$value ? $infos["checkbox_yes"] : $infos["checkbox_no"];
    //The whole line is replaced by the new appearance state
    $replacement='/AS /'.$state;
    $delta=strlen($replacement)-strlen($this->pdf_entries[$field_checkbox_line]);
    $this->shift=$this->shift+$delta;
    //Saves
    $this->pdf_entries[$field_checkbox_line]=$replacement;
    return $delta;
}
//ENDFIX
/**
 * Dumps entries as preformatted, html escaped text.
 *
 *@note for debug purposes
 *@access private
 *@param array $entries the content to dump, nothing is printed for an empty one
 *@param string $tag an optional tag to highlight
 *@param boolean $halt decides to stop or not this script
 **/
function dumpEntries($entries,$tag="",$halt=false) {
    if($tag) {
        echo "<br><h4>$tag</h4><hr>";
    }
    if($entries) {
        echo "<pre>".htmlentities(print_r($entries,true))."</pre>";
    }
    if($halt) {
        exit();
    }
}
/**
 * Dumps a string as preformatted, html escaped text.
 *
 *@note for debug purposes
 *@access private
 *@param string $content the content to dump, nothing is printed for an empty one
 *@param string $tag an optional tag to highlight
 *@param boolean $halt decides to stop or not this script
 **/
function dumpContent($content,$tag="",$halt=false) {
    if($tag) {
        echo "<h4>$tag</h4>";
    }
    if($content) {
        echo "<pre>".htmlentities($content)."</pre>";
    }
    if($halt) {
        exit();
    }
}
/**
 * Retrieves the content of a file as a string.
 *
 * For a PDF, unsupported layouts (object streams, Fast Web View, incremental updates) are
 * reported through Error(), and $needAppearancesTrue is updated from the content.
 *
 *@access private
 *@param string $filename the filename of the file
 *@param string $filetype the type of file as info, 'PDF' triggers the checks above
 *@return string $content the file content, an empty string if it could not be read
 **/
function getContent($filename,$filetype) {
    //Read in one call: the former fopen/fread pair used an unchecked handle, and fread()
    //rejects the zero length obtained with an empty file on PHP 8.
    $content=file_get_contents($filename);
    if($content===false||$content==='') {
        $this->Error(sprintf('Cannot open '.$filetype.' file %s !', $filename));
        $content=''; //keeps the string functions below safe if Error() returns
    }
    if($filetype=='PDF')
    {
        //Unsupported features are looked for where they are declared: near the start of the file..
        $start = substr($content, 0, 2048);
        if(strpos($start, '/ObjStm')!==false)
            $this->Error('Object streams are not supported');
        if(strpos($start, '/Linearized')!==false)
            $this->Error('Fast Web View mode is not supported');
        //..or in the last bytes for the /Prev key
        $end = substr($content, -512);
        if(strpos($end, '/Prev')!==false)
            $this->Error('Incremental updates are not supported');
        $this->needAppearancesTrue = (strpos($content, '/NeedAppearances true')!==false);
    }
    return $content;
}
/**
*Retrieves the content of a file as an array of lines entries
*
*@access private
*@param string $filename the filename of the file
*@param string $filetype the type of file as info
*@return array $entries
**/
function getEntries($filename,$filetype) {
    // Reads the file through getContent() and splits it on "\n" into an
    // array of line entries.
    $raw = $this->getContent($filename, $filetype);
    return explode("\n", $raw);
}
/**
*Retrieves a binary string from its hexadecimal representation
*
*@access private
*@note Function was written because PHP has a bin2hex, but not a hex2bin!
*@internal note pack(“C”,hexdec(substr($data,$i,2))) DOES NOT WORK
*@param string $hexString the hexified string
*@return string $bin a binary string
**/
function _hex2bin ($hexString)
{
    // Converts a hexadecimal string into the bytes it encodes.
    // Returns FALSE when the input has an odd length or contains any
    // character that is not a hexadecimal digit.
    $length = strlen($hexString);
    if ($length % 2 != 0) {
        return FALSE;
    }
    if (preg_match("/[^\da-fA-F]/", $hexString)) {
        return FALSE;
    }
    // Each pair of hex digits becomes one byte.
    $binary = '';
    $offset = 0;
    while ($offset < $length) {
        $binary .= chr(hexdec(substr($hexString, $offset, 2)));
        $offset += 2;
    }
    return $binary;
}
/**
*Encodes a binary string to its hexadecimal representation
*
*@access private
*@internal dechex(ord($str{$i})); is buggy because for hex value of 0-15 heading 0 is missing! Using sprintf() to get it right.
*@param string $str a binary string
*@return string $hex the hexified string
**/
function _bin2hex($str) {
    // Encodes a binary string as upper-case hexadecimal, two digits per byte.
    // An empty input yields an empty string (the former do/while loop read
    // offset 0 of the empty string and produced "00").
    $str = (string)$str;
    if ($str === '') {
        return '';
    }
    return strtoupper(bin2hex($str));
}
/**
* Extracts the map object for the xref table
* @note PDF lines should have previously been parsed to make this work
* @return array a map that holds the xrefstart infos and values
*/
function get_xref_table() {
    // Returns the entry stored under the reserved '$_XREF_$' key of
    // $this->value_entries.
    $parsed = $this->value_entries;
    return $parsed['$_XREF_$'];
}
/**
* Extracts the offset of the xref table
* @note PDF lines should have previously been parsed to make this work
* @return int the xrefstart value
*/
function get_xref_start() {
    // Returns the "pointer" recorded in the "start" infos of the
    // '$_XREF_$' entry.
    $start_infos = $this->value_entries['$_XREF_$']["infos"]["start"];
    return $start_infos["pointer"];
}
/**
* Extracts the line where the offset of the xref table is stored
* @note PDF lines should have previously been parsed to make this work
* @return int the wished line number
*/
function get_xref_start_line() {
    // Returns the "line" recorded in the "start" infos of the
    // '$_XREF_$' entry.
    $start_infos = $this->value_entries['$_XREF_$']["infos"]["start"];
    return $start_infos["line"];
}
/**
* Calculates the offset of the xref table
*
* @return int the wished xrefstart offset value
*/
function get_xref_start_value() {
    // The recorded xref start pointer plus the accumulated size shift
    // held in $this->shift.
    $accumulated_shift = $this->shift;
    return $this->get_xref_start() + $accumulated_shift;
}
/**
* Reads the offset of the xref table directly from the file content
*
* @note the content is the one returned by $this->get_buffer()
* @note the buffer is searched for the first standalone "xref" keyword
* @return int the wished xrefstart offset value
*/
function read_xref_start_value() {
    // Splits the buffer on the standalone keyword "xref"; with
    // PREG_SPLIT_OFFSET_CAPTURE the second piece carries the offset of the
    // text that follows the first keyword, i.e. the end of "xref".
    $pieces = preg_split('/\bxref\b/', $this->get_buffer(), -1, PREG_SPLIT_OFFSET_CAPTURE);
    $after_keyword = intval($pieces[1][1]);
    // Step back over the 4 characters of "xref" to get where it starts.
    return $after_keyword - 4;
}
/**
* Calculates the new offset/xref for this object id by applying the offset_shift due to value changes
*
* @note uses internally precalculated $offsets,$positions and $shifts
* @param int $object_id an object id, a integer value starting from 1
* @return int the wished offset
*/
function get_offset_object_value($object_id) {
    // Object id -> position of the object (map sorted by object id).
    $position_by_id = $this->_get_positions_ordered();
    // Offsets re-indexed from 0 so they can be addressed by position.
    $offset_by_position = $this->_get_offsets_starting_from_zero();
    // Shifts are already indexed by position.
    $shift_by_position = $this->shifts;

    $position = $position_by_id[$object_id];
    // Original offset plus the size shift recorded for that position.
    return $offset_by_position[$position] + $shift_by_position[$position];
}
/**
* Reads the offset of an object directly from the file content
*
* @note the content is the one returned by $this->get_buffer()
* @param int $object_id an object id, an integer value starting from 1
* @return int the wished offset
*/
function read_offset_object_value($object_id) {
    // Looks for the header "<newline><id> 0 obj" in the buffer and derives
    // the offset from where the text following that header begins.
    $footer = ''; //'endobj' or comment;
    // '\n' is a literal backslash + n here: it is the regex escape for a
    // newline and counts as two characters in strlen() below.
    $header_pattern = $footer . '\n' . $object_id . ' 0 obj';
    $pieces = preg_split('/' . $header_pattern . '/', $this->get_buffer(), -1, PREG_SPLIT_OFFSET_CAPTURE);
    $after_header = intval($pieces[1][1]);
    return $after_header - strlen($header_pattern) + strlen($footer) + 2;
}
/**
* Fix the offset of the xref table
*
*/
function fix_xref_start() {
    // Rewrites the line holding the startxref value. Depending on the modes,
    // the value is computed from the recorded pointer (non safe mode), read
    // back from the buffer (safe mode), or both are compared (check mode),
    // in which case the value read from the buffer wins on a mismatch.
    $lines = &$this->pdf_entries;
    $chatty = ($this->verbose && ($this->verbose_level > 1));
    $use_computed = !$this->safe_mode;

    if ($use_computed || $this->check_mode) {
        $computed = $this->get_xref_start_value(); //get computed value from old one
        if ($use_computed) {
            $xref_start_value = $computed;
        }
    }
    if ($this->safe_mode || $this->check_mode) {
        $scanned = $this->read_xref_start_value(); //read direct from new file content
        if ($this->safe_mode) {
            $xref_start_value = $scanned;
        }
    }

    if ($this->check_mode) {
        if ($computed != $scanned) {
            if ($chatty) {
                echo "<br>xrefstart's value must be {$scanned} calculated is {$computed}.Don't worry, FPDFM-merge will fix it for you.<br>";
            }
            $xref_start_value = $scanned; //Overrides with the good value
            if ($this->halt_mode) {
                $this->Error("Halt on error mode enabled, aborting. Use \$pdf->set_modes('halt',false); to disable this mode and go further fixing corrupted pdf.");
            }
        } elseif ($chatty) {
            echo "<br>xrefstart's value for the file is correct and vaults <b>$xref_start_value</b>";
        }
    }

    //updates xrefstart's value
    $target_line = $this->get_xref_start_line();
    $lines[$target_line] = "$xref_start_value";
}
/**
* Get the offsets table 0 indexed
*
* @return array $offsets
*/
function _get_offsets_starting_from_zero() {
    // Same offsets, in the same order, with keys renumbered 0..n-1.
    return array_values($this->offsets);
}
/**
* Sorts the position array by key
*
* @return array $positions the ordered positions
*/
function _get_positions_ordered() {
    // Works on a copy so $this->positions keeps its original order.
    $sorted_by_key = $this->positions;
    ksort($sorted_by_key);
    return $sorted_by_key;
}
/**
* Fix the xref table by rebuilding its offsets entries
*
*/
function fix_xref_table() {
    // Rewrites every in-use entry of the xref table with a fresh offset.
    // Depending on the modes, each offset is computed from the recorded
    // offsets and shifts (non safe mode), read back from the buffer (safe
    // mode), or both are compared (check mode), in which case the value read
    // from the buffer wins on a mismatch.
    $xref_table=$this->get_xref_table();
    $xLen=$xref_table["infos"]["count"];
    $pdf_entries=&$this->pdf_entries;
    //Do some checks
    $oLen=count($this->offsets);
    if($xLen != $oLen) {
        //Congratulations you won the corrupted Error Prize
        $this->Error("Number of objects ($oLen) differs with number of xrefs ($xLen), something , pdf xref table is corrupted :(");
        return;
    }
    //jump over len and header, this is the first entry with n
    $first_xref_entry_line=$xref_table["infos"]["line"]+3;
    //!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !)
    // Verbosity is gated on verbose_level, as in fix_xref_start(); comparing
    // the verbose flag itself with 1 never enabled the output for a boolean flag.
    $verbose_fix=($this->verbose&&($this->verbose_level>1));
    $calculate_offset_value=((!$this->safe_mode)||$this->check_mode);
    $extract_offset_value_from_file=($this->safe_mode||$this->check_mode);
    for($i=0;$i<$xLen;$i++) {
        $obj_id=$i+1;
        //Try two way to retrieve xref offset value of an object of the given id
        if($calculate_offset_value) {
            $offset_value_calculated=$this->get_offset_object_value($obj_id);
            if(!$this->safe_mode) $offset_value=$offset_value_calculated;
        }
        if($extract_offset_value_from_file) {
            $offset_value_read=$this->read_offset_object_value($obj_id);
            if($this->safe_mode) $offset_value=$offset_value_read;
        }
        if($this->check_mode) {
            if($offset_value_calculated != $offset_value_read) {
                if($verbose_fix) echo "<br>Offset for object $obj_id read is <b>$offset_value_read</b>, calculated $offset_value_calculated";
                $offset_value=$offset_value_read; //overrides to fix bad values
                if($this->halt_mode) $this->Error("<br>Offset for object $obj_id read is <b>$offset_value_read</b>, calculated $offset_value_calculated");
            }else {
                if($verbose_fix) echo "<br>Offset for object $obj_id is correct and vaults <b>$offset_value</b>";
            }
        }
        // 10-digit offset, generation 00000, in-use marker 'n'
        $pdf_entries[$first_xref_entry_line+$i]=sprintf('%010d 00000 n ',$offset_value);
    }
}
/**
* Applies a shift offset $shift from the object whose id is given as param
*
* @note offset shift will affect the next objects, taking into account the order they appear in the file
* @access public
* @param int object_id the id of the object whose size has changed
* @param int offset_shift the shift value to use
*/
function apply_offset_shift_from_object($object_id,$offset_shift) {
    // The shift applies to every object located after the given one, so it
    // starts at the position following the object's own position.
    $first_affected = $this->positions[$object_id] + 1;
    $this->_apply_offset_shift($first_affected, $offset_shift);
}
/**
* Applies a shift offset $shift starting at the index $from to the shifts array
*
* @access private
* @param int from the index to start apply the shift
* @param int shift the shift value to use
*/
function _apply_offset_shift($from,$shift) {
    // Adds $shift to every entry of $this->shifts whose key is >= $from.
    $table = &$this->shifts;
    foreach ($table as $position => $current) {
        if ($position >= $from) {
            $table[$position] = $current + $shift;
        }
    }
}
/**
* Decodes a PDF value according to the encoding
*
* @access public
* @param string $encoding the encoding to use for decoding the value, only 'hex' is supported
* @param string value a value to decode
* @return string the value decoded
*/
function decodeValue($encoding,$value) {
    // Only the "hex" encoding is decoded; any other encoding returns the
    // value untouched.
    if ($encoding != "hex") {
        return $value;
    }
    return $this->pdf_decode_field_value($value);
}
/**
*Retrieve the list of supported filters
*
*@note Uses $FPDM_FILTERS array built dynamically
*@param String $sep a separator to merge filter names, default is '|'
*@return String the suported filters
**/
function getFilters($sep="|") {
    // Joins the entries of the global $FPDM_FILTERS list with $sep.
    global $FPDM_FILTERS;
    $filter_names = $FPDM_FILTERS;
    return implode($sep, $filter_names);
}
/**
*Get a filter by name
*
*@param name a string matching one of the supported default filters (marked with +) *
*Without parameters:
*+ ASCIIHexDecode : Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
*+ ASCII85Decode : Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
* RunLengthDecode : Decompresses data encoded using a byte-oriented run-length encoding algorithm, reproducing the original text or binary data (typically monochrome image data, or any data that contains frequent long runs of a single byte value).
* JPXDecode : (PDF 1.5) Decompresses data encoded using the wavelet-based JPEG2000 standard, reproducing the original image data.
*With parameter(s):
*+ LZWDecode : Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
*+ FlateDecode (PDF 1.2): Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
* CCITTFaxDecode : Decompresses data encoded using the CCITT facsimile standard, reproducing the original data (typically monochrome image data at 1 bit per pixel).
* JBIG2Decode (PDF 1.4) :Decompresses data encoded using the JBIG2 standard, reproducing the original monochrome (1 bit per pixel) image data (or an approximation of that data).
* DCTDecode : Decompresses data encoded using a DCT (discrete cosine transform) technique based on the JPEG standard, reproducing image sample data that approximates the original data.
* Crypt (PDF 1.5) :Decrypts data encrypted by a security handler, reproducing the data as it was before encryption.
*@return the wished filter class to access the stream
**/
function getFilter($name) {
    // Returns a new filter object for the given PDF filter name.
    // Unsupported names are reported through Error(); if Error() returns,
    // null is returned explicitly instead of an undefined variable.
    switch($name) {
        case "LZWDecode":
            return new FilterLZW();
        case "ASCIIHexDecode":
            return new FilterASCIIHex();
        case "ASCII85Decode":
            return new FilterASCII85();
        case "FlateDecode":
            return new FilterFlate();
        case "Standard": //Raw
            return new FilterStandard();
    }
    $this->Error("getFilter cannot open stream of object because filter '{$name}' is not supported, sorry.");
    return null;
}
//========= Stream manipulation stuff (alpha, not used by now!) ================
/**
* Detect if the stream has a textual content
*
* @access public
* @param string $stream the string content of the stream
* @return boolean
*/
function is_text_stream($stream_content) {
    // A stream counts as textual when it holds a "Td" operator followed by a
    // (...) or <...> operand and a "Tj" operator. Returns preg_match()'s result.
    $text_operator_pattern = "/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/";
    return preg_match($text_operator_pattern, $stream_content);
}
/**
* changes the text value of a text stream
*
* @access public
* @param array $stream the stream defintion retrieved during PDF parsing
* @param string $value the new text value
*/
function change_stream_value($stream,$value) {
//--------------------------------------------
// Intended to replace the text of a stream, update its /Length and filter
// definitions in $this->pdf_entries and propagate the size shift.
// NOTE(review): this method sits under the "alpha, not used by now" banner and
// reads several variables that are never assigned inside it ($is_text_stream,
// $stream_content, $obj, $lmin), see the notes below.
$entries=&$this->pdf_entries;
$verbose_parsing=($this->verbose&&($this->verbose_level>3));
// NOTE(review): $is_text_stream is not defined in this method, so the guarded
// body below is not executed as written.
if($is_text_stream) {
$OldLen=$stream["length"]["value"];
$lMin=$stream["start"];
$lMax=$stream["end"];
// NOTE(review): $stream_content is not defined before this call.
$stream_content=$this->_set_text_value($stream_content,$value);
$NewLen=strlen($stream_content);
for($l=$lMin;$l<=$lMax;$l++) {
if($l==$lMin) {
// The first line of the stream receives the whole new content
$entries[$lMin]=$stream_content;
//Update the length
$stream_def_line=$stream["length"]["line"];
$stream_def=$entries[$stream_def_line];
$stream_def=preg_replace("/\/Length\s*(\d+)/",'/Length '.$NewLen,$stream_def);
$entries[$stream_def_line]=$stream_def;
//update the filter type...
$stream_def_line=$stream["filters"]["line"];
$stream_def=$entries[$stream_def_line];
if($verbose_parsing) {
echo "<pre>";
echo htmlentities(print_r($stream_def,true));
echo "</pre>";
}
//...to filter Standard
$stream_def=preg_replace($this->streams_filter,'/Standard ',$stream_def);
$entries[$stream_def_line]=$stream_def;
//Update the shift
$size_shift=$NewLen-$OldLen;
// NOTE(review): $obj is not defined in this method.
$this->apply_offset_shift_from_object($obj,$size_shift);
// NOTE(review): $lmin (lower-case m) is not defined; $lMin was presumably meant.
}else if($lmin!=$lMax) {
// Remaining lines of the former stream content are dropped
unset($entries[$l]);
}
}
if($verbose_parsing) {
var_dump($stream_content);
}
}
}
/**
* Overrides the value between Td and Tj, omitting the <> or () delimiters
*
* @note core method
* @access private
* @param string $stream the content of the stream
* @param string $value the new text value
* @return string the stream content with the text value replaced
*/
function _set_text_value($stream,$value) {
    // Replaces the operand found between "Td" and "Tj" with $value and
    // returns the resulting stream content. Only the first occurrence is
    // replaced; the surrounding delimiters and operators are kept.
    $chunks=preg_split("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream,0,PREG_SPLIT_DELIM_CAPTURE);
    // Without a match there is a single chunk and nothing to replace:
    // return the content unchanged rather than appending the value to it.
    if(!is_array($chunks) || count($chunks)<4) {
        return $stream;
    }
    // Layout: [0]=text before, [1]=opening part, [2]=operand, [3]=closing part, ...
    $chunks[2]=$value;
    // Separator first: the (array, string) argument order is rejected by PHP 8.
    return implode('',$chunks);
}
//================================
function _extract_pdf_definition_value($name,$line,&$match) {
    // Matches $line against the pattern registered for $name in the global
    // $FPDM_REGEXPS table; when that fails, retries with the compact form
    // where the value is glued to the name (/name/value).
    // Returns preg_match()'s result and fills $match with the captures.
    global $FPDM_REGEXPS;
    $found = preg_match($FPDM_REGEXPS["$name"], $line, $match);
    if ($found) {
        return $found;
    }
    $glued_pattern = "/" . preg_quote($name, '/') . "\/(\w+)/";
    return preg_match($glued_pattern, $line, $match);
}
function extract_pdf_definition_value($name,$line,&$match) {
    // Checked front-end of _extract_pdf_definition_value(): only names that
    // have an entry in the global $FPDM_REGEXPS table are accepted.
    // Returns preg_match()'s result and fills $match with the captures.
    global $FPDM_REGEXPS;
    if(!array_key_exists($name,$FPDM_REGEXPS)) {
        $this->Error("extract_pdf_definition_value() does not support definition '$name'");
        // Only reached when Error() returns: report "no match" explicitly
        // instead of returning an undefined variable.
        return 0;
    }
    return $this->_extract_pdf_definition_value($name,$line,$match);
}
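/**
* Parses the lines entries of a PDF (read from $this->pdf_entries)
*
* @access public
* @param array $lines receives the parsed definitions: one entry per named Widget field, plus the xref infos under the '$_XREF_$' key
* @return integer the number of entries held by $lines once parsing is done
*/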
function parsePDFEntries(&$lines){
//--------------------------------
// Line-by-line state machine over $this->pdf_entries:
//  - before the xref table: detects object headers/footers, stream
//    definitions, document infos and field definitions (name, values,
//    checkbox states, max length);
//  - from the xref keyword on: reads the xref entries, then the trailer and
//    the startxref value.
// Along the way it records, per object, its byte offset ($this->offsets), its
// order of appearance ($this->positions) and a zero shift ($this->shifts),
// and advances $this->pointer by the length of each line plus one.
$entries=&$this->pdf_entries;
$CountLines = count($entries);
$Counter=0;
$obj=0; //this is an invalid object id, we use it to know if we are into an object
//FIX: parse checkbox definition
$ap_d_yes='';
$ap_d_no='';
$ap_line=0;
$ap_d_line=0;
$as='';
//ENDFIX
$type='';
$subtype='';
$name='';
$value='';
$default_maxLen=0; //No limit
$default_tooltip_line=0; //Tooltip is optional as it may not be defined
$xref_table=0;
$trailer_table=0;
$n=0; //Position of an object, in the order it is declared in the pdf file
$stream=array();
$id_def=false; //true when parsing/decoding trailer ID
$id_single_line_def=false; //true when the two ID chunks are one the same line
$id_multi_line_def=false; //true or OpenOffice 3.2
$creator='';
$producer='';
$creationDate='';
$verbose_parsing=($this->verbose&&($this->verbose_level>3));
$verbose_decoding=($this->verbose&&($this->verbose_level>4));
if($this->verbose) $this->dumpContent("Starting to parse $CountLines entries","PDF parse");
while ( $Counter < $CountLines ){
$CurLine = $entries[$Counter];
if($verbose_parsing) $this->dumpContent($CurLine,"====Parsing Line($Counter)");
// $xref_table stays 0 until the xref keyword is met, then counts the lines read since
if(!$xref_table) {
//Header of an object?
if(preg_match("/^(\d+) (\d+) obj/",$CurLine,$match)) {
$obj=intval($match[1]);
// Remember where the object starts and in which order it appears
$this->offsets[$obj]=$this->pointer;
$this->positions[$obj]=$n;
$this->shifts[$n]=0;
$n++;
if($verbose_parsing) $this->dumpContent($CurLine,"====Opening object($obj) at line $Counter");
$object=array();
$object["values"]=array();
$object["constraints"]=array();
$object["constraints"]["maxlen"]=$default_maxLen;
$object["infos"]=array();
$object["infos"]["object"]=intval($obj);
$object["infos"]["tooltip"]=$default_tooltip_line;
} else {
//Object has been opened
if($obj) {
//Footer of an object?
if(preg_match("/endobj/",$CurLine,$match)) {
if($verbose_parsing) $this->dumpContent("","====Closing object($obj) at line $Counter");
//We process fields here, save only Annotations texts that are supported by now
if($subtype=="Widget") {
if($name != '') {
// The parsed object is stored under the field name
$lines["$name"]=$object;
if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) is a text annotation of name '$name', saves it.");
}//else
// $this->Error("$type $subtype (obj id=$obj) is a text annotation without a name, this cannot be.");
$values=$object["values"];
//Sanity values checks, watchdog.
// if(!array_key_exists("current",$values)) $this->Error("Cannot find value (/V) for field $name");
// if(!array_key_exists("default",$values)) $this->Error("Cannot find default value (/DV) for field $name");
}else
if($verbose_parsing) $this->dumpContent("Object $type $subtype (obj id=$obj) is not supported");
// Reset the per-object state before the next object
$object=null;
$obj=0;
//FIX: parse checkbox definition
$ap_d_yes='';
$ap_d_no='';
$ap_line=0;
$ap_d_line=0;
$as='';
//ENDFIX
$type='';
$subtype='';
$name='';
$value='';
$maxLen=0;
} else {
if(preg_match("/\/Length\s*(\d+)/",$CurLine,$match)) {
$stream["length"]=array("line"=>$Counter,"value"=>$match[1]);
$stream["start"]=0;
$stream["end"]=0;
$stream["content"]='';
if($verbose_parsing) $this->dumpContent($CurLine,"->Stream filter length definition(<font color=\"darkorange\">{$match[1]}</font>) for object($obj) at line $Counter");
}
//Handles single filter /Filter /filter_type as well as well as filter chains such as /Filter [/filter_type1 /filter_type2 .../filter_typeN]
if(preg_match_all($this->streams_filter,$CurLine,$matches)) {
//$this->dumpContent($this->streams_filter);
/*$stream_filter=$match[1];
$stream_filter=trim(preg_replace('/(<<|\/Length\s*\d+|>>)/', '', $stream_filter),' ');
$stream_filters=preg_split('/\s*\//',$stream_filter);
array_shift($stream_filters);*/
$stream_filters=$matches[2];
$stream["filters"]=array("line"=>$Counter, "type"=>$stream_filters);
if($verbose_parsing) {
//var_dump($stream_filters);
$stream_filter=implode(" ",$stream_filters);
$this->dumpContent($CurLine,"->Stream filter type definition(<font color=\"darkorange\">$stream_filter</font>) for object($obj) at line $Counter");
}
}
if(array_key_exists("length",$stream)) { //length is mandatory
if(preg_match("/\b(stream|endstream)\b/",$CurLine,$match)) {
if(!array_key_exists("filters",$stream)) {//filter type is optional, if none is given, its standard
$stream["filters"]=array("type"=>array("Standard"));
if($verbose_parsing) {
var_dump($stream);
$this->dumpContent($CurLine,"->No stream filter type definition for object($obj) was found, setting it to '<font color=\"darkorange\">Standard</font>'");
}
}
if($match[1] == "stream") {
if($verbose_parsing) $this->dumpContent($CurLine,"->Opening stream for object($obj) at line $Counter");
$stream["start"]=$Counter+1;
}else {
// endstream: the content is the lines between the stream and endstream keywords
$stream["end"]=$Counter-1;
$stream["content"]=implode("\n",array_slice($entries,$stream["start"],$stream["end"]-$stream["start"]+1));
$filters=$stream["filters"]["type"];
$f=count($filters);
$stream_content=$stream["content"];
//var_dump($filters);
//$filters_type=$filters["type"];
//now process the stream, ie unpack it if needed
//by decoding in the reverse order the streams have been encoded
//This is done by applying decode using the filters in the order given by /Filter.
foreach($filters as $filter_name) {
$stream_filter=$this->getFilter($filter_name);
$stream_content=$stream_filter->decode($stream_content);
if($verbose_decoding) {
echo "<br><font color=\"blue\"><u>Stream decoded using filter '<font color=\"darkorange\">$filter_name</font>'</u>:[<pre>";
var_dump($stream_content); //todo : manipulate this content and adjust offsets.
echo "</pre>]</font>";
}
}
if($verbose_parsing) {
$this->dumpEntries($stream);
echo "<font color=\"blue\">";
if($this->is_text_stream($stream_content)) {
echo "<u>Stream text unfiltered</u>:[<pre>";
} else {
echo "<u>Stream unfiltered</u>:[<pre>";
}
var_dump($stream_content);
echo "</pre>]</font>";
$this->dumpContent($CurLine,"->Closing stream for object($obj) at line $Counter");
}
// The decoded content is only used for the verbose output above, then dropped
$stream=array();
}
}else if($stream["start"]>0){
//stream content line that will be processed on endstream...
}
} else {
/*
Producer<FEFF004F00700065006E004F00660066006900630065002E006F0072006700200033002E0032>
/CreationDate (D:20101225151810+01'00')>>
*/
if(($creator=='')&&preg_match("/\/Creator\<([^\>]+)\>/",$CurLine,$values)) {
$creator=$this->decodeValue("hex",$values[1]);
if($verbose_parsing) echo("Creator read ($creator)");
$this->info["Creator"]=$creator;
}
if(($producer=='')&&preg_match("/\/Producer\<([^\>]+)\>/",$CurLine,$values)) {
$producer=$this->decodeValue("hex",$values[1]);
if($verbose_parsing) echo("Producer read ($producer)");
$this->info["Producer"]=$producer;
}
if(($creationDate=='')&&preg_match("/\/CreationDate\(([^\)]+)\)/",$CurLine,$values)) {
$creationDate=$values[1];
if($verbose_parsing) echo("Creation date read ($creationDate)");
$this->info["CreationDate"]=$creationDate;
}
//=== DEFINITION ====
//preg_match("/^\/Type\s+\/(\w+)$/",$CurLine,$match)
$match=array();
//FIX: parse checkbox definition
// The yes/no state names are taken from the lines located 4 and 5 lines after
// the /AP line (2 and 3 lines after the /D line), so this relies on that
// exact line layout of the appearance dictionary.
if($this->useCheckboxParser && ('' == $ap_d_yes || '' == $ap_d_no || '' == $as)) {
if (!$ap_line && '/AP' == substr($CurLine, 0, 3)) {
if ($verbose_parsing) {
echo("<br>Found AP Line '<i>$Counter</i>'");
}
$ap_line = $Counter;
} elseif (!$ap_d_line && '/D' == substr($CurLine, 0, 2)) {
if ($verbose_parsing) {
echo("<br>Found D Line '<i>$Counter</i>'");
}
$ap_d_line = $Counter;
} elseif (($ap_line==$Counter-4)&&($ap_d_line==$Counter-2)&&($ap_d_yes=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) {
$ap_d_yes=$match[1];
if ($verbose_parsing) {
echo("<br>Object's checkbox_yes is '<i>$ap_d_yes</i>'");
}
$object["infos"]["checkbox_yes"]=$ap_d_yes;
} elseif (($ap_line==$Counter-5)&&($ap_d_line==$Counter-3)&&($ap_d_no=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) {
$ap_d_no=$match[1];
if ($verbose_parsing) {
echo("<br>Object's checkbox_no is '<i>$ap_d_no</i>'");
}
$object["infos"]["checkbox_no"]=$ap_d_no;
} elseif (($as=='')&&$this->extract_pdf_definition_value("/AS", $CurLine, $match)) {
$as=$match[1];
if ($verbose_parsing) {
echo("<br>Object's AS is '<i>$as</i>'");
}
$object["infos"]["checkbox_state"]=$as;
$object["infos"]["checkbox_state_line"]=$Counter;
}
}
//ENDFIX
if(($type=='')||($subtype=='')||($name=="")) {
if(($type=='')&&$this->extract_pdf_definition_value("/Type",$CurLine,$match)) {
if($match[1]!='Border') {
$type=$match[1];
if($verbose_parsing) echo("<br>Object's type is '<i>$type</i>'");
}
}
if(($subtype=='')&&$this->extract_pdf_definition_value("/Subtype",$CurLine,$match)) {
$subtype=$match[1];
if($verbose_parsing) echo("<br>Object's subType is '<i>$subtype</i>'");
}
if(($name=="")&&preg_match("/^\/T\s?\((.+)\)\s*$/",$this->_protectContentValues($CurLine),$match)) {
$name=$this->_unprotectContentValues($match[1]);
//FIX: convert ASCII object names to utf-8
// don't use utf8_encode($name) yet, it's core function since php 7.2
$name = mb_convert_encoding($name, 'UTF-8', 'ASCII');
//ENDFIX
if($verbose_parsing) echo ("Object's name is '<i>$name</i>'");
$object["infos"]["name"]=$name; //Keep a track
$object["infos"]["name_line"]=$Counter;
//$this->dumpContent(" Name [$name]");
}
}// else {
//=== CONTENT ====
//$this->dumpContent($CurLine);
//=== Now, start the serious work , read DV, V Values and eventually TU
//note if(preg_match_all("/^\/(V|DV)\s+(\<|\))([^\)\>]+)(\)|\>)/",$CurLine,$matches)) {
//do not work as all is encoded on the same line...
if(preg_match("/^\/(V|DV|TU)\s+([\<\(])/",$CurLine,$def)) {
//get an human readable format of value type and encoding
if($def[1] == "TU") {
$valuetype="info";
$object["infos"]["tooltip"]=$Counter;
} else {
$valuetype=($def[1] == "DV") ? "default" : "current";
// Only the line number is kept here; the value itself is not stored in $object
$object["values"]["$valuetype"]=$Counter; //Set a marker to process lately
}
$encoding=($def[2]=="<") ? "hex" : "plain";
// NOTE(review): the closing alternative of this pattern is ")" or ">)", so a
// hex value closed by a bare ">" does not seem to match; within this method
// $value is only used for the verbose output below.
if(preg_match("/^\/(V|DV|TU)\s+(\<|\)|\()([^\)\>]*)(\)|\>\))/",$CurLine,$values)) {
$value=$values[3];
$value=$this->decodeValue($encoding,$value);
}else
$value='';
if($verbose_parsing)
$this->dumpContent("$type $subtype (obj id=$obj) has $encoding $valuetype value [$value] at line $Counter");
}else if(preg_match("/^\/MaxLen\s+(\d+)/",$CurLine,$values)) {
$maxLen=$values[1];
$object["constraints"]["maxlen"]=intval($maxLen);
} else
if($verbose_parsing) echo("WARNING: definition ignored");
// When the file does not already ask for it, '/NeedAppearances true ' is
// prepended to the line that starts with /Fields
if(substr($CurLine,0,7)=='/Fields' && !$this->needAppearancesTrue) {
$CurLine='/NeedAppearances true '.$CurLine;
$entries[$Counter]=$CurLine;
}
//TODO: Fetch the XObject..and change Td <> Tj
/* if(preg_match("/^\/AP/",$CurLine,$values)) {
//die("stop");
$CurLine=''; //clear link to Xobject
$entries[$Counter]=$CurLine;
}*/
// }
}
}
}
//~~~~~Xref table header? ~~~~~~
if(preg_match("/\bxref\b/",$CurLine,$match)) {
$xref_table=1;
if($verbose_parsing) $this->dumpContent("->Starting xref table at line $Counter:[$CurLine]");
$lines['$_XREF_$']=array();
$lines['$_XREF_$']["entries"]=array();
$lines['$_XREF_$']["infos"]=array();
$lines['$_XREF_$']["infos"]["line"]=$Counter;
$lines['$_XREF_$']["infos"]["start"]=array();
$start_pointer=$this->pointer+strpos($CurLine,"xref"); //HACK for PDFcreator 1.0.0
$lines['$_XREF_$']["infos"]["start"]["pointer"]=$start_pointer;
}
}
// NOTE(review): $obj_header is assigned here but not read anywhere in this method
$obj_header=false;
} else {
//We are inside the xref table
//$this->dumpContent($CurLine,"");
// $xref_table counts the lines since the xref keyword: 2 = count line, 3 = first (free) entry
$xref_table=$xref_table+1;
switch($xref_table) {
case 2:
if(preg_match("/^(\d+) (\d+)/",$CurLine,$match)) {
$refs_count=intval($match[2]);//xref_table length+1 (includes this line)
$lines['$_XREF_$']["infos"]["count"]=$refs_count-1;
if($verbose_parsing) $this->dumpContent("Xref table length is $refs_count");
}else
if($verbose_parsing) $this->dumpContent("WARNING: Xref table length ignored!");
break;
case 3:
//Should be 0000000000 65535 f
if($verbose_parsing) $this->dumpContent("this is Xref table header, should be 0000000000 65535 f ");
break;
default:
//xref entries
// NOTE(review): $refs_count is only assigned in case 2 when its pattern matched
if($refs_count>0) {
$xref=$xref_table-3;
if($refs_count == 1) {//Last one , due to the shift, is the trailer
if(!preg_match("/^trailer/",$CurLine)) //if not, Houston we have a problem
$this->Error("xref_table length corrupted?: Trailer not found at expected!");
else
$trailer_table=1;
}else {
$lines['$_XREF_$']["entries"][$xref]=$CurLine;
if($verbose_parsing) $this->dumpContent("Xref table entry for object $xref found.");
}
$refs_count--;
} else { //We are inside the trailer
if($trailer_table==1) { //should be <<
if(trim($CurLine) != '') { //HACK: PDFCreator Version 1.0.0 has an extra CR after trailer
if(!preg_match("/<</",$CurLine,$match))
$this->Error("trailer_table corrupted?; missing start delimiter << ");
$trailer_table++;
}
// NOTE(review): $id_def is initialised to false and only ever set to true or
// false in this method, so !is_null($id_def) looks always true here.
}else if(($trailer_table>0)&&((!is_null($id_def))||preg_match("/^\/(Size|Root|Info|ID|DocChecksum)/",$CurLine,$match))) {
//Value can be extracted using (\d+|\[[^\]]+\])
if(preg_match("/\/Size (\d+)/",$CurLine,$match)) {
//Seems to match with xref entries count..
$size_read=$match[1];
$this->info["size"]=$size_read;
if($verbose_parsing) $this->dumpContent("Size read ($size_read) for pdf found.");
}
if(preg_match("/^\/ID\s*\[\s*<([\da-fA-F]+)/",$CurLine,$match)) {
$oid=$match[1];
$id_def=true;
if($verbose_parsing) $this->dumpContent("ID chunk one ($oid) for pdf found.");
//Determines if the ID definition is one line...
if(preg_match("/\>\s?\</",$CurLine,$match))
$id_single_line_def=true;
}
if($id_def) {//we are inside the ID definition
if($id_single_line_def||$id_multi_line_def) {
//decode the second ID chunk
if(preg_match("/([\da-fA-F]+)>.*$/",$CurLine,$match)) {
$tid=$match[1];
$this->info["ID"]=array($oid,$tid);
if($verbose_parsing) $this->dumpContent("ID chunk two ($tid) for pdf found.");
$id_def=false;
}else
$this->Error("trailer_table corrupted?; ID chunk two can not be decoded ");
} else
$id_multi_line_def=true;
}
if(preg_match("/^\/DocChecksum \/([\da-fA-F]+)/",$CurLine,$match)) {
$checksum=$match[1];
$this->info["checksum"]=$checksum;
if($verbose_parsing) $this->dumpContent("Checksum read ($checksum) for pdf found.");
}
if(preg_match("/>>/",$CurLine,$match))
$trailer_table=-1;//negative value: expects startxref to follow
} else {
// After the trailer: -1 expects the startxref keyword, -2 its value
switch($trailer_table) {
case -1://startxref
if(!preg_match("/^startxref/",$CurLine,$match))
$this->Error("startxref tag expected, read $CurLine");
break;
case -2://startxref's value
if(preg_match("/^(\d+)/",$CurLine,$match)) {
$lines['$_XREF_$']["infos"]["start"]["value"]=intval($match[1]);
$lines['$_XREF_$']["infos"]["start"]["line"]=$Counter;
}else
$this->Error("startxref value expected, read $CurLine");
break;
default://%%EOF
}
$trailer_table--;
}
}
}
}
// Byte position of the next line
$this->pointer=$this->pointer+strlen($CurLine)+1; //+1 due to \n
$Counter++;
}
if($this->verbose) {
$refs=(array_key_exists('$_XREF_$',$lines)) ? $lines['$_XREF_$']["infos"]["count"] : 0;
if($refs) {
$this->dumpContent("PDF parse retrieved $refs refs");
}else {
$this->dumpContent("PDF parse retrieved no refs, seems the xref table is broken or inacessible, this is bad!");
}
}
return count($lines);
}
/**
* Protect ( ) that may be in value or names
*
* @access protected
* @param string $content the FDF content to protect values
* @return string the content protected
*/
function _protectContentValues($content) {
    //-------------------------------------------------
    // Swap each backslash-escaped parenthesis for a placeholder token, so that
    // the field regexp in parseFDFContent() only ever meets a bare ")" at the
    // real end of a name or value.
    $escapedParens=array("\\(","\\)");
    $placeholders=array("$@#","#@$");
    return str_replace($escapedParens,$placeholders,$content);
}
/**
* Unprotect ( ) that may be in value or names
*
* @access protected
* @param string $content the FDF content with protected values
* @return string the content unprotected
*/
function _unprotectContentValues($content) {
    //--------------------------------------------------
    // Put the escaped parentheses back in place of the placeholder tokens,
    // then let stripcslashes() resolve every backslash escape in the result.
    $restored=str_replace(array("$@#","#@$"),array("\\(","\\)"),$content);
    return stripcslashes($restored);
}
/**
* Parses the content of an FDF file and returns the extracted field data
*
*@access public
*@return array $fields the data of the fields parsed
*/
function parseFDFContent(){
    //-------------------------
    // Purpose: extract the field declarations found in $this->fdf_content.
    //          A declaration is a T (name) entry paired with a V (value) entry,
    //          accepted in either order.
    // Returns: array mapping each decoded field name to its decoded value;
    //          an empty array when no declaration is found.
    // Errors:  incomplete, unnamed or duplicated declarations are reported
    //          through Error().

    //Start from an empty result so that the "no fields" path below dumps and
    //returns an array instead of reading an undefined variable.
    $fields=array();
    $content=$this->fdf_content;
    $content=$this->_protectContentValues($content);//protect ( ) that may be in value or names...
    if($this->verbose) $this->dumpEntries($content,"FDF parse");
    //..so that this regexp can do its job without annoyances
    if(preg_match_all("/(T|V)\s*\(([^\)]+)\)\s*\/(T|V)\s*\(([^\)]+)\)/", $content,$matches, PREG_PATTERN_ORDER)) {
        $fMax=count($matches[0]);
        for($f=0;$f<$fMax;$f++) {
            $value='';
            $name='';
            if($matches[1][$f]=="V") {
                //value declared first, the name must follow
                $value=$matches[2][$f];
                if($matches[3][$f]=="T")
                    $name=$matches[4][$f];
                else
                    $this->Error("Field $f ignored , incomplete field declaration, name is expected");
            } else {
                if($matches[1][$f]=="T") {
                    //name declared first, the value must follow
                    $name=$matches[2][$f];
                    if($matches[3][$f]=="V")
                        $value=$matches[4][$f];
                    else
                        $this->Error("Field $f ignored , incomplete field declaration, value is expected");
                } else
                    $this->Error("Field $f ignored , Invalid field keys ({$matches[0][$f]})");
            }
            if($name!='') {
                //Decode the name before the duplicate check: $fields is keyed by
                //decoded names, so testing the still-protected name would miss
                //duplicates of any name that contains an escape sequence.
                $name=$this->_unprotectContentValues($name);
                if(array_key_exists($name,$fields))
                    $this->Error("Field $f ignored , already defined");
                else {
                    $value=$this->_unprotectContentValues($value);
                    if($this->verbose)
                        $this->dumpContent("FDF field [$name] has its value set to \"$value\"");
                    $fields[$name]=$value;
                }
            } else
                $this->Error("Field $f ignored , no name");
        }
    } else
        if($this->verbose) $this->dumpContent($fields,"FDF has no fields",false);
    if($this->verbose) $this->dumpContent($fields,"FDF parsed",false);
    return $fields;
}
/**
* Close the opened file
*/
function closeFile() {
    //--------------------
    // Nothing to release unless a live stream handle is still attached.
    if (!isset($this->f) || !is_resource($this->f)) {
        return;
    }
    // Close the handle, then drop the property holding it.
    fclose($this->f);
    unset($this->f);
}
/**
* Print Error and die
*
* @param string $msg Error-Message
*/
function Error($msg) {
    //--------------------
    // Print the prefixed message and terminate the script immediately.
    $output='<b>FPDF-Merge Error:</b> '.$msg;
    exit($output);
}
}
}
unset($__tmp);