"/\/AS\s+\/(\w+)$/", "name"=>"/\/(\w+)/", // "/AP_D_SingleLine"=>"/\/D\s+\/(\w+)\s+\d+\s+\d+\s+R\s+\/(\w+)$/", //ENDFIX "/Type"=>"/\/Type\s+\/(\w+)$/", "/Subtype" =>"/^\/Subtype\s+\/(\w+)$/" ); //Major stream filters come from FPDI's stuff but I've added some :) if (!defined('FPDM_DIRECT')) { $FPDM_FILTERS = array("LZWDecode", "ASCIIHexDecode", "ASCII85Decode", "FlateDecode", "Standard" ); } // require_once("filters/FilterASCIIHex.php"); // require_once("filters/FilterASCII85.php"); // require_once("filters/FilterFlate.php"); // require_once("filters/FilterLZW.php"); // require_once("filters/FilterStandard.php"); $__tmp = version_compare(phpversion(), "5") == -1 ? array('FPDM') : array('FPDM', false); if (!call_user_func_array('class_exists', $__tmp)) { define('FPDM_VERSION',2.9); define('FPDM_INVALID',0); define('FPDM_STATIC',1); define('FPDM_COMMON',2); define('FPDM_VERBOSE',3); define('FPDM_CACHE',dirname(__FILE__).'/export/cache/'); //cache directory for fdf temporary files needed by pdftk. define('FPDM_PASSWORD_MAX_LEN',15); //Security to prevent shell overflow. 
class FPDM {
    //@@@@@@@@@

    var $useCheckboxParser = false; //boolean: allows activation of custom checkbox parser (not available in original FPDM source)

    var $pdf_source = '';           //string: full pathname to the input pdf, a form file
    var $fdf_source = '';           //string: full pathname to the input fdf, a form data file
    var $pdf_output = '';           //string: full pathname to the resulting filled pdf

    var $n = 0;
    var $pdf_entries = array();     //array: holds the content of the pdf file as an array of lines
    var $fdf_content = '';          //string: holds the content of the fdf file
    var $fdf_parse_needed = false;  //boolean: false will use $fields data, else extract data from fdf content
    var $value_entries = array();   //array: a map of values to facilitate access and changes

    var $positions = array();       //array, stores what object id is at a given position n ($positions[n] = object id)
    var $offsets = array();         //array of offsets for objects, index is the object's id, starting at 1
    var $pointer = 0;               //integer, current line position in the pdf file during the parsing
    var $shifts = array();          //array, shifts of objects in the order positions they appear in the pdf, starting at 0
    var $shift = 0;                 //integer, global shift of the file size due to object values size changes

    var $streams = '';              //holds streams configuration found during parsing
    var $streams_filter = '';       //regexp to decode filter streams

    var $safe_mode = false;         //boolean, if set, ignore previous offsets, do no calculations for the new xref table, seek pos directly in file
    var $check_mode = false;        //boolean, use this to track offset calculation errors in corrupted pdf files for sample
    var $halt_mode = false;         //if true, stops when an offset error is encountered

    var $info = array();            //array, holds the info properties
    var $fields = array();          //array that holds fields data parsed from FDF

    var $verbose = false;           //boolean, a debug flag to decide whether or not to show internal process
    var $verbose_level = 1;         //integer, default is 1 and if greater than 3, shows internal parsing as well

    var $support = '';              //string set to 'native' for fpdm or 'pdftk' for pdf toolkit
    var $flatten_mode = false;      //if true, flatten field data as text and remove form fields (NOT YET SUPPORTED BY FPDM)
    var $compress_mode = false;     //boolean, pdftk feature only, to compress streams
    var $uncompress_mode = false;   //boolean, pdftk feature only, to uncompress streams
    var $security = array();        //array holding security settings
                                    //(password owner and user, encrypt (set to 40 or 128 or 0), allow), see pdftk help

    var $needAppearancesTrue = false; //boolean, indicates if /NeedAppearances is already set to true
    var $isUTF8 = false;            //boolean (true for UTF-8, false for ISO-8859-1)

    /**
     * Constructor, polymorphic on the number of arguments received.
     *
     * - no argument: reports an error through Error() and initialises nothing
     * - a single argument equal to '[_STATIC_]': nothing is initialised nor loaded
     * - otherwise the first argument is the pdf source; the optional second one is the
     *   fdf source and the optional third one the verbose flag
     *
     *@example Common use: new FPDM($pdf_source, $fdf_source)
     *@param string $pdf_source Source-Filename
     *@param string $fdf_source Source-Filename
     *@param boolean|int $verbose , optional false per default; an integer value is also used as verbose level
     */
    function __construct() {
    //==============

        $args=func_get_args();
        $num_args=func_num_args();

        if($num_args==FPDM_INVALID) {
            $this->Error("Invalid instantiation of FPDM, requires at least one param");
            return;
        }

        //static use: the instance is only a carrier for method calls, nothing to load
        if(($num_args==FPDM_STATIC)&&($args[0]=='[_STATIC_]')) return;

        $FDF_FILE=($num_args>=FPDM_COMMON);
        $VERBOSE_FLAG=($num_args>=FPDM_VERBOSE);

        //Use the verbose value provided, if any
        $verbose=false;
        if($VERBOSE_FLAG) $verbose=$args[2];

        //Common use
        $this->pdf_source = $args[0]; //Blank pdf form

        if($FDF_FILE) {
            $this->fdf_source = $args[1]; //Holds the data of the fields to fill the form
            $this->fdf_parse_needed=true;
        }

        //calculation and map
        $this->offsets=array();
        $this->pointer=0;
        $this->shift=0;
        $this->shifts=array();
        $this->n=0;

        //Stream filters
        $filters=$this->getFilters("|");
        $this->streams_filter="/(\/($filters))+/";

        $this->info=array();

        //Debug modes
        $this->verbose=$verbose;
        $this->verbose_level=($verbose&&is_int($verbose)) ? $verbose : 1;
        $this->safe_mode=false;
        $this->check_mode=false; //script will take much more time if you do so
        $this->halt_mode=true;

        $this->support='native'; //may be overridden
        $this->security=array('password'=>array('owner'=>null,'user'=>null),'encrypt'=>0,'allow'=>array());

        $this->load_file('PDF');
        if($FDF_FILE) $this->load_file('FDF');
    }

    /**
     *Loads a form data to be merged
     *
     *@note this overrides fdf input source if it was previously defined
     *@access public
     *@param string|array $data a FDF file content, or an array containing the values for the fields to change
     *@param boolean $isUTF8 stored in $this->isUTF8 (true for UTF-8, false for ISO-8859-1)
     **/
    function Load($data,$isUTF8=false) {
    //------------------------
        $this->isUTF8 = $isUTF8;
        $this->load_file('FDF',$data);
    }

    /**
     *Loads a file according to its type
     *
     *@access private
     *@param string type 'PDF' or 'FDF'
     *@param String|array content the data content of FDF files only or directly the fields values as array
     **/
    function load_file($type,$content=NULL) {
    //------------------------------------
        switch($type) {
            case "PDF" :
                if($content)
                    $this->Error("load_file do not accept PDF content, only FDF content sorry");
                else
                    $this->pdf_entries = $this->getEntries($this->pdf_source,'PDF');
                break;

            case "FDF" :
                if(is_null($content)) {
                    //no inline data: read the fdf source file given at construction time
                    $this->fdf_content = $this->getContent($this->fdf_source,'FDF');
                    $this->fdf_parse_needed=true;
                } else if(is_array($content)) {
                    //field values given directly, no FDF parsing required
                    $this->fields=$content;
                    $this->fdf_parse_needed=false;
                } else if(is_string($content)) {
                    $this->fdf_content = $content; //TODO: check content
                    $this->fdf_parse_needed=true;
                } else {
                    $this->Error('Invalid content type for this FDF file!');
                }
                break;

            default:
                $this->Error("Invalid file type $type");
        }
    }

    /**
     *Set a mode and play with your power debug toys
     *
     *@access public
     *@note for big boys only coz it may hurt
     *@note enabling 'compress_mode' clears uncompress_mode and vice versa
     *@param string $mode one of 'safe','check','flatten','compress_mode','uncompress_mode','verbose','halt' or 'verbose_level'
     *@param mixed $value the value to assign, an integer for verbose_level
     **/
    function set_modes($mode,$value) {
    //-------------------------------
        switch($mode) {
            case 'safe':
                $this->safe_mode=$value;
                break;
            case 'check':
                $this->check_mode=$value;
                break;
            case 'flatten':
                $this->flatten_mode=$value;
                break;
            case 'compress_mode':
                $this->compress_mode=$value;
                if($value) $this->uncompress_mode=false;
                break;
            case 'uncompress_mode':
                $this->uncompress_mode=$value;
                if($value) $this->compress_mode=false;
                break;
            case 'verbose':
                $this->verbose=$value;
                break;
            case 'halt':
                $this->halt_mode=$value;
                break;
            case 'verbose_level':
                $this->verbose_level=$value;
                break;
            default:
                $this->Error("set_modes error, Invalid mode '$mode'");
        }
    }

    /**
     *Retrieves informations of the pdf
     *
     *@access public
     *@note To track PDF versions and so on...
     *@param Boolean asArray if true the info map is returned, otherwise it is dumped through dumpEntries()
     *@return array|null the info map when $asArray is true
     **/
    function Info($asArray=false) {
    //----------------------
        $yes_no=array(true=>'Yes',false=>'No');

        $info=$this->info;
        $info["Reader"]=($this->support == "native") ? 'FPDF-Merge '.FPDM_VERSION: $this->support;
        $info["Fields"]=$this->fields;
        $info["Modes"]=array(
            'safe'=>$yes_no[(bool)$this->safe_mode],
            'check'=>$yes_no[(bool)$this->check_mode],
            'flatten'=>$yes_no[(bool)$this->flatten_mode],
            'compress_mode'=>$yes_no[(bool)$this->compress_mode],
            'uncompress_mode'=>$yes_no[(bool)$this->uncompress_mode],
            'verbose'=>$this->verbose,
            'verbose_level'=>$this->verbose_level,
            'halt'=>$this->halt_mode
        );

        if($asArray) return $info;

        $this->dumpEntries($info);
    }

    /**
     *Changes the support
     *
     *@access public
     *@note special playskool toy for Christmas dedicated to my impatient fanclub (Grant, Kris, nejck,...)
     *@param String cool 'pdftk' selects the external support, any other value selects 'native'
     **/
    function Plays($cool) {
    //----------------------
        $this->support=($cool=='pdftk') ? 'pdftk' : 'native';
    }

    /**
     *Fixes a corrupted PDF file
     *
     *@access public
     *@internal only switches the 'check' mode on and the 'halt' mode off
     *@note Real work is not made here but by Merge that should be launched after to complete the work
     **/
    function Fix() {
    //---------------
        if(!$this->fields) $this->fields=array(); //Default: No field data
        $this->set_modes('check',true);  //Compare xref table offsets with objects offsets in the pdf file
        $this->set_modes('halt',false);  //Do not stop on errors so fix is applied during merge process
    }

    //######## pdftk's output configuration #######

    /**
     *Decides to use the compress filter to restore compression.
     *@note This is only useful when you want to repack PDF that was previously edited in a text editor like vim or emacs.
     **/
    function Compress() {
    //-------------------
        //set_modes() only knows the key 'compress_mode'
        $this->set_modes('compress_mode',true);
        $this->support="pdftk";
    }

    /**
     *Decides to remove PDF page stream compression by applying the uncompress filter.
     *@note This is only useful when you want to edit PDF code in a text editor like vim or emacs.
     **/
    function Uncompress() {
    //---------------------
        //set_modes() only knows the key 'uncompress_mode'
        $this->set_modes('uncompress_mode',true);
        $this->support="pdftk";
    }

    /**
     *Activates the flatten output to remove form from pdf file keeping field datas.
**/
    function Flatten() {
    //-----------------
        $this->set_modes('flatten',true);
        $this->support="pdftk";
    }

    /***
     *Defines a password type
     *@note switches the support to 'pdftk'
     *@param String type , 'owner' or 'user'
     *@param String code the password to store for that type
     **/
    function Password($type,$code) {
    //------------------------------
        switch($type) {
            case 'owner':
            case 'user':
                $this->security["password"]["$type"]=$code;
                break;
            default:
                $this->Error("Unsupported password type ($type), specify 'owner' or 'user' instead.");
        }
        $this->support="pdftk";
    }

    /**
     *Defines the encryption to the given bits
     *@note switches the support to 'pdftk'
     *@param integer $bits 0, 40 or 128
     **/
    function Encrypt($bits) {
    //-----------------------
        switch($bits) {
            case 0:
            case 40:
            case 128:
                $this->security["encrypt"]=$bits;
                break;
            default:
                $this->Error("Unsupported encrypt value of $bits, only 0, 40 and 128 are supported");
        }
        $this->support="pdftk";
    }

    /**
     *Allow permissions
     *
     *@param Array permmissions If no arg is given, show help.
     *  Permissions are applied to the output PDF only if an encryption
     *  strength is specified or an owner or user password is given. If
     *  permissions are not specified, they default to 'none,' which
     *  means all of the following features are disabled.
     *
     *  The permissions section may include one or more of the following
     *  features:
     *
     *  Printing
     *         Top Quality Printing
     *
     *  DegradedPrinting
     *         Lower Quality Printing
     *
     *  ModifyContents
     *         Also allows Assembly
     *
     *  Assembly
     *
     *  CopyContents
     *         Also allows ScreenReaders
     *
     *  ScreenReaders
     *
     *  ModifyAnnotations
     *         Also allows FillIn
     *
     *  FillIn
     *
     *  AllFeatures
     *         Allows the user to perform all of the above, and top
     *         quality printing.
     *
     *@note a single permission may be given as a string; names that are not listed above are dropped
     **/
    function Allow($permissions=null) {
    //--------------------------
        $perms_help=array(
            'Printing'=>'Top Quality Printing',
            'DegradedPrinting'=>'Lower Quality Printing',
            'ModifyContents' =>'Also allows Assembly',
            'Assembly' => '',
            'CopyContents' => 'Also allows ScreenReaders',
            'ScreenReaders' => '',
            'ModifyAnnotations'=>'Also allows FillIn',
            'FillIn'=>'',
            'AllFeatures'=> "All above"
        );

        if(is_null($permissions)) {
            //NOTE(review): the literal line breaks in this string look like stripped markup - confirm against upstream
            echo '
Info Allow permissions:
';
            print_r($perms_help);
            return;
        }

        if(is_string($permissions)) $permissions=array($permissions);
        $this->security["allow"]=array_intersect($permissions, array_keys($perms_help));
        $this->support="pdftk";
    }

    //#############################

    /**
     *Merge FDF file with a PDF file
     *
     *@access public
     *@note files has been provided during the instantiation of this class
     *@internal with the 'native' support the field values are written and the xref data fixed here;
     *          with any other support nothing else is done here (see Output)
     *@param Boolean flatten Optional, false by default, if true will use pdftk (requires a shell) to flatten the pdf form
     **/
    function Merge($flatten=false) {
    //------------------------------
        if($flatten) $this->Flatten(); //note: Flatten() switches the support to pdftk

        //pdftk's job is done in Output, not here.
        if($this->support != "native") return;

        $fields=$this->fdf_parse_needed ? $this->parseFDFContent() : $this->fields;
        $count_fields=count($fields);

        if($this->verbose&&($count_fields==0))
            $this->dumpContent("The FDF content has either no field data or parsing may failed","FDF parser: ");

        $fields_value_definition_lines=array();
        $count_entries=$this->parsePDFEntries($fields_value_definition_lines);

        if(!$count_entries) {
            $this->Error("PDF file is empty!");
            return;
        }

        $this->value_entries=$fields_value_definition_lines;

        if($this->verbose) {
            $this->dumpContent("$count_entries Field entry values found for $count_fields field values to fill","Merge info: ");
        }

        //==== Alterate work is made here: change values ============
        //Strategy applies only to the current value of each field
        if($count_fields) {
            foreach($fields as $name => $value) {
                $this->set_field_value("current",$name,$value);
            }
        }
        //===========================================================

        //===== Cross refs/size fixes (offsets of objects have already been updated by set_field_value) =======

        //Update cross reference table to match object size changes
        $this->fix_xref_table();

        //update the pointer to the cross reference table
        $this->fix_xref_start();
    }

    /**
     *Warns verbose/output conflicts
     *
     *@access private
     *@param string $dest a output destination
     **/
    function Close($dest) {
    //----------------
        $this->Error("Output: Verbose mode should be desactivated, it is incompatible with this output mode $dest");
    }

    /**
     *Get current pdf content (without any offset fixes)
     *
     *@access private
     *@param String pdf_file, if given, the content of that file is used as buffer instead of the parsed entries
     *@return string buffer the pdf content
     **/
    function get_buffer($pdf_file=''){
    //---------------------
        if($pdf_file == '') return implode("\n",$this->pdf_entries);

        return $this->getContent($pdf_file,'PDF');
    }

    /**
     *Output PDF to some destination
     *
     *@access public
     *@note reproduces the fpdf's behavior: 'I' inline, 'D' download, 'F' local file, 'S' string.
     *      An empty dest means 'I' (with name 'doc.pdf') when no name is given, 'F' otherwise.
     *@param string dest the destination
     *@param string name the filename
     *@return string the pdf content for dest 'S', an empty string otherwise
     **/
    function Output($dest='', $name=''){
    //-----------------------------------

        $pdf_file='';

        if($this->support == "pdftk") {

            //As PDFTK can only merge FDF files not data directly,
            require_once("lib/url.php");          //we will need a url support because relative urls for pdf inside fdf files are not supported by PDFTK...
            require_once("export/fdf/fdf.php");   //...conjointly with my patched/bridged forge_fdf that provides fdf file generation support from array data.
            require_once("export/pdf/pdftk.php"); //Of course don't forget to bridge to PDFTK!

            $tmp_file=false;
            $pdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->pdf_source)); //string: full pathname to the input pdf , a form file

            if($this->fdf_source) { //FDF file provided
                $fdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->fdf_source));
            }else {
                $pdf_url=getUrlfromDir($pdf_file); //Normaly http scheme not local file

                if($this->fdf_parse_needed) { //fdf source was provided
                    $pdf_data=$this->parseFDFContent();
                }else { //fields data was provided as an array, we have to generate the fdf file
                    $pdf_data=$this->fields;
                }

                $fdf_file=fix_path(FPDM_CACHE)."fields".rnunid().".fdf";
                $tmp_file=true;
                $ret=output_fdf($pdf_url,$pdf_data,$fdf_file);
                if(!$ret["success"])
                    $this->Error("Output failed as something goes wrong (Pdf was $pdf_url)
during internal FDF generation of file $fdf_file,
Reason is given by {$ret['return']}");
            }

            //Serializes security options (not deeply tested)
            //NOTE(review): passwords and permissions are concatenated unescaped; if pdftk() hands this
            //string to a shell, quoting each argument (escapeshellarg) should be considered - confirm in pdftk.php
            $security='';
            if(!is_null($this->security["password"]["owner"]))
                $security.=' owner_pw "'.substr($this->security["password"]["owner"],0,FPDM_PASSWORD_MAX_LEN).'"';
            if(!is_null($this->security["password"]["user"]))
                $security.=' user_pw "'.substr($this->security["password"]["user"],0,FPDM_PASSWORD_MAX_LEN).'"';
            if($this->security["encrypt"]!=0)
                $security.=' encrypt_'.$this->security["encrypt"].'bit';
            if(count($this->security["allow"])>0) {
                $permissions=$this->security["allow"];
                $security.=' allow ';
                foreach($permissions as $permission)
                    $security.=' '.$permission;
            }

            //Serialize output modes
            $output_modes='';
            if($this->flatten_mode) $output_modes.=' flatten';
            if($this->compress_mode) $output_modes.=' compress';
            if($this->uncompress_mode) $output_modes.=' uncompress';

            $ret=pdftk($pdf_file,$fdf_file,array("security"=>$security,"output_modes"=>$output_modes));

            if($tmp_file) @unlink($fdf_file); //Clear cache

            if($ret["success"]) {
                $pdf_file=$ret["return"];
            }else {
                $pdf_file=''; //no pdftk result available, fall back on the parsed entries
                $this->Error($ret["return"]);
            }
        }

        $dest=strtoupper($dest);
        if($dest=='') {
            if($name=='') {
                $name='doc.pdf';
                $dest='I';
            } else {
                $dest='F';
            }
        }

        //Abort to avoid to polluate output
        if($this->verbose&&(($dest=='I')||($dest=='D'))) {
            $this->Close($dest);
        }

        //Build the content once. In pdftk mode it is read from the file produced by pdftk:
        //nothing is merged into $this->pdf_entries in that mode, so the entries alone would be the unfilled form.
        $buffer=$this->get_buffer($pdf_file);

        switch($dest) {
            case 'I':
                //Send to standard output
                if(ob_get_length())
                    $this->Error('Some data has already been output, can\'t send PDF file');
                if(php_sapi_name()!='cli') {
                    //We send to a browser; check before emitting the first header
                    if(headers_sent())
                        $this->Error('Some data has already been output, can\'t send PDF file');
                    header('Content-Type: application/pdf');
                    header('Content-Length: '.strlen($buffer));
                    header('Content-Disposition: inline; filename="'.$name.'"');
                    header('Cache-Control: private, max-age=0, must-revalidate');
                    header('Pragma: public');
                    ini_set('zlib.output_compression','0');
                }
                echo $buffer;
                break;

            case 'D':
                //Download file
                if(ob_get_length())
                    $this->Error('Some data has already been output, can\'t send PDF file');
                if(headers_sent())
                    $this->Error('Some data has already been output, can\'t send PDF file');
                header('Content-Type: application/x-download');
                header('Content-Length: '.strlen($buffer));
                header('Content-Disposition: attachment; filename="'.$name.'"');
                header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past
                header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT"); // always modified
                header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0"); // HTTP/1.1
                header("Cache-Control: post-check=0, pre-check=0", false);
                header('Cache-Control: private, max-age=0, must-revalidate');
                header('Pragma: public,no-cache');
                ini_set('zlib.output_compression','0');
                echo $buffer;
                break;

            case 'F':
                //Save to local file
                if($this->verbose) $this->dumpContent("Write file $name","Output");
                $f=fopen($name,'wb');
                if(!$f) {
                    $this->Error('Unable to create output file: '.$name.' (currently opened under Acrobat Reader?)');
                } else {
                    fwrite($f,$buffer,strlen($buffer));
                    fclose($f);
                }
                break;

            case 'S':
                //Return as a string
                return $buffer;

            default:
                $this->Error('Incorrect output destination: '.$dest);
        }

        return '';
    }

    /**
     *Decodes and returns the binary form of a field hexified value
     *
     *@note delegates to _hex2bin through static_method_call
     *@param string value the hexified string
     *@return string call the binary string
     **/
    function pdf_decode_field_value($value) {
    //----------------------------------------
        return $this->static_method_call('_hex2bin',$value);
    }

    /**
     *Encodes and returns the hexadecimal form of a field binary value
     *
     *@note static method due to callback..
*@param string value the binary string
     *@return string call the hexified string
     **/
    function pdf_encode_field_value($value) {
    //---------------------------------------
        return $this->static_method_call('_bin2hex',$value);
    }

    /**
     *Universal Php4/5 static call helper
     *
     *@note every argument after $method is forwarded to the called method
     *@param String $method a name of a method belonging to this class
     *@return mixed the return value of the called method
     **/
    function static_method_call($method) {
    //---------------------------------------------
        $params_call=func_get_args();
        array_shift($params_call); //drop $method, keep only the arguments to forward
        return call_user_func_array(array($this,$method),$params_call);
    }

    /**
     *Changes a field value that can be in hex <> or binary form ()
     *
     *@param $matches the regexp matches of the line that contains the value to change
     *                (full match, type code, opening delimiter, old value, closing delimiter)
     *@param String $value the new value for the field property
     *@return String the rebuilt "/CODE <delimiter>value<delimiter>" definition
     **/
    function replace_value($matches,$value) {
    //----------------------------------------------
        array_shift($matches); //drop the full match, index 0 is now the type code

        if(($value!='')&&($matches[1]=="<")) //Value must be hexified..
            $value=$this->pdf_encode_field_value($value);

        $matches[2]=$value;
        $value_type_code=$matches[0]; //Should be V, DV or TU
        $matches[0]="/".$value_type_code." ";

        return implode("",$matches);
    }

    /**
     *Core to change the value of a field property, inline.
     *
     *@access private
     *@note the line is only rewritten when it starts with a "/CODE (value)" or "/CODE <value>" definition;
     *      the new value is handed to replace_value() as data, never evaluated as PHP code
     *@param int $line the line where the field property value is defined in the pdf file
     *@param string $value the new value to set
     *@return int $shift the size change of the field property value
     **/
    function _set_field_value($line,$value) {
    //----------------------------------------

        $verbose_set=($this->verbose&&($this->verbose_level>1));

        //get the line content
        $CurLine=$this->pdf_entries[$line];
        $OldLen=strlen($CurLine);

        $field_regexp='/^\/(\w+)\s?(\<|\()([^\)\>]*)(\)|\>)/';

        if(preg_match($field_regexp,$CurLine,$matches)) {
            //The pattern is anchored at the start of the line, so the match is the line prefix:
            //swap that prefix for the rebuilt definition and keep the remainder untouched.
            $CurLine=$this->replace_value($matches,"$value").substr($CurLine,strlen($matches[0]));
        }else {
            if($verbose_set) echo("
WARNING:".htmlentities("Can not access to the value: $CurLine using regexp $field_regexp"));
        }

        $NewLen=strlen($CurLine);
        $Shift=$NewLen-$OldLen;
        $this->shift=$this->shift+$Shift;

        //Saves
        $this->pdf_entries[$line]=$CurLine;

        return $Shift;
    }

    /**
     *Hexifies a value, first converted to UTF-16BE with a leading FE FF marker when isUTF8 is set
     *
     *@param string $str the value to encode
     *@return string the hexified value
     **/
    function _encode_value($str) {
        if($this->isUTF8)
            $str="\xFE\xFF".iconv('UTF-8','UTF-16BE',$str);
        return $this->_bin2hex($str);
    }

    /**
     *Sets the /V value of a line as a hex string, either by appending a new /V entry
     *or by replacing the existing one (delimiters included).
     *
     *@param int $line the index of the line in $this->pdf_entries
     *@param string $value the new value to set
     *@param boolean $append true to append " /V <...>" to the line, false to replace the existing /V value
     *@return int the size change of the line
     **/
    function _set_field_value2($line,$value,$append) {
        $CurLine=$this->pdf_entries[$line];
        $OldLen=strlen($CurLine);

        if($append) {
            $CurLine .= ' /V <'.$this->_encode_value($value).'>';
        } else if(preg_match('#/V\s?[<(]([^>)]*)[>)]#', $CurLine, $a, PREG_OFFSET_CAPTURE)) {
            //$a[1] is the old value without its delimiters: array(text, offset)
            $len=strlen($a[1][0]);
            $pos1=$a[1][1];
            $pos2=$pos1+$len;
            //cut one character before and after the old value to drop its delimiters as well
            $CurLine=substr($CurLine,0,$pos1-1).'<'.$this->_encode_value($value).'>'.substr($CurLine,$pos2+1);
        } else {
            $this->Error('/V not found');
        }

        $NewLen=strlen($CurLine);
        $Shift=$NewLen-$OldLen;
        $this->shift=$this->shift+$Shift;
        $this->pdf_entries[$line]=$CurLine;

        return $Shift;
    }

    /**
     *Changes the value of a field property, inline.
*
     *@note 'tooltip' changes the tooltip; any other type writes the current value (/V) of the field,
     *      or its checkbox state when the checkbox parser is enabled and the field has one
     *@param string $type supported values for type are 'default' , 'current' or 'tooltip'
     *@param string $name name of the field annotation to change the value
     *@param string $value the new value to set
     **/
    function set_field_value($type,$name,$value) {
    //------------------------------------

        if(!isset($this->value_entries["$name"])) {
            $this->Error("field $name not found");
            return;
        }

        $entry=$this->value_entries["$name"];
        $object_id=$entry["infos"]["object"];

        if($type=="tooltip") {
            $offset_shift=$this->set_field_tooltip($name,$value);
        } elseif ($this->useCheckboxParser && isset($entry['infos']['checkbox_state'])) {
            //FIX: set checkbox value
            $offset_shift=$this->set_field_checkbox($name, $value);
            //ENDFIX
        } elseif (isset($entry["values"]["current"])) {
            //a /V definition exists: replace it in place
            $offset_shift=$this->_set_field_value2($entry["values"]["current"],$value,false);
        } else {
            //no /V definition yet: append one to the line holding the field name
            $offset_shift=$this->_set_field_value2($entry["infos"]["name_line"],$value,true);
        }

        //offset size shift will affect the next objects offsets taking into account the order they appear in the file
        $this->apply_offset_shift_from_object($object_id,$offset_shift);
    }

    /**
     *Changes the tooltip value of a field property, inline.
     *
     *@param string $name name of the field annotation to change the value
     *@param string $value the new value to set
     *@return int offset_shift the size variation
     **/
    function set_field_tooltip($name,$value) {
    //------------------------------------

        $offset_shift=0;
        $verbose_set=($this->verbose&&($this->verbose_level>1));

        //Get the line(s) of the misc field values
        if(isset($this->value_entries["$name"])) {

            $field_tooltip_line=$this->value_entries["$name"]["infos"]["tooltip"];

            if($field_tooltip_line) {
                if($verbose_set) echo "
Change tooltip of the field $name at line $field_tooltip_line to value [$value]";
                $offset_shift=$this->_set_field_value($field_tooltip_line,$value);
            }else {
                if($verbose_set) echo "
Change toolpip value aborted, the field $name has no tooltip definition.";
            }
        } else {
            $this->Error("set_field_tooltip failed as the field $name does not exist");
        }

        return $offset_shift;
    }

    //FIX: parse checkbox definition
    /**
     *Changes the checkbox state.
     *
     *@note the whole state line is rewritten as "/AS /<state>", using the parsed 'checkbox_yes'
     *      state for a truthy value and the 'checkbox_no' state otherwise
     *@param string $name name of the field to change the state
     *@param string $value the new state to set
     *@return int offset_shift the size variation
     **/
    public function set_field_checkbox($name, $value) {
    //------------------------------------

        $offset_shift=0;
        $verbose_set=($this->verbose&&($this->verbose_level>1));

        //Get the line(s) of the misc field values
        if (!isset($this->value_entries["$name"])) {
            $this->Error("set_field_checkbox failed as the field $name does not exist");
            return $offset_shift;
        }

        $infos=$this->value_entries["$name"]["infos"];

        if (!(isset($infos["checkbox_state_line"]) && isset($infos["checkbox_no"]) && isset($infos["checkbox_yes"]))) {
            if ($verbose_set) {
                echo "
Change checkbox value aborted, the field $name has no checkbox definition.";
            }
            return $offset_shift;
        }

        $field_checkbox_line=$infos["checkbox_state_line"];

        if (!$field_checkbox_line) {
            if ($verbose_set) {
                echo "
Change checkbox value aborted, parsed checkbox definition incomplete.";
            }
            return $offset_shift;
        }

        if ($verbose_set) {
            echo "
Change checkbox of the field $name at line $field_checkbox_line to value [$value]";
        }

        $state=$value ? $infos["checkbox_yes"] : $infos["checkbox_no"];

        $OldLen=strlen($this->pdf_entries[$field_checkbox_line]);
        $CurLine='/AS /'.$state;
        $Shift=strlen($CurLine)-$OldLen;

        $this->shift=$this->shift+$Shift;

        //Saves
        $this->pdf_entries[$field_checkbox_line]=$CurLine;

        return $Shift;
    }
    //ENDFIX

    /**
     *Dumps the line entries
     *
     *@note for debug purposes
     *@access private
     *@param array entries the content to dump
     *@param string tag an optional tag to highlight
     *@param boolean halt decides to stop or not this script
     **/
    function dumpEntries($entries,$tag="",$halt=false) {
    //------------------------------------------------------------
        //NOTE(review): the literal line breaks in these strings look like stripped markup - confirm against upstream
        if($tag) echo "

$tag


";
        if($entries) {
            echo "
";
            echo htmlentities(print_r($entries,true));
            echo "
";
        }
        if($halt) exit();
    }

    /**
     *Dumps the string content
     *
     *@note for debug purposes
     *@access private
     *@param string content the content to dump
     *@param string tag an optional tag to highlight
     *@param boolean halt decides to stop or not this script
     **/
    function dumpContent($content,$tag="",$halt=false) {
    //--------------------------------------------------
        //NOTE(review): the literal line breaks in these strings look like stripped markup - confirm against upstream
        if($tag) echo "

$tag

";
        if($content) {
            echo "
";
            echo htmlentities($content);
            echo "
";
        }
        if($halt) exit();
    }

    /**
     *Retrieves the content of a file as a string
     *
     *@access private
     *@note for PDF files, object streams, linearized files and incremental updates are reported as errors,
     *      and $this->needAppearancesTrue is refreshed from the content
     *@param string $filename the filename of the file
     *@param string $filetype the type of file as info
     *@return string $content
     **/
    function getContent($filename,$filetype) {
    //----------------------------------------
        $content=false;

        //Only read when the file could be opened and is not empty: fread() needs a valid
        //handle and a positive length, otherwise we go straight to the error below.
        $handle=fopen($filename,'rb');
        if($handle) {
            $size=filesize($filename);
            if($size>0) $content=fread($handle,$size);
            fclose($handle);
        }

        if (!$content)
            $this->Error(sprintf('Cannot open '.$filetype.' file %s !', $filename));

        if($filetype=='PDF') {
            $start = substr($content, 0, 2048);
            if(strpos($start, '/ObjStm')!==false)
                $this->Error('Object streams are not supported');
            if(strpos($start, '/Linearized')!==false)
                $this->Error('Fast Web View mode is not supported');

            $end = substr($content, -512);
            if(strpos($end, '/Prev')!==false)
                $this->Error('Incremental updates are not supported');

            $this->needAppearancesTrue = (strpos($content, '/NeedAppearances true')!==false);
        }

        return $content;
    }

    /**
     *Retrieves the content of a file as an array of lines entries
     *
     *@access private
     *@param string $filename the filename of the file
     *@param string $filetype the type of file as info
     *@return array $entries the content split on "\n"
     **/
    function getEntries($filename,$filetype) {
    //----------------------------------------
        return explode("\n",$this->getContent($filename,$filetype));
    }

    /**
     *Retrieves a binary string from its hexadecimal representation
     *
     *@access private
     *@note Function was written because PHP has a bin2hex, but not a hex2bin!
     *@param string $hexString the hexified string
     *@return string|false $bin a binary string, FALSE if the input has an odd length or non hexadecimal characters
     **/
    function _hex2bin ($hexString) {
        $hexLength=strlen($hexString);

        // only hex numbers is allowed
        if ($hexLength % 2 != 0 || preg_match("/[^\da-fA-F]/",$hexString)) return FALSE;

        //Turn each pair of digits into a %XX escape..
        $BinStr = '';
        for ($i = 0; $i < $hexLength; $i += 2)
            $BinStr .= '%'.substr ($hexString, $i, 2);

        //..then raw url-decode and return the result
        return rawurldecode ($BinStr);
    }

    /**
     *Encodes a binary string to its hexadecimal representation
     *
     *@access private
     *@note two uppercase hexadecimal digits per byte; an empty string gives an empty string
     *@param string $str a binary string
     *@return string $hex the hexified string
     **/
    function _bin2hex($str) {
    //----------------------
        return strtoupper(bin2hex((string)$str));
    }

    /**
     * Extracts the map object for the xref table
     * @note PDF lines should have been previously parsed to make this work
     * @return array a map that holds the xrefstart infos and values
     */
    function get_xref_table() {
    //------------------------
        return $this->value_entries['$_XREF_$'];
    }

    /**
     * Extracts the offset of the xref table
     * @note PDF lines should have been previously parsed to make this work
     * @return int the xrefstart value
     */
    function get_xref_start() {
    //------------------------
        return $this->value_entries['$_XREF_$']["infos"]["start"]["pointer"];
    }

    /**
     * Extracts the line where the offset of the xref table is stored
     * @note PDF lines should have been previously parsed to make this work
     * @return int the wished line number
     */
    function get_xref_start_line() {
    //-------------------------------
        return $this->value_entries['$_XREF_$']["infos"]["start"]["line"];
    }

    /**
     * Calculates the offset of the xref table: the parsed xrefstart value plus the global size shift
     *
     * @return int the wished xrefstart offset value
     */
    function get_xref_start_value() {
    //-------------------------------
        $size_shift=$this->shift;
        $xref_start=$this->get_xref_start();
        return $xref_start+$size_shift;
}

    /**
     * Reads the offset of the xref table directly from the file content.
     *
     * The buffer returned by $this->get_buffer() is split on the standalone
     * keyword "xref"; the offset of the text following its first occurrence,
     * minus the 4 characters of the keyword, is returned.
     *
     * NOTE(review): when the buffer holds no "xref" keyword, $chunks[1] is
     * undefined and the result is -4 - confirm callers never hit this case.
     *
     * @note content has previously been defined in $this->get_buffer()
     * @return int the wished xrefstart offset value
     */
    function read_xref_start_value() {
    //------------------------------
        $buffer=$this->get_buffer();
        $chunks = preg_split('/\bxref\b/', $buffer, -1, PREG_SPLIT_OFFSET_CAPTURE);
        return intval($chunks[1][1])-4; //-4 , relative to end of xref
    }

    /**
     * Calculates the new offset/xref for this object id by applying the offset shift due to value changes.
     *
     * The three lookup tables are rebuilt on every call.
     *
     * @note uses internally precalculated $offsets,$positions and $shifts
     * @param int $object_id an object id, a integer value starting from 1
     * @return int the wished offset
     */
    function get_offset_object_value($object_id) {
    //--------------------------------------------
        //!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !)
        $positions=$this->_get_positions_ordered();
        //Makes it 0 indexed as object id starts from 1 and positions starts from 0
        $offsets=$this->_get_offsets_starting_from_zero();
        //Shifts are already 0 indexed, don't change.
        $shifts=$this->shifts;

        $p=$positions[$object_id];

        //original offset + size shift of the object due to value changes
        return $offsets[$p]+$shifts[$p];
    }

    /**
     * Reads the offset of an object directly from the file content.
     *
     * Looks for "<newline><id> 0 obj" in the buffer and returns the position
     * of the first character of the object id.
     *
     * @note content has previously been defined in $this->get_buffer()
     * @param int $object_id an object id, a integer value starting from 1
     * @return int the wished offset
     */
    function read_offset_object_value($object_id) {
    //------------------------------
        $buffer=$this->get_buffer();
        $previous_object_footer='';//'endobj' or comment;
        //single quotes on purpose: the two characters \n are handed to PCRE, which matches a newline
        $object_header=$previous_object_footer.'\n'.$object_id.' 0 obj';
        $chars = preg_split('/'.$object_header.'/', $buffer, -1, PREG_SPLIT_OFFSET_CAPTURE);
        //$chars[1][1] is the offset just after the matched header; the pattern text is one
        //character longer than what it matches (\n counts for two), hence the +2 correction
        //that lands on the first digit of the object id.
        $offset=intval($chars[1][1])-strlen($object_header)+strlen($previous_object_footer)+2;
        return $offset;
    }

    /**
     * Fix the offset of the xref table.
     *
     * Depending on safe_mode / check_mode the value is computed from the
     * parsed pointer, read back from the buffer, or both (and compared).
     * The result is written to the pdf entry holding the startxref value.
     */
    function fix_xref_start() {
    //-------------------------
        $pdf_entries=&$this->pdf_entries;
        $verbose_fix=($this->verbose&&($this->verbose_level>1));
        $calculate_xrefstart_value=((!$this->safe_mode)||$this->check_mode);
        $extract_xrefstart_value_from_file=($this->safe_mode||$this->check_mode);

        if($calculate_xrefstart_value) {
            $xref_start_value_calculated=$this->get_xref_start_value(); //get computed value from old one
            if(!$this->safe_mode) $xref_start_value=$xref_start_value_calculated;
        }

        if($extract_xrefstart_value_from_file) {
            $xref_start_value_safe=$this->read_xref_start_value();//read direct from new file content
            if($this->safe_mode) $xref_start_value=$xref_start_value_safe;
        }

        if($this->check_mode) { //Compared calculated value with position value read direct from file
            if($xref_start_value_calculated != $xref_start_value_safe) {
                if($verbose_fix) echo "\nxrefstart's value must be $xref_start_value_safe calculated is $xref_start_value_calculated.Don't worry, FPDFM-merge will fix it for you.\n";
                $xref_start_value=$xref_start_value_safe; //Overrides with the good value
                if($this->halt_mode)
                    $this->Error("Halt on error mode enabled, aborting. Use \$pdf->set_modes('halt',false); to disable this mode and go further fixing corrupted pdf.");
            } else {
                if($verbose_fix) echo "\nxrefstart's value for the file is correct and vaults $xref_start_value";
            }
        }

        //updates xrefstart's value
        $xref_start_line=$this->get_xref_start_line();
        $pdf_entries[$xref_start_line]="$xref_start_value";
    }

    /**
     * Get the offsets table 0 indexed
     *
     * @return array $offsets the values of $this->offsets, re-indexed from 0
     */
    function _get_offsets_starting_from_zero() {
    //-------------------------------------------
        return array_values($this->offsets);
    }

    /**
     * Sorts a copy of the position array by key
     *
     * @return array $positions the ordered positions
     */
    function _get_positions_ordered() {
    //--------------------------------
        $positions=$this->positions;
        ksort($positions);
        return $positions;
    }

    /**
     * Fix the xref table by rebuilding its offsets entries.
     *
     * One entry is rewritten per object id 1..count, starting three lines
     * after the recorded xref line. Aborts through Error() when the number
     * of parsed objects differs from the xref count.
     */
    function fix_xref_table() {
    //------------------------
        $xref_table=$this->get_xref_table();
        $xLen=$xref_table["infos"]["count"];
        $pdf_entries=&$this->pdf_entries;

        //Do some checks
        $oLen=count($this->offsets);

        if($xLen != $oLen) {
            //Congratulations you won the corrupted Error Prize
            $this->Error("Number of objects ($oLen) differs with number of xrefs ($xLen), something , pdf xref table is corrupted :(");
            return;
        }

        //...to rectify xref entries
        //jump over len and header, this is the first entry with n
        $first_xref_entry_line=$xref_table["infos"]["line"]+3;

        //!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !)

        //verbose_level (not the verbose flag itself) carries the verbosity threshold, as in fix_xref_start()
        $verbose_fix=($this->verbose&&($this->verbose_level>1));
        $calculate_offset_value=((!$this->safe_mode)||$this->check_mode);
        $extract_offset_value_from_file=($this->safe_mode||$this->check_mode);

        for($i=0;$i<$xLen;$i++) {
            $obj_id=$i+1;

            //Try two way to retrieve xref offset value of an object of the given id
            if($calculate_offset_value) {
                $offset_value_calculated=$this->get_offset_object_value($obj_id);
                if(!$this->safe_mode) $offset_value=$offset_value_calculated;
            }

            if($extract_offset_value_from_file) {
                $offset_value_read=$this->read_offset_object_value($obj_id);
                if($this->safe_mode) $offset_value=$offset_value_read;
            }

            if($this->check_mode) {
                if($offset_value_calculated != $offset_value_read) {
                    if($verbose_fix) echo "\nOffset for object $obj_id read is $offset_value_read, calculated $offset_value_calculated";
                    $offset_value=$offset_value_read; //overrides to fix bad values
                    if($this->halt_mode) $this->Error("\nOffset for object $obj_id read is $offset_value_read, calculated $offset_value_calculated");
                }else {
                    if($verbose_fix) echo "\nOffset for object $obj_id is correct and vaults $offset_value";
                }
            }

            $pdf_entries[$first_xref_entry_line+$i]=sprintf('%010d 00000 n ',$offset_value);
        }
    }

    /**
     * Applies a shift offset $shift from the object whose id is given as param
     *
     * @note offset shift will affect the next objects taking into account the order they appear in the file
     * @access public
     * @param int object_id the id whose size shift has changed
     * @param int offset_shift the shift value to use
     */
    function apply_offset_shift_from_object($object_id,$offset_shift) {
    //---------------------------------------------------------
        //get the position of object
        $object_pos=$this->positions[$object_id];

        //Applies offset change to the objects that follow it
        $this->_apply_offset_shift($object_pos+1,$offset_shift);
    }

    /**
     * Applies a shift offset $shift starting at the index $from to the shifts array
     *
     * @access private
     * @param int from the index to start apply the shift
     * @param int shift the shift value to use
     */
    function _apply_offset_shift($from,$shift) {
    //------------------------------------------
        $shifts=&$this->shifts;

        foreach($shifts as $key=>$value) {
            if($key>=$from) {
                $shifts[$key]+=$shift;
            }
        }
    }

    /**
     * Decodes a PDF value according to the encoding
     *
     * @access public
     * @param string $encoding the encoding to use for decoding the value, only 'hex' is supported
     * @param string value a value to decode
     * @return string the value decoded (returned untouched for any other encoding)
     */
    function decodeValue($encoding,$value) {
    //----------------------------------------------
        if($encoding=="hex")
            $value=$this->pdf_decode_field_value($value);
        return $value;
    }

    /**
     *Retrieve the list of supported filters
     *
     *@note
Uses $FPDM_FILTERS array built dynamically
     *
     *@param String $sep a separator to merge filter names, default is '|'
     *@return String the supported filter names joined with $sep
     **/
    function getFilters($sep="|") {
    //---------------------
        global $FPDM_FILTERS;

        $filter_names=$FPDM_FILTERS;
        return join($sep,$filter_names);
    }

    /**
     *Get a filter by name
     *
     *@param name a string matching one of the supported default filters (marked with +)
     *
     *Without parameters:
     *+ ASCIIHexDecode : Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *+ ASCII85Decode : Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *  RunLengthDecode : Decompresses data encoded using a byte-oriented run-length encoding algorithm, reproducing the
     *                    original text or binary data (typically monochrome image data, or any data that contains
     *                    frequent long runs of a single byte value).
     *  JPXDecode : (PDF 1.5) Decompresses data encoded using the wavelet-based JPEG2000 standard, reproducing the
     *              original image data.
     *With parameter(s):
     *+ LZWDecode : Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing
     *              the original text or binary data.
     *+ FlateDecode (PDF 1.2): Decompresses data encoded using the zlib/deflate compression method, reproducing the
     *              original text or binary data.
     *  CCITTFaxDecode : Decompresses data encoded using the CCITT facsimile standard, reproducing the original data
     *              (typically monochrome image data at 1 bit per pixel).
     *  JBIG2Decode (PDF 1.4) : Decompresses data encoded using the JBIG2 standard, reproducing the original monochrome
     *              (1 bit per pixel) image data (or an approximation of that data).
     *  DCTDecode : Decompresses data encoded using a DCT (discrete cosine transform) technique based on the JPEG
     *              standard, reproducing image sample data that approximates the original data.
     *  Crypt (PDF 1.5) : Decrypts data encrypted by a security handler, reproducing the data as it was before encryption.
     *
*@return the wished filter class to access the stream
     **/
    function getFilter($name) {
    //---------------------
        //stays null when the name is unknown and Error() has been overridden to return
        $filter=null;

        switch($name) {
            case "LZWDecode":
                $filter=new FilterLZW();
                break;
            case "ASCIIHexDecode":
                $filter=new FilterASCIIHex();
                break;
            case "ASCII85Decode":
                $filter=new FilterASCII85();
                break;
            case "FlateDecode":
                $filter=new FilterFlate();
                break;
            case "Standard": //Raw
                $filter=new FilterStandard();
                break;
            default:
                $this->Error("getFilter cannot open stream of object because filter '{$name}' is not supported, sorry.");
        }

        return $filter;
    }

    //========= Stream manipulation stuff (alpha, not used by now!) ================

    /**
     * Detect if the stream has a textual content, i.e. a "Td <...> Tj" or
     * "Td (...) Tj" sequence.
     *
     * @access public
     * @param string $stream_content the string content of the stream
     * @return int|false the preg_match() result: 1 when found, 0 when not, false on regexp failure
     */
    function is_text_stream($stream_content) {
    //--------------------------------------
        return preg_match("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream_content);
    }

    /**
     * changes the text value of a text stream
     *
     * NOTE(review): alpha code. $is_text_stream, $stream_content and $obj are
     * never assigned in this method, so the guarded body cannot run as written.
     * They are left as found because nothing visible here says where the
     * decoded content and the owning object id should come from.
     *
     * @access public
     * @param array $stream the stream definition retrieved during PDF parsing
     * @param string $value the new text value
     */
    function change_stream_value($stream,$value) {
    //--------------------------------------------
        $entries=&$this->pdf_entries;
        $verbose_parsing=($this->verbose&&($this->verbose_level>3));

        if($is_text_stream) {
            $OldLen=$stream["length"]["value"];
            $lMin=$stream["start"];
            $lMax=$stream["end"];

            $stream_content=$this->_set_text_value($stream_content,$value);
            $NewLen=strlen($stream_content);

            for($l=$lMin;$l<=$lMax;$l++) {
                if($l==$lMin) {
                    //the whole new content goes on the first stream line
                    $entries[$lMin]=$stream_content;

                    //Update the length
                    $stream_def_line=$stream["length"]["line"];
                    $stream_def=$entries[$stream_def_line];
                    $stream_def=preg_replace("/\/Length\s*(\d+)/",'/Length '.$NewLen,$stream_def);
                    $entries[$stream_def_line]=$stream_def;

                    //update the filter type...
                    $stream_def_line=$stream["filters"]["line"];
                    $stream_def=$entries[$stream_def_line];
                    if($verbose_parsing) {
                        echo "\n";
                        echo htmlentities(print_r($stream_def,true));
                        echo "\n";
                    }

                    //...to filter Standard
                    $stream_def=preg_replace($this->streams_filter,'/Standard ',$stream_def);
                    $entries[$stream_def_line]=$stream_def;

                    //Update the shift
                    $size_shift=$NewLen-$OldLen;
                    $this->apply_offset_shift_from_object($obj,$size_shift);
                }else if($lMin!=$lMax) { //was the undefined $lmin
                    //remaining lines of the old stream are dropped
                    unset($entries[$l]);
                }
            }

            if($verbose_parsing) {
                var_dump($stream_content);
            }
        }
    }

    /**
     * Overrides value between Td and Tj, omitting <>
     *
     * The stream is returned unchanged when it holds no "Td ... Tj" sequence.
     *
     * @note core method
     * @access private
     * @param string $stream the stream content
     * @param string $value the new text value
     * @return string the stream content with its first text value replaced
     */
    function _set_text_value($stream,$value) {
    //---------------------------------------
        $chunks=preg_split("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream,0,PREG_SPLIT_DELIM_CAPTURE);

        //a match yields at least: before, opening delimiter, text, closing delimiter, after
        if(count($chunks)<4) return $stream;

        $chunks[2]=$value;

        //separator first: the reversed argument order is rejected by PHP 8
        return implode('',$chunks);
    }

    //================================

    /**
     * Matches $line against the regexp registered for $name in $FPDM_REGEXPS,
     * falling back to the concatenated form "/name/value".
     *
     * @param string $name the definition name, a key of $FPDM_REGEXPS
     * @param string $line the pdf line to inspect
     * @param array $match receives the preg_match() captures
     * @return int|false the preg_match() result of the last attempt
     */
    function _extract_pdf_definition_value($name,$line,&$match) {
    //-----------------------------------------------------------
        global $FPDM_REGEXPS;

        $value=preg_match($FPDM_REGEXPS["$name"],$line,$match);
        if(!$value) { //value is concatenated with name: /name/value
            $value=preg_match("/".preg_quote($name,'/')."\/(\w+)/",$line,$match);
        }
        return $value;
    }

    /**
     * Same as _extract_pdf_definition_value() but refuses, through Error(),
     * any $name that has no entry in $FPDM_REGEXPS.
     *
     * @param string $name the definition name
     * @param string $line the pdf line to inspect
     * @param array $match receives the preg_match() captures
     * @return int|false the preg_match() result, 0 for an unsupported name
     */
    function extract_pdf_definition_value($name,$line,&$match) {
    //-----------------------------------------------------------
        global $FPDM_REGEXPS;

        //defined up front so the return below is safe if Error() is overridden to return
        $value=0;

        if(array_key_exists($name,$FPDM_REGEXPS)) {
            $value=$this->_extract_pdf_definition_value($name,$line,$match);
        }else
            $this->Error("extract_pdf_definition_value() does not support definition '$name'");

        return $value;
    }

    /**
     * Parses the lines entries of a PDF
     *
     * Walks $this->pdf_entries line by line. Named Widget objects are stored
     * in $lines under their field name and the xref table description under
     * the '$_XREF_$' key.
     *
     * NOTE(review): the trailer handling further down in this method contains
     * `if(!preg_match("/<Error(`, which does not parse, and reads $match[1]
     * for the second ID chunk from a pattern that has no capture group. Text
     * appears to be missing at both places; restore it from a pristine copy
     * of the file before relying on this method.
     *
     * @access public
     * @param array $lines output map, filled by reference
     * @return integer count($lines)
     */
    function parsePDFEntries(&$lines){
    //--------------------------------
        $entries=&$this->pdf_entries;

        $CountLines =
count($entries); $Counter=0; $obj=0; //this is an invalid object id, we use it to know if we are into an object //FIX: parse checkbox definition $ap_d_yes=''; $ap_d_no=''; $ap_line=0; $ap_d_line=0; $as=''; //ENDFIX $type=''; $subtype=''; $name=''; $value=''; $default_maxLen=0; //No limit $default_tooltip_line=0; //Tooltip is optional as it may not be defined $xref_table=0; $trailer_table=0; $n=0; //Position of an object, in the order it is declared in the pdf file $stream=array(); $id_def=false; //true when parsing/decoding trailer ID $id_single_line_def=false; //true when the two ID chunks are one the same line $id_multi_line_def=false; //true or OpenOffice 3.2 $creator=''; $producer=''; $creationDate=''; $verbose_parsing=($this->verbose&&($this->verbose_level>3)); $verbose_decoding=($this->verbose&&($this->verbose_level>4)); if($this->verbose) $this->dumpContent("Starting to parse $CountLines entries","PDF parse"); while ( $Counter < $CountLines ){ $CurLine = $entries[$Counter]; if($verbose_parsing) $this->dumpContent($CurLine,"====Parsing Line($Counter)"); if(!$xref_table) { //Header of an object? if(preg_match("/^(\d+) (\d+) obj/",$CurLine,$match)) { $obj=intval($match[1]); $this->offsets[$obj]=$this->pointer; $this->positions[$obj]=$n; $this->shifts[$n]=0; $n++; if($verbose_parsing) $this->dumpContent($CurLine,"====Opening object($obj) at line $Counter"); $object=array(); $object["values"]=array(); $object["constraints"]=array(); $object["constraints"]["maxlen"]=$default_maxLen; $object["infos"]=array(); $object["infos"]["object"]=intval($obj); $object["infos"]["tooltip"]=$default_tooltip_line; } else { //Object has been opened if($obj) { //Footer of an object? 
if(preg_match("/endobj/",$CurLine,$match)) { if($verbose_parsing) $this->dumpContent("","====Closing object($obj) at line $Counter"); //We process fields here, save only Annotations texts that are supported by now if($subtype=="Widget") { if($name != '') { $lines["$name"]=$object; if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) is a text annotation of name '$name', saves it."); }//else // $this->Error("$type $subtype (obj id=$obj) is a text annotation without a name, this cannot be."); $values=$object["values"]; //Sanity values checks, watchdog. // if(!array_key_exists("current",$values)) $this->Error("Cannot find value (/V) for field $name"); // if(!array_key_exists("default",$values)) $this->Error("Cannot find default value (/DV) for field $name"); }else if($verbose_parsing) $this->dumpContent("Object $type $subtype (obj id=$obj) is not supported"); $object=null; $obj=0; //FIX: parse checkbox definition $ap_d_yes=''; $ap_d_no=''; $ap_line=0; $ap_d_line=0; $as=''; //ENDFIX $type=''; $subtype=''; $name=''; $value=''; $maxLen=0; } else { if(preg_match("/\/Length\s*(\d+)/",$CurLine,$match)) { $stream["length"]=array("line"=>$Counter,"value"=>$match[1]); $stream["start"]=0; $stream["end"]=0; $stream["content"]=''; if($verbose_parsing) $this->dumpContent($CurLine,"->Stream filter length definition({$match[1]}) for object($obj) at line $Counter"); } //Handles single filter /Filter /filter_type as well as well as filter chains such as /Filter [/filter_type1 /filter_type2 .../filter_typeN] if(preg_match_all($this->streams_filter,$CurLine,$matches)) { //$this->dumpContent($this->streams_filter); /*$stream_filter=$match[1]; $stream_filter=trim(preg_replace('/(<<|\/Length\s*\d+|>>)/', '', $stream_filter),' '); $stream_filters=preg_split('/\s*\//',$stream_filter); array_shift($stream_filters);*/ $stream_filters=$matches[2]; $stream["filters"]=array("line"=>$Counter, "type"=>$stream_filters); if($verbose_parsing) { //var_dump($stream_filters); 
$stream_filter=implode(" ",$stream_filters); $this->dumpContent($CurLine,"->Stream filter type definition($stream_filter) for object($obj) at line $Counter"); } } if(array_key_exists("length",$stream)) { //length is mandatory if(preg_match("/\b(stream|endstream)\b/",$CurLine,$match)) { if(!array_key_exists("filters",$stream)) {//filter type is optional, if none is given, its standard $stream["filters"]=array("type"=>array("Standard")); if($verbose_parsing) { var_dump($stream); $this->dumpContent($CurLine,"->No stream filter type definition for object($obj) was found, setting it to 'Standard'"); } } if($match[1] == "stream") { if($verbose_parsing) $this->dumpContent($CurLine,"->Opening stream for object($obj) at line $Counter"); $stream["start"]=$Counter+1; }else { $stream["end"]=$Counter-1; $stream["content"]=implode("\n",array_slice($entries,$stream["start"],$stream["end"]-$stream["start"]+1)); $filters=$stream["filters"]["type"]; $f=count($filters); $stream_content=$stream["content"]; //var_dump($filters); //$filters_type=$filters["type"]; //now process the stream, ie unpack it if needed //by decoding in the reverse order the streams have been encoded //This is done by applying decode using the filters in the order given by /Filter. foreach($filters as $filter_name) { $stream_filter=$this->getFilter($filter_name); $stream_content=$stream_filter->decode($stream_content); if($verbose_decoding) { echo "
Stream decoded using filter '$filter_name':[
";
													var_dump($stream_content); //todo : manipulate this content and adjust offsets.
													echo "
]
"; } } if($verbose_parsing) { $this->dumpEntries($stream); echo ""; if($this->is_text_stream($stream_content)) { echo "Stream text unfiltered:[
";
												} else {
													echo "Stream unfiltered:[
";
												}
												var_dump($stream_content); 
												echo "
]
"; $this->dumpContent($CurLine,"->Closing stream for object($obj) at line $Counter"); } $stream=array(); } }else if($stream["start"]>0){ //stream content line that will be processed on endstream... } } else { /* Producer /CreationDate (D:20101225151810+01'00')>> */ if(($creator=='')&&preg_match("/\/Creator\<([^\>]+)\>/",$CurLine,$values)) { $creator=$this->decodeValue("hex",$values[1]); if($verbose_parsing) echo("Creator read ($creator)"); $this->info["Creator"]=$creator; } if(($producer=='')&&preg_match("/\/Producer\<([^\>]+)\>/",$CurLine,$values)) { $producer=$this->decodeValue("hex",$values[1]); if($verbose_parsing) echo("Producer read ($producer)"); $this->info["Producer"]=$producer; } if(($creationDate=='')&&preg_match("/\/CreationDate\(([^\)]+)\)/",$CurLine,$values)) { $creationDate=$values[1]; if($verbose_parsing) echo("Creation date read ($creationDate)"); $this->info["CreationDate"]=$creationDate; } //=== DEFINITION ==== //preg_match("/^\/Type\s+\/(\w+)$/",$CurLine,$match) $match=array(); //FIX: parse checkbox definition if($this->useCheckboxParser && ('' == $ap_d_yes || '' == $ap_d_no || '' == $as)) { if (!$ap_line && '/AP' == substr($CurLine, 0, 3)) { if ($verbose_parsing) { echo("
Found AP Line '$Counter'"); } $ap_line = $Counter; } elseif (!$ap_d_line && '/D' == substr($CurLine, 0, 2)) { if ($verbose_parsing) { echo("
Found D Line '$Counter'"); } $ap_d_line = $Counter; } elseif (($ap_line==$Counter-4)&&($ap_d_line==$Counter-2)&&($ap_d_yes=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) { $ap_d_yes=$match[1]; if ($verbose_parsing) { echo("
Object's checkbox_yes is '$ap_d_yes'"); } $object["infos"]["checkbox_yes"]=$ap_d_yes; } elseif (($ap_line==$Counter-5)&&($ap_d_line==$Counter-3)&&($ap_d_no=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) { $ap_d_no=$match[1]; if ($verbose_parsing) { echo("
Object's checkbox_no is '$ap_d_no'"); } $object["infos"]["checkbox_no"]=$ap_d_no; } elseif (($as=='')&&$this->extract_pdf_definition_value("/AS", $CurLine, $match)) { $as=$match[1]; if ($verbose_parsing) { echo("
Object's AS is '$as'"); } $object["infos"]["checkbox_state"]=$as; $object["infos"]["checkbox_state_line"]=$Counter; } } //ENDFIX if(($type=='')||($subtype=='')||($name=="")) { if(($type=='')&&$this->extract_pdf_definition_value("/Type",$CurLine,$match)) { if($match[1]!='Border') { $type=$match[1]; if($verbose_parsing) echo("
Object's type is '$type'"); } } if(($subtype=='')&&$this->extract_pdf_definition_value("/Subtype",$CurLine,$match)) { $subtype=$match[1]; if($verbose_parsing) echo("
Object's subType is '$subtype'"); } if(($name=="")&&preg_match("/^\/T\s?\((.+)\)\s*$/",$this->_protectContentValues($CurLine),$match)) { $name=$this->_unprotectContentValues($match[1]); //FIX: convert ASCII object names to utf-8 // don't use utf8_encode($name) yet, it's core function since php 7.2 $name = mb_convert_encoding($name, 'UTF-8', 'ASCII'); //ENDFIX if($verbose_parsing) echo ("Object's name is '$name'"); $object["infos"]["name"]=$name; //Keep a track $object["infos"]["name_line"]=$Counter; //$this->dumpContent(" Name [$name]"); } }// else { //=== CONTENT ==== //$this->dumpContent($CurLine); //=== Now, start the serious work , read DV, V Values and eventually TU //note if(preg_match_all("/^\/(V|DV)\s+(\<|\))([^\)\>]+)(\)|\>)/",$CurLine,$matches)) { //do not work as all is encoded on the same line... if(preg_match("/^\/(V|DV|TU)\s+([\<\(])/",$CurLine,$def)) { //get an human readable format of value type and encoding if($def[1] == "TU") { $valuetype="info"; $object["infos"]["tooltip"]=$Counter; } else { $valuetype=($def[1] == "DV") ? "default" : "current"; $object["values"]["$valuetype"]=$Counter; //Set a marker to process lately } $encoding=($def[2]=="<") ? 
"hex" : "plain"; if(preg_match("/^\/(V|DV|TU)\s+(\<|\)|\()([^\)\>]*)(\)|\>\))/",$CurLine,$values)) { $value=$values[3]; $value=$this->decodeValue($encoding,$value); }else $value=''; if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) has $encoding $valuetype value [$value] at line $Counter"); }else if(preg_match("/^\/MaxLen\s+(\d+)/",$CurLine,$values)) { $maxLen=$values[1]; $object["constraints"]["maxlen"]=intval($maxLen); } else if($verbose_parsing) echo("WARNING: definition ignored"); if(substr($CurLine,0,7)=='/Fields' && !$this->needAppearancesTrue) { $CurLine='/NeedAppearances true '.$CurLine; $entries[$Counter]=$CurLine; } //TODO: Fetch the XObject..and change Td <> Tj /* if(preg_match("/^\/AP/",$CurLine,$values)) { //die("stop"); $CurLine=''; //clear link to Xobject $entries[$Counter]=$CurLine; }*/ // } } } } //~~~~~Xref table header? ~~~~~~ if(preg_match("/\bxref\b/",$CurLine,$match)) { $xref_table=1; if($verbose_parsing) $this->dumpContent("->Starting xref table at line $Counter:[$CurLine]"); $lines['$_XREF_$']=array(); $lines['$_XREF_$']["entries"]=array(); $lines['$_XREF_$']["infos"]=array(); $lines['$_XREF_$']["infos"]["line"]=$Counter; $lines['$_XREF_$']["infos"]["start"]=array(); $start_pointer=$this->pointer+strpos($CurLine,"xref"); //HACK for PDFcreator 1.0.0 $lines['$_XREF_$']["infos"]["start"]["pointer"]=$start_pointer; } } $obj_header=false; } else { //We are inside the xref table //$this->dumpContent($CurLine,""); $xref_table=$xref_table+1; switch($xref_table) { case 2: if(preg_match("/^(\d+) (\d+)/",$CurLine,$match)) { $refs_count=intval($match[2]);//xref_table length+1 (includes this line) $lines['$_XREF_$']["infos"]["count"]=$refs_count-1; if($verbose_parsing) $this->dumpContent("Xref table length is $refs_count"); }else if($verbose_parsing) $this->dumpContent("WARNING: Xref table length ignored!"); break; case 3: //Should be 0000000000 65535 f if($verbose_parsing) $this->dumpContent("this is Xref table header, should be 
0000000000 65535 f "); break; default: //xref entries if($refs_count>0) { $xref=$xref_table-3; if($refs_count == 1) {//Last one , due to the shift, is the trailer if(!preg_match("/^trailer/",$CurLine)) //if not, Houston we have a problem $this->Error("xref_table length corrupted?: Trailer not found at expected!"); else $trailer_table=1; }else { $lines['$_XREF_$']["entries"][$xref]=$CurLine; if($verbose_parsing) $this->dumpContent("Xref table entry for object $xref found."); } $refs_count--; } else { //We are inside the trailer if($trailer_table==1) { //should be << if(trim($CurLine) != '') { //HACK: PDFCreator Version 1.0.0 has an extra CR after trailer if(!preg_match("/<Error("trailer_table corrupted?; missing start delimiter << "); $trailer_table++; } }else if(($trailer_table>0)&&((!is_null($id_def))||preg_match("/^\/(Size|Root|Info|ID|DocChecksum)/",$CurLine,$match))) { //Value can be extracted using (\d+|\[[^\]]+\]) if(preg_match("/\/Size (\d+)/",$CurLine,$match)) { //Seems to match with xref entries count.. $size_read=$match[1]; $this->info["size"]=$size_read; if($verbose_parsing) $this->dumpContent("Size read ($size_read) for pdf found."); } if(preg_match("/^\/ID\s*\[\s*<([\da-fA-F]+)/",$CurLine,$match)) { $oid=$match[1]; $id_def=true; if($verbose_parsing) $this->dumpContent("ID chunk one ($oid) for pdf found."); //Determines if the ID definition is one line... 
if(preg_match("/\>\s?\.*$/",$CurLine,$match)) { $tid=$match[1]; $this->info["ID"]=array($oid,$tid); if($verbose_parsing) $this->dumpContent("ID chunk two ($tid) for pdf found."); $id_def=false; }else $this->Error("trailer_table corrupted?; ID chunk two can not be decoded "); } else $id_multi_line_def=true; } if(preg_match("/^\/DocChecksum \/([\da-fA-F]+)/",$CurLine,$match)) { $checksum=$match[1]; $this->info["checksum"]=$checksum; if($verbose_parsing) $this->dumpContent("Checksum read ($checksum) for pdf found."); } if(preg_match("/>>/",$CurLine,$match)) $trailer_table=-1;//negative value: expects startxref to follow } else { switch($trailer_table) { case -1://startxref if(!preg_match("/^startxref/",$CurLine,$match)) $this->Error("startxref tag expected, read $CurLine"); break; case -2://startxref's value if(preg_match("/^(\d+)/",$CurLine,$match)) { $lines['$_XREF_$']["infos"]["start"]["value"]=intval($match[1]); $lines['$_XREF_$']["infos"]["start"]["line"]=$Counter; }else $this->Error("startxref value expected, read $CurLine"); break; default://%%EOF } $trailer_table--; } } } } $this->pointer=$this->pointer+strlen($CurLine)+1; //+1 due to \n $Counter++; } if($this->verbose) { $refs=(array_key_exists('$_XREF_$',$lines)) ? 
$lines['$_XREF_$']["infos"]["count"] : 0;
            if($refs) {
                $this->dumpContent("PDF parse retrieved $refs refs");
            }else {
                $this->dumpContent("PDF parse retrieved no refs, seems the xref table is broken or inacessible, this is bad!");
            }
        }

        return count($lines);
    }

    /**
     * Protect ( ) that may be in value or names: escaped parentheses are
     * swapped for the placeholders $@# and #@$.
     *
     * @access protected
     * @param string $content the FDF content to protect values
     * @return string the content protected
     */
    function _protectContentValues($content) {
    //-------------------------------------------------
        $content=str_replace("\\(","$@#",$content);
        $content=str_replace("\\)","#@$",$content);
        return $content;
    }

    /**
     * Unprotect ( ) that may be in value or names: placeholders are turned
     * back into escaped parentheses, then every C-style escape is resolved
     * with stripcslashes().
     *
     * @access protected
     * @param string $content the FDF content with protected values
     * @return string the content unprotected
     */
    function _unprotectContentValues($content) {
    //--------------------------------------------------
        $content=str_replace("$@#","\\(",$content);
        $content=str_replace("#@$","\\)",$content);
        $content=stripcslashes($content);
        return $content;
    }

    /**
     * Parses the content of a FDF file and returns the extracted field data.
     *
     * Only adjacent "T (...) /V (...)" or "V (...) /T (...)" pairs are
     * recognised. An FDF without any such pair yields an empty array.
     *
     *@access public
     *@return array $fields the data of the fields parsed, as name => value
     */
    function parseFDFContent(){
    //-------------------------
        $content=$this->fdf_content;
        $content=$this->_protectContentValues($content);//protect ( ) that may be in value or names...

        if($this->verbose) $this->dumpEntries($content,"FDF parse");

        //defined before matching so the no-match path dumps and returns an array too
        $fields=array();

        //..so that this regexp can do its job without annoyances
        if(preg_match_all("/(T|V)\s*\(([^\)]+)\)\s*\/(T|V)\s*\(([^\)]+)\)/", $content,$matches, PREG_PATTERN_ORDER)) {

            $fMax=count($matches[0]);
            for($f=0;$f<$fMax;$f++) {
                $value='';
                $name='';
                if($matches[1][$f]=="V") {
                    $value=$matches[2][$f];
                    if($matches[3][$f]=="T")
                        $name=$matches[4][$f];
                    else
                        $this->Error("Field $f ignored , incomplete field declaration, name is expected");
                } else {
                    if($matches[1][$f]=="T") {
                        $name=$matches[2][$f];
                        if($matches[3][$f]=="V")
                            $value=$matches[4][$f];
                        else
                            $this->Error("Field $f ignored , incomplete field declaration, value is expected");
                    } else
                        $this->Error("Field $f ignored , Invalid field keys ({$matches[0][$f]})");
                }

                if($name!='') {
                    //unprotect first: $fields is keyed by the unprotected name, so the
                    //duplicate test has to use the same form
                    $name=$this->_unprotectContentValues($name);
                    if(array_key_exists($name,$fields))
                        $this->Error("Field $f ignored , already defined");
                    else {
                        $value=$this->_unprotectContentValues($value);
                        if($this->verbose)
                            $this->dumpContent("FDF field [$name] has its value set to \"$value\"");
                        $fields[$name]=$value;
                    }
                } else
                    $this->Error("Field $f ignored , no name");
            }
        } else
            if($this->verbose) $this->dumpContent($fields,"FDF has no fields",false);

        if($this->verbose) $this->dumpContent($fields,"FDF parsed",false);

        return $fields;
    }

    /**
     * Close the opened file, if $this->f still holds an open resource
     */
    function closeFile() {
    //--------------------
        if (isset($this->f) && is_resource($this->f)) {
            fclose($this->f);
            unset($this->f);
        }
    }

    /**
     * Print Error and die
     *
     * @param string $msg Error-Message
     */
    function Error($msg) {
    //--------------------
        die('FPDF-Merge Error: '.$msg);
    }

}

}

unset($__tmp);