831 lines
46 KiB
PHP
831 lines
46 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* jsmin.cls.php - modified PHP implementation of Matthias Mullie's JavaScript minifier
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* JavaScript Minifier Class
|
||
|
*
|
||
|
* @package Minify
|
||
|
* @author Matthias Mullie <minify@mullie.eu>
|
||
|
* @author Tijs Verkoyen <minify@verkoyen.eu>
|
||
|
* @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved
|
||
|
* @license MIT License
|
||
|
*/
|
||
|
|
||
|
namespace LiteSpeed\Lib ;
|
||
|
|
||
|
defined( 'WPINC' ) || exit ;
|
||
|
|
||
|
class JSMin
|
||
|
{
|
||
|
/**
|
||
|
* Var-matching regex based on http://stackoverflow.com/a/9337047/802993.
|
||
|
*
|
||
|
* Note that regular expressions using that bit must have the PCRE_UTF8
|
||
|
* pattern modifier (/u) set.
|
||
|
*
|
||
|
* @var string
|
||
|
*/
|
||
|
const REGEX_VARIABLE = '\b[$A-Z\_a-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\x{02c1}\x{02c6}-\x{02d1}\x{02e0}-\x{02e4}\x{02ec}\x{02ee}\x{0370}-\x{0374}\x{0376}\x{0377}\x{037a}-\x{037d}\x{0386}\x{0388}-\x{038a}\x{038c}\x{038e}-\x{03a1}\x{03a3}-\x{03f5}\x{03f7}-\x{0481}\x{048a}-\x{0527}\x{0531}-\x{0556}\x{0559}\x{0561}-\x{0587}\x{05d0}-\x{05ea}\x{05f0}-\x{05f2}\x{0620}-\x{064a}\x{066e}\x{066f}\x{0671}-\x{06d3}\x{06d5}\x{06e5}\x{06e6}\x{06ee}\x{06ef}\x{06fa}-\x{06fc}\x{06ff}\x{0710}\x{0712}-\x{072f}\x{074d}-\x{07a5}\x{07b1}\x{07ca}-\x{07ea}\x{07f4}\x{07f5}\x{07fa}\x{0800}-\x{0815}\x{081a}\x{0824}\x{0828}\x{0840}-\x{0858}\x{08a0}\x{08a2}-\x{08ac}\x{0904}-\x{0939}\x{093d}\x{0950}\x{0958}-\x{0961}\x{0971}-\x{0977}\x{0979}-\x{097f}\x{0985}-\x{098c}\x{098f}\x{0990}\x{0993}-\x{09a8}\x{09aa}-\x{09b0}\x{09b2}\x{09b6}-\x{09b9}\x{09bd}\x{09ce}\x{09dc}\x{09dd}\x{09df}-\x{09e1}\x{09f0}\x{09f1}\x{0a05}-\x{0a0a}\x{0a0f}\x{0a10}\x{0a13}-\x{0a28}\x{0a2a}-\x{0a30}\x{0a32}\x{0a33}\x{0a35}\x{0a36}\x{0a38}\x{0a39}\x{0a59}-\x{0a5c}\x{0a5e}\x{0a72}-\x{0a74}\x{0a85}-\x{0a8d}\x{0a8f}-\x{0a91}\x{0a93}-\x{0aa8}\x{0aaa}-\x{0ab0}\x{0ab2}\x{0ab3}\x{0ab5}-\x{0ab9}\x{0abd}\x{0ad0}\x{0ae0}\x{0ae1}\x{0b05}-\x{0b0c}\x{0b0f}\x{0b10}\x{0b13}-\x{0b28}\x{0b2a}-\x{0b30}\x{0b32}\x{0b33}\x{0b35}-\x{0b39}\x{0b3d}\x{0b5c}\x{0b5d}\x{0b5f}-\x{0b61}\x{0b71}\x{0b83}\x{0b85}-\x{0b8a}\x{0b8e}-\x{0b90}\x{0b92}-\x{0b95}\x{0b99}\x{0b9a}\x{0b9c}\x{0b9e}\x{0b9f}\x{0ba3}\x{0ba4}\x{0ba8}-\x{0baa}\x{0bae}-\x{0bb9}\x{0bd0}\x{0c05}-\x{0c0c}\x{0c0e}-\x{0c10}\x{0c12}-\x{0c28}\x{0c2a}-\x{0c33}\x{0c35}-\x{0c39}\x{0c3d}\x{0c58}\x{0c59}\x{0c60}\x{0c61}\x{0c85}-\x{0c8c}\x{0c8e}-\x{0c90}\x{0c92}-\x{0ca8}\x{0caa}-\x{0cb3}\x{0cb5}-\x{0cb9}\x{0cbd}\x{0cde}\x{0ce0}\x{0ce1}\x{0cf1}\x{0cf2}\x{0d05}-\x{0d0c}\x{0d0e}-\x{0d10}\x{0d12}-\x{0d3a}\x{0d3d}\x{0d4e}\x{0d60}\x{0d61}\x{0d7a}-\x{0d7f}\x{0d85}-\x{0d96}\x{0d9a}-\x{0db1}\x{0db3}-\x{0dbb}\x{0dbd}\x{0dc0}-\x{0dc6}\x{0e01}-\x{0e30}\x{0e32}\x{0e33}\x{0e40}-\x{0e46}\x{0e81}\x{0e82}\x{0e84}\x{0e87}\x
{0e88}\x{0e8a}\x{0e8d}\x{0e94}-\x{0e97}\x{0e99}-\x{0e9f}\x{0ea1}-\x{0ea3}\x{0ea5}\x{0ea7}\x{0eaa}\x{0eab}\x{0ead}-\x{0eb0}\x{0eb2}\x{0eb3}\x{0ebd}\x{0ec0}-\x{0ec4}\x{0ec6}\x{0edc}-\x{0edf}\x{0f00}\x{0f40}-\x{0f47}\x{0f49}-\x{0f6c}\x{0f88}-\x{0f8c}\x{1000}-\x{102a}\x{103f}\x{1050}-\x{1055}\x{105a}-\x{105d}\x{1061}\x{1065}\x{1066}\x{106e}-\x{1070}\x{1075}-\x{1081}\x{108e}\x{10a0}-\x{10c5}\x{10c7}\x{10cd}\x{10d0}-\x{10fa}\x{10fc}-\x{1248}\x{124a}-\x{124d}\x{1250}-\x{1256}\x{1258}\x{125a}-\x{125d}\x{1260}-\x{1288}\x{128a}-\x{128d}\x{1290}-\x{12b0}\x{12b2}-\x{12b5}\x{12b8}-\x{12be}\x{12c0}\x{12c2}-\x{12c5}\x{12c8}-\x{12d6}\x{12d8}-\x{1310}\x{1312}-\x{1315}\x{1318}-\x{135a}\x{1380}-\x{138f}\x{13a0}-\x{13f4}\x{1401}-\x{166c}\x{166f}-\x{167f}\x{1681}-\x{169a}\x{16a0}-\x{16ea}\x{16ee}-\x{16f0}\x{1700}-\x{170c}\x{170e}-\x{1711}\x{1720}-\x{1731}\x{1740}-\x{1751}\x{1760}-\x{176c}\x{176e}-\x{1770}\x{1780}-\x{17b3}\x{17d7}\x{17dc}\x{1820}-\x{1877}\x{1880}-\x{18a8}\x{18aa}\x{18b0}-\x{18f5}\x{1900}-\x{191c}\x{1950}-\x{196d}\x{1970}-\x{1974}\x{1980}-\x{19ab}\x{19c1}-\x{19c7}\x{1a00}-\x{1a16}\x{1a20}-\x{1a54}\x{1aa7}\x{1b05}-\x{1b33}\x{1b45}-\x{1b4b}\x{1b83}-\x{1ba0}\x{1bae}\x{1baf}\x{1bba}-\x{1be5}\x{1c00}-\x{1c23}\x{1c4d}-\x{1c4f}\x{1c5a}-\x{1c7d}\x{1ce9}-\x{1cec}\x{1cee}-\x{1cf1}\x{1cf5}\x{1cf6}\x{1d00}-\x{1dbf}\x{1e00}-\x{1f15}\x{1f18}-\x{1f1d}\x{1f20}-\x{1f45}\x{1f48}-\x{1f4d}\x{1f50}-\x{1f57}\x{1f59}\x{1f5b}\x{1f5d}\x{1f5f}-\x{1f7d}\x{1f80}-\x{1fb4}\x{1fb6}-\x{1fbc}\x{1fbe}\x{1fc2}-\x{1fc4}\x{1fc6}-\x{1fcc}\x{1fd0}-\x{1fd3}\x{1fd6}-\x{1fdb}\x{1fe0}-\x{1fec}\x{1ff2}-\x{1ff4}\x{1ff6}-\x{1ffc}\x{2071}\x{207f}\x{2090}-\x{209c}\x{2102}\x{2107}\x{210a}-\x{2113}\x{2115}\x{2119}-\x{211d}\x{2124}\x{2126}\x{2128}\x{212a}-\x{212d}\x{212f}-\x{2139}\x{213c}-\x{213f}\x{2145}-\x{2149}\x{214e}\x{2160}-\x{2188}\x{2c00}-\x{2c2e}\x{2c30}-\x{2c5e}\x{2c60}-\x{2ce4}\x{2ceb}-\x{2cee}\x{2cf2}\x{2cf3}\x{2d00}-\x{2d25}\x{2d27}\x{2d2d}\x{2d30}-\x{2d67}\x{2d6f}\x{2d80}-\x{2d96}\x{2da0}-\x{2da6}\x{2da8}-\x
{2dae}\x{2db0}-\x{2db6}\x{2db8}-\x{2dbe}\x{2dc0}-\x{2dc6}\x{2dc8}-\x{2dce}\x{2dd0}-\x{2dd6}
|
||
|
|
||
|
/**
|
||
|
* Array of patterns to match.
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $patterns = array();
|
||
|
|
||
|
/**
|
||
|
* This array will hold content of strings and regular expressions that have
|
||
|
* been extracted from the JS source code, so we can reliably match "code",
|
||
|
* without having to worry about potential "code-like" characters inside.
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
public $extracted = array();
|
||
|
|
||
|
/**
|
||
|
* Full list of JavaScript reserved words.
|
||
|
* Will be loaded from jsmin_data/keywords_reserved.txt (next to this file).
|
||
|
*
|
||
|
* @see https://mathiasbynens.be/notes/reserved-keywords
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $keywordsReserved = array();
|
||
|
|
||
|
/**
|
||
|
* List of JavaScript reserved words that accept a <variable, value, ...>
|
||
|
* after them. Some end of lines are not the end of a statement, like with
|
||
|
* these keywords.
|
||
|
*
|
||
|
* E.g.: we shouldn't insert a ; after this else
|
||
|
* else
|
||
|
* console.log('this is quite fine')
|
||
|
*
|
||
|
* Will be loaded from jsmin_data/keywords_before.txt (next to this file).
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $keywordsBefore = array();
|
||
|
|
||
|
/**
|
||
|
* List of JavaScript reserved words that accept a <variable, value, ...>
|
||
|
* before them. Some end of lines are not the end of a statement, like when
|
||
|
* continued by one of these keywords on the newline.
|
||
|
*
|
||
|
* E.g.: we shouldn't insert a ; before this instanceof
|
||
|
* variable
|
||
|
* instanceof String
|
||
|
*
|
||
|
* Will be loaded from jsmin_data/keywords_after.txt (next to this file).
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $keywordsAfter = array();
|
||
|
|
||
|
/**
|
||
|
* List of all JavaScript operators.
|
||
|
*
|
||
|
* Will be loaded from jsmin_data/operators.txt (next to this file).
|
||
|
*
|
||
|
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Expressions_and_Operators
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $operators = array();
|
||
|
|
||
|
/**
|
||
|
* List of JavaScript operators that accept a <variable, value, ...> after
|
||
|
* them. Some end of lines are not the end of a statement, like with these
|
||
|
* operators.
|
||
|
*
|
||
|
* Note: Most operators are fine, we've only removed ++ and --.
|
||
|
* ++ & -- have to be joined with the value they're in-/decrementing.
|
||
|
*
|
||
|
* Will be loaded from jsmin_data/operators_before.txt (next to this file).
|
||
|
*
|
||
|
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Expressions_and_Operators
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $operatorsBefore = array();
|
||
|
|
||
|
/**
|
||
|
* List of JavaScript operators that accept a <variable, value, ...> before
|
||
|
* them. Some end of lines are not the end of a statement, like when
|
||
|
* continued by one of these operators on the newline.
|
||
|
*
|
||
|
* Note: Most operators are fine, we've only removed ), ], ++, --, ! and ~.
|
||
|
* There can't be a newline separating ! or ~ and whatever it is negating.
|
||
|
* ++ & -- have to be joined with the value they're in-/decrementing.
|
||
|
* ) & ] are "special" in that they have lots of use cases. () for example
|
||
|
* is used for function calls, for grouping, in if () and for (), ...
|
||
|
*
|
||
|
* Will be loaded from jsmin_data/operators_after.txt (next to this file).
|
||
|
*
|
||
|
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Expressions_and_Operators
|
||
|
*
|
||
|
* @var string[]
|
||
|
*/
|
||
|
protected $operatorsAfter = array();
|
||
|
|
||
|
/**
 * Load the keyword and operator lists that drive the minifier.
 *
 * Each list is read from a plain-text file (one entry per line) in the
 * `jsmin_data` directory next to this file. Line endings are stripped and
 * empty lines are skipped.
 *
 * @throws \RuntimeException When one of the data files cannot be read.
 */
public function __construct()
{
    $dataDir = __DIR__.'/jsmin_data/';
    $options = FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES;

    // property name => data file that populates it
    $sources = array(
        'keywordsReserved' => 'keywords_reserved.txt',
        'keywordsBefore' => 'keywords_before.txt',
        'keywordsAfter' => 'keywords_after.txt',
        'operators' => 'operators.txt',
        'operatorsBefore' => 'operators_before.txt',
        'operatorsAfter' => 'operators_after.txt',
    );

    foreach ($sources as $property => $filename) {
        $path = $dataDir.$filename;

        /*
         * file() returns false on failure. Storing that in a property that
         * the rest of the class treats as an array only defers the failure
         * to an unrelated place, so fail here with a message that names the
         * file instead.
         */
        $lines = is_readable($path) ? file($path, $options) : false;
        if ($lines === false) {
            throw new \RuntimeException('JSMin: unable to read data file '.$path);
        }

        $this->$property = $lines;
    }
}
|
||
|
|
||
|
/**
 * Minify a piece of JavaScript in a single call.
 *
 * Convenience wrapper: creates a fresh minifier instance and runs min() on
 * the given source.
 *
 * @param string $js JavaScript source code
 *
 * @return string The minified data
 */
public static function minify($js)
{
    $minifier = new self();

    return $minifier->min($js);
}
|
||
|
|
||
|
/**
 * Run the complete minification pipeline over one script.
 *
 * @param string $js JavaScript source code
 *
 * @return string The minified source
 */
public function min($js)
{
    /*
     * Strings, comments and regular expressions can all contain characters
     * that look like code, e.g. the whitespace in:
     * var str = "a test";
     * Register the patterns that find them first. Comments are dropped,
     * strings and regular expressions are swapped for placeholder text
     * that is put back at the very end.
     */
    $this->extractStrings('\'"`');
    $this->stripComments();
    $this->extractRegex();

    // apply the patterns registered above, then run the actual optimizations
    $minified = $this->replace($js);
    $minified = $this->propertyNotation($minified);
    $minified = $this->shortenBools($minified);
    $minified = $this->stripWhitespace($minified);

    // terminate the script with a `;`, drop leading `;`s, then drop the
    // terminator that was just appended
    $combined = ltrim($minified.';', ';');
    $combined = (string) substr($combined, 0, -1);

    // swap the placeholders back for the original strings & regexes
    return $this->restoreExtractedData($combined);
}
|
||
|
|
||
|
/**
 * Register the pattern that takes string literals out of the source.
 *
 * String content may look like code, so every non-empty string is swapped
 * for a short placeholder: the quote character, the optional prefix, a
 * running number and the quote character again. The original literal is
 * kept in $this->extracted under that placeholder and is put back by
 * restoreExtractedData() once all other minification is done.
 *
 * @param string $chars             Quote characters that delimit a string;
 *                                  inserted into a regex character class
 * @param string $placeholderPrefix Text put in front of the placeholder number
 */
protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
{
    // the closure works on a plain variable rather than $this (PHP only
    // supports $this inside anonymous functions since 5.4)
    $self = $this;
    $swap = function ($match) use ($self, $placeholderPrefix) {
        $quote = $match[1];
        $body = $match[2];

        if ($body !== '') {
            $placeholder = $quote.$placeholderPrefix.count($self->extracted).$quote;
            $self->extracted[$placeholder] = $quote.$body.$quote;

            return $placeholder;
        }

        // An empty string can't be mistaken for anything else, so it gets
        // no placeholder. It still had to be matched so the extraction
        // routine skips over it.
        return $match[0];
    };

    /*
     * About the backslashes:
     * - a quote preceded by a backslash is escaped and does not end the
     *   string...
     * - ...unless that backslash is itself escaped, so the closing quote
     *   may only be preceded by an even number of backslashes (or none)
     * - every backslash is written 4 times: doubled once for the PHP string
     *   literal and once more for the regex
     */
    $this->registerPattern('/(['.$chars.'])(.*?(?<!\\\\)(\\\\\\\\)*+)\\1/s', $swap);
}
|
||
|
|
||
|
/**
 * Register the patterns that remove comments from the source.
 *
 * Block comments starting with `/*!` or containing `@license` or
 * `@preserve` are kept: they are swapped for a placeholder and stored in
 * $this->extracted. All other block comments and all `//` line comments are
 * replaced by nothing.
 */
protected function stripComments()
{
    // the closure works on a plain variable rather than $this (PHP only
    // supports $this inside anonymous functions since 5.4)
    $self = $this;
    $handleBlockComment = function ($match) use ($self) {
        $body = $match[1];

        $keep = substr($body, 0, 1) === '!'
            || strpos($body, '@license') !== false
            || strpos($body, '@preserve') !== false;

        if (!$keep) {
            return '';
        }

        // keep the whole match (including any surrounding line feed the
        // pattern consumed) behind a numbered comment placeholder
        $placeholder = '/*'.count($self->extracted).'*/';
        $self->extracted[$placeholder] = $match[0];

        return $placeholder;
    };

    // multi-line comments
    $this->registerPattern('/\n?\/\*(.*?)\*\/\n?/s', $handleBlockComment);

    // single-line comments
    $this->registerPattern('/\/\/.*$/m', '');
}
|
||
|
|
||
|
/**
 * Register the patterns that take /-delimited regex literals (such as
 * /ab+c/.test(string)) out of the source.
 *
 * The content of a regex can look like code (e.g. whitespace it needs to
 * match) and must not be touched by the other optimizations. Every regex
 * literal found is swapped for a `"<number>"` placeholder and the original
 * is stored in $this->extracted, to be put back by restoreExtractedData().
 *
 * Comments also use slashes; the original author's note is that the comment
 * patterns registered by stripComments() take care of those.
 */
protected function extractRegex()
{
    // the closure works on a plain variable rather than $this (PHP only
    // supports $this inside anonymous functions since 5.4)
    $self = $this;
    $stash = function ($match) use ($self) {
        $placeholder = '"'.count($self->extracted).'"';
        $self->extracted[$placeholder] = $match[0];

        return $placeholder;
    };

    /*
     * The regex literal itself:
     * - any run of chars other than `/`, `[`, `\` and line breaks
     * - or a `\` together with the char it escapes
     * - or a `[...]` class, inside which a bare `/` needs no escaping
     * up to the unescaped closing `/`, followed by optional flags.
     */
    $literal = '\\/(?!\*)(?:[^\\[\\/\\\\\n\r]++|(?:\\\\.)++|(?:\\[(?:[^\\]\\\\\n\r]++|(?:\\\\.)++)++\\])++)++\\/[gimuy]*';

    $members = array(
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Properties_2
        'constructor',
        'flags',
        'global',
        'ignoreCase',
        'multiline',
        'source',
        'sticky',
        'unicode',
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Methods_2
        'compile(',
        'exec(',
        'test(',
        'toSource(',
        'toString(',
    );
    $quotedMembers = array();
    foreach ($members as $member) {
        $quotedMembers[] = preg_quote($member, '/');
    }
    $memberAlternation = implode('|', $quotedMembers);

    /*
     * Case 1: a regex can only come after a few operators or keywords, and
     * can only be followed by a few operators or one of the RegExp members
     * (a `/` that follows a variable or value is likely a division).
     */
    $leadingKeywords = array('do', 'in', 'new', 'else', 'throw', 'yield', 'delete', 'return', 'typeof');
    $prefix = '(^|[=:,;\+\-\*\/\}\(\{\[&\|!]|'.implode('|', $leadingKeywords).')\s*';
    $suffix = '(?=\s*([\.,;\)\}&\|+]|\/\/|$|\.('.$memberAlternation.')))';
    $this->registerPattern('/'.$prefix.'\K'.$literal.$suffix.'/', $stash);

    /*
     * Case 2: after a `)`. Usually `/` is a division there, and when another
     * `/` (or a comment) follows it is easily mistaken for a regex. A regex
     * can legitimately follow `)` though, e.g. after a single-line `if()` or
     * `while()`. In that position it starts a statement, so it is only
     * useful when a RegExp member is accessed on it directly; anything else
     * is treated as not being a regex.
     */
    $prefix = '\)\s*';
    $suffix = '(?=\s*\.('.$memberAlternation.'))';
    $this->registerPattern('/'.$prefix.'\K'.$literal.$suffix.'/', $stash);

    /*
     * Case 3: a regex followed by a line break (ASI), after which an
     * operator or keyword starts the next statement.
     * (https://github.com/matthiasmullie/minify/issues/56)
     */
    $starters = $this->getOperatorsForRegex($this->operatorsBefore, '/');
    $starters += $this->getOperatorsForRegex($this->keywordsReserved, '/');
    $suffix = '(?=\s*\n\s*('.implode('|', $starters).'))';
    $this->registerPattern('/'.$literal.$suffix.'/', $stash);
}
|
||
|
|
||
|
/**
 * Apply all registered patterns to the content, earliest match first.
 *
 * JavaScript can't be processed by running the patterns one after another:
 * `// xyz` is a comment unless it sits inside a string, and something that
 * looks like a 'string' may sit inside a comment. So the content is walked
 * from the start, and at every step only the pattern that matches earliest
 * is executed; matching then resumes right after that match.
 *
 * @param string $content The content to replace patterns in
 *
 * @return string The (manipulated) content
 */
protected function replace($content)
{
    $length = strlen($content);
    $cursor = 0;
    $result = '';
    $found = array();
    // last known match offset per pattern; -1 forces a first search
    $offsets = array_fill(0, count($this->patterns), -1);

    while ($cursor < $length) {
        foreach ($this->patterns as $index => $entry) {
            // Skip patterns that were retired because they no longer match,
            // and patterns whose cached match still lies at or beyond the
            // cursor (no need to search again).
            if (!array_key_exists($index, $offsets) || $offsets[$index] >= $cursor) {
                continue;
            }

            $hit = null;
            if (preg_match($entry[0], $content, $hit, PREG_OFFSET_CAPTURE, $cursor)) {
                // remember the match and where it starts, so it can be
                // reused after another pattern has been executed
                $found[$index] = $hit;
                $offsets[$index] = $hit[0][1];
                continue;
            }

            // no match from here on: retire the pattern for the rest of
            // this content
            unset($found[$index], $offsets[$index]);
        }

        if (!$found) {
            // nothing left to match: the remainder is copied verbatim
            $result .= substr($content, $cursor);
            break;
        }

        // Only the earliest match is executed; what the other patterns
        // found may well lie inside it.
        $earliest = min($offsets);
        $winner = array_search($earliest, $offsets);
        $hit = $found[$winner];
        $replacement = $this->patterns[$winner][1];

        // untouched text between the cursor and the match...
        $result .= substr($content, $cursor, $earliest - $cursor);
        // ...the replacement for the match...
        $result .= $this->executeReplacement($replacement, $hit);
        // ...and continue right after the match
        $cursor = $earliest + strlen($hit[0][0]);
    }

    return $result;
}
|
||
|
|
||
|
/**
 * Replaces all occurrences of array['key'] by array.key.
 *
 * Works on content in which strings have already been swapped for
 * placeholders: the bracketed placeholder is looked up in $this->extracted
 * to find the real key.
 *
 * @param string $content
 *
 * @return string
 */
protected function propertyNotation($content)
{
    // PHP only supports $this inside anonymous functions since 5.4
    $minifier = $this;
    $keywords = $this->keywordsReserved;
    $callback = function ($match) use ($minifier, $keywords) {
        $placeholder = $match[1];
        $quote = $match[2];

        if (!isset($minifier->extracted[$placeholder])) {
            // not something we extracted: leave untouched
            return $match[0];
        }
        $extracted = $minifier->extracted[$placeholder];

        /*
         * Only string literals qualify. Regex literals are stored behind
         * the same `"<number>"` placeholder shape but start with `/`.
         */
        if (
            strlen($extracted) < 2 ||
            $extracted[0] !== $quote ||
            substr($extracted, -1) !== $quote
        ) {
            return $match[0];
        }

        /*
         * Strip exactly the one pair of delimiting quotes. Trimming all
         * leading/trailing quote characters would turn a key like "a'"
         * into `a` and rewrite obj["a'"] to obj.a, which is a different
         * property.
         */
        $property = (string) substr($extracted, 1, -1);

        /*
         * Check if the property is a reserved keyword. In this context (as
         * property of an object literal/array) it shouldn't matter, but IE8
         * freaks out with "Expected identifier".
         */
        if (in_array($property, $keywords, true)) {
            return $match[0];
        }

        /*
         * See if the property is in a variable-like format (e.g.
         * array['key-here'] can't be replaced by array.key-here since '-'
         * is not a valid character there).
         */
        if (!preg_match('/^'.$minifier::REGEX_VARIABLE.'$/u', $property)) {
            return $match[0];
        }

        return '.'.$property;
    };

    /*
     * Figure out if previous character is a variable name (of the array
     * we want to use property notation on) - this is to make sure
     * standalone ['value'] arrays aren't confused for keys-of-an-array.
     * We can (and only have to) check the last character, because PHP's
     * regex implementation doesn't allow unfixed-length look-behind
     * assertions. The character class is taken from the last [...] group
     * of REGEX_VARIABLE.
     */
    preg_match('/(\[[^\]]+\])[^\]]*$/', static::REGEX_VARIABLE, $previousChar);
    $previousChar = $previousChar[1];

    /*
     * Make sure word preceding the ['value'] is not a keyword, e.g.
     * return['x']. Because -again- PHP's regex implementation doesn't allow
     * unfixed-length look-behind assertions, this is done with a lot of
     * separate look-behind assertions, one for each keyword.
     */
    $keywords = $this->getKeywordsForRegex($keywords);
    $keywords = '(?<!'.implode(')(?<!', $keywords).')';

    return preg_replace_callback('/(?<='.$previousChar.'|\])'.$keywords.'\[\s*(([\'"])[0-9]+\\2)\s*\]/u', $callback, $content);
}
|
||
|
|
||
|
/**
 * Replaces true & false by !0 and !1, and `while(!0){` by `for(;;){`.
 *
 * @param string $content
 *
 * @return string
 */
protected function shortenBools($content)
{
    /*
     * `true` and `false` can also be property names (obj.true, possibly with
     * whitespace after the dot) and those must stay as they are. Look-behind
     * assertions can't be variable-length, so the preceding character (plus
     * any whitespace) is captured and checked for `.` in the callback.
     * Object keys (`true:`) are excluded by the pattern itself.
     */
    $content = preg_replace_callback(
        '/(^|.\s*)\b(true|false)\b(?!:)/',
        function ($match) {
            $lead = $match[1];
            if (trim($lead) === '.') {
                return $match[0];
            }

            $short = '!1';
            if ($match[2] === 'true') {
                $short = '!0';
            }

            return $lead.$short;
        },
        $content
    );

    // for(;;) is exactly the same as while(true), but shorter :)
    $content = preg_replace('/\bwhile\(!0\){/', 'for(;;){', $content);

    // the replacement above may have hit the `while` of a do ... while(true);
    // those have to be turned back
    preg_match_all('/\bdo\b/', $content, $dos, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

    // walk the `do`s last-to-first, so the offsets of the ones still to be
    // handled stay valid when $content changes
    for ($i = count($dos) - 1; $i >= 0; --$i) {
        $doAt = $dos[$i][0][1];

        // candidates: every `for(;;)` (formerly `while`) from this `do` on
        preg_match_all('/\bfor\(;;\)/', $content, $loops, PREG_OFFSET_CAPTURE | PREG_SET_ORDER, $doAt);
        foreach ($loops as $loop) {
            $loopAt = $loop[0][1];
            $between = substr($content, $doAt, $loopAt - $doAt);

            // unbalanced braces in between: this `for` isn't the one that
            // closes this `do`
            if (substr_count($between, '{') !== substr_count($between, '}')) {
                continue;
            }

            $content = substr_replace($content, 'while(!0)', $loopAt, strlen('for(;;)'));
            break;
        }
    }

    return $content;
}
|
||
|
|
||
|
/**
|
||
|
* Strip whitespace.
|
||
|
*
|
||
|
* We won't strip *all* whitespace, but as much as possible. The thing that
|
||
|
* we'll preserve are newlines we're unsure about.
|
||
|
* JavaScript doesn't require statements to be terminated with a semicolon.
|
||
|
* It will automatically fix missing semicolons with ASI (automatic semi-
|
||
|
* colon insertion) at the end of line causing errors (without semicolon.)
|
||
|
*
|
||
|
* Because it's sometimes hard to tell if a newline is part of a statement
|
||
|
* that should be terminated or not, we'll just leave some of them alone.
|
||
|
*
|
||
|
* @param string $content The content to strip the whitespace for
|
||
|
*
|
||
|
* @return string
|
||
|
*/
|
||
|
protected function stripWhitespace($content)
|
||
|
{
|
||
|
// uniform line endings, make them all line feed
|
||
|
$content = str_replace(array("\r\n", "\r"), "\n", $content);
|
||
|
|
||
|
// collapse all non-line feed whitespace into a single space
|
||
|
$content = preg_replace('/[^\S\n]+/', ' ', $content);
|
||
|
|
||
|
// strip leading & trailing whitespace
|
||
|
$content = str_replace(array(" \n", "\n "), "\n", $content);
|
||
|
|
||
|
// collapse consecutive line feeds into just 1
|
||
|
$content = preg_replace('/\n+/', "\n", $content);
|
||
|
|
||
|
$operatorsBefore = $this->getOperatorsForRegex($this->operatorsBefore, '/');
|
||
|
$operatorsAfter = $this->getOperatorsForRegex($this->operatorsAfter, '/');
|
||
|
$operators = $this->getOperatorsForRegex($this->operators, '/');
|
||
|
$keywordsBefore = $this->getKeywordsForRegex($this->keywordsBefore, '/');
|
||
|
$keywordsAfter = $this->getKeywordsForRegex($this->keywordsAfter, '/');
|
||
|
|
||
|
// strip whitespace that ends in (or next line begin with) an operator
|
||
|
// that allows statements to be broken up over multiple lines
|
||
|
unset($operatorsBefore['+'], $operatorsBefore['-'], $operatorsAfter['+'], $operatorsAfter['-']);
|
||
|
$content = preg_replace(
|
||
|
array(
|
||
|
'/('.implode('|', $operatorsBefore).')\s+/',
|
||
|
'/\s+('.implode('|', $operatorsAfter).')/',
|
||
|
),
|
||
|
'\\1',
|
||
|
$content
|
||
|
);
|
||
|
|
||
|
// make sure + and - can't be mistaken for, or joined into ++ and --
|
||
|
$content = preg_replace(
|
||
|
array(
|
||
|
'/(?<![\+\-])\s*([\+\-])(?![\+\-])/',
|
||
|
'/(?<![\+\-])([\+\-])\s*(?![\+\-])/',
|
||
|
),
|
||
|
'\\1',
|
||
|
$content
|
||
|
);
|
||
|
|
||
|
// collapse whitespace around reserved words into single space
|
||
|
$content = preg_replace('/(^|[;\}\s])\K('.implode('|', $keywordsBefore).')\s+/', '\\2 ', $content);
|
||
|
$content = preg_replace('/\s+('.implode('|', $keywordsAfter).')(?=([;\{\s]|$))/', ' \\1', $content);
|
||
|
|
||
|
/*
|
||
|
* We didn't strip whitespace after a couple of operators because they
|
||
|
* could be used in different contexts and we can't be sure it's ok to
|
||
|
* strip the newlines. However, we can safely strip any non-line feed
|
||
|
* whitespace that follows them.
|
||
|
*/
|
||
|
$operatorsDiffBefore = array_diff($operators, $operatorsBefore);
|
||
|
$operatorsDiffAfter = array_diff($operators, $operatorsAfter);
|
||
|
$content = preg_replace('/('.implode('|', $operatorsDiffBefore).')[^\S\n]+/', '\\1', $content);
|
||
|
$content = preg_replace('/[^\S\n]+('.implode('|', $operatorsDiffAfter).')/', '\\1', $content);
|
||
|
|
||
|
/*
|
||
|
* Whitespace after `return` can be omitted in a few occasions
|
||
|
* (such as when followed by a string or regex)
|
||
|
* Same for whitespace in between `)` and `{`, or between `{` and some
|
||
|
* keywords.
|
||
|
*/
|
||
|
$content = preg_replace('/\breturn\s+(["\'\/\+\-])/', 'return$1', $content);
|
||
|
$content = preg_replace('/\)\s+\{/', '){', $content);
|
||
|
$content = preg_replace('/}\n(else|catch|finally)\b/', '}$1', $content);
|
||
|
|
||
|
/*
|
||
|
* Get rid of double semicolons, except where they can be used like:
|
||
|
* "for(v=1,_=b;;)", "for(v=1;;v++)" or "for(;;ja||(ja=true))".
|
||
|
* I'll safeguard these double semicolons inside for-loops by
|
||
|
* temporarily replacing them with an invalid condition: they won't have
|
||
|
* a double semicolon and will be easy to spot to restore afterwards.
|
||
|
*/
|
||
|
$content = preg_replace('/\bfor\(([^;]*);;([^;]*)\)/', 'for(\\1;-;\\2)', $content);
|
||
|
$content = preg_replace('/;+/', ';', $content);
|
||
|
$content = preg_replace('/\bfor\(([^;]*);-;([^;]*)\)/', 'for(\\1;;\\2)', $content);
|
||
|
|
||
|
/*
|
||
|
* Next, we'll be removing all semicolons where ASI kicks in.
|
||
|
* for-loops however, can have an empty body (ending in only a
|
||
|
* semicolon), like: `for(i=1;i<3;i++);`, of `for(i in list);`
|
||
|
* Here, nothing happens during the loop; it's just used to keep
|
||
|
* increasing `i`. With that ; omitted, the next line would be expected
|
||
|
* to be the for-loop's body... Same goes for while loops.
|
||
|
* I'm going to double that semicolon (if any) so after the next line,
|
||
|
* which strips semicolons here & there, we're still left with this one.
|
||
|
*/
|
||
|
$content = preg_replace('/(for\([^;\{]*;[^;\{]*;[^;\{]*\));(\}|$)/s', '\\1;;\\2', $content);
|
||
|
$content = preg_replace('/(for\([^;\{]+\s+in\s+[^;\{]+\));(\}|$)/s', '\\1;;\\2', $content);
|
||
|
/*
|
||
|
* Below will also keep `;` after a `do{}while();` along with `while();`
|
||
|
* While these could be stripped after do-while, detecting this
|
||
|
* distinction is cumbersome, so I'll play it safe and make sure `;`
|
||
|
* after any kind of `while` is kept.
|
||
|
*/
|
||
|
$content = preg_replace('/(while\([^;\{]+\));(\}|$)/s', '\\1;;\\2', $content);
|
||
|
|
||
|
/*
|
||
|
* We also can't strip empty else-statements. Even though they're
|
||
|
* useless and probably shouldn't be in the code in the first place, we
|
||
|
* shouldn't be stripping the `;` that follows it as it breaks the code.
|
||
|
* We can just remove those useless else-statements completely.
|
||
|
*
|
||
|
* @see https://github.com/matthiasmullie/minify/issues/91
|
||
|
*/
|
||
|
$content = preg_replace('/else;/s', '', $content);
|
||
|
|
||
|
/*
|
||
|
* We also don't really want to terminate statements followed by closing
|
||
|
* curly braces (which we've ignored completely up until now) or end-of-
|
||
|
* script: ASI will kick in here & we're all about minifying.
|
||
|
* Semicolons at beginning of the file don't make any sense either.
|
||
|
*/
|
||
|
$content = preg_replace('/;(\}|$)/s', '\\1', $content);
|
||
|
$content = ltrim($content, ';');
|
||
|
|
||
|
// get rid of remaining whitespace at beginning/end
|
||
|
return trim($content);
|
||
|
}
|
||
|
|
||
|
/**
 * Put back everything that extract*() swapped out for placeholder text
 * (strings, regexes). $this->extracted is used as the replacement map for
 * strtr() (array keys are searched for, array values are substituted) and
 * is emptied once the content has been restored.
 *
 * @param string $content Content that may still contain placeholders
 *
 * @return string Content with the extracted data substituted back in
 */
protected function restoreExtractedData($content)
{
    if ($this->extracted) {
        // a single strtr() call handles all pairs at once; text that has
        // just been substituted is not scanned again for other placeholders
        $content = strtr($content, $this->extracted);

        // everything is back in place, so forget the stored originals
        $this->extracted = array();
    }

    // when nothing was extracted, the content is handed back untouched
    return $content;
}
|
||
|
|
||
|
/**
 * Build a lookup of operator => regex fragment, to be used when stripping
 * whitespace around operators with regular expressions.
 *
 * Each operator is escaped with preg_quote(). `+` and `-` are dropped from
 * the result, and the entries for `.` and `=` are always set to dedicated
 * lookbehind patterns, whether or not they were part of $operators.
 *
 * @param string[] $operators Literal operators
 * @param string   $delimiter Regex delimiter that must be escaped as well
 *
 * @return string[] Map of literal operator to its regex fragment
 */
protected function getOperatorsForRegex(array $operators, $delimiter = '/')
{
    // make every operator safe for use inside a regex
    $quoted = array_map(
        function ($operator) use ($delimiter) {
            return preg_quote($operator, $delimiter);
        },
        $operators
    );

    // literal operator => escaped operator
    $map = array_combine($operators, $quoted);

    // + and - are left out for now; they get special treatment
    unset($map['+'], $map['-']);

    // a dot must not be matched right after "digit + whitespace": it could
    // be a decimal point or a method call on a number, e.g. 42 .toString()
    $map['.'] = '(?<![0-9]\s)\.';

    // a plain = must not be mistaken for the tail of another operator that
    // ends in = (e.g. +=), so refuse to match when one of these precedes it
    $precedingChars = preg_quote('+-*\=<>%&|', $delimiter);
    $map['='] = '(?<!['.$precedingChars.'])\=';

    return $map;
}
|
||
|
|
||
|
/**
 * Build a lookup of keyword => regex fragment, to be used when stripping
 * whitespace around keywords with regular expressions.
 *
 * Each keyword is escaped with preg_quote(), nothing more. No word
 * boundaries (\b) are added to the returned fragments: the array_walk()
 * step that used to sit here never changed anything, because its callback
 * received the value by copy and array_walk() ignores what the callback
 * returns. It has been removed; the returned array is the same as before.
 *
 * @param string[] $keywords  Literal keywords
 * @param string   $delimiter Regex delimiter that must be escaped as well
 *
 * @return string[] Map of literal keyword to its escaped form
 */
protected function getKeywordsForRegex(array $keywords, $delimiter = '/')
{
    // escape keywords for use in regex; the delimiter is captured by the
    // closure instead of overwriting the $delimiter parameter with an array
    $escaped = array_map(
        function ($keyword) use ($delimiter) {
            return preg_quote($keyword, $delimiter);
        },
        $keywords
    );

    // literal keyword => escaped keyword
    return array_combine($keywords, $escaped);
}
|
||
|
|
||
|
/**
 * Resolve the replacement text for a single match.
 *
 * A callable $replacement is invoked with the match data and its return
 * value is used; anything else is returned unchanged.
 *
 * Note that the test is is_callable(), so a string that names an existing
 * function counts as a callback rather than as literal text.
 *
 * @param string|callable $replacement Replacement value
 * @param array           $match       Match data, in PREG_OFFSET_CAPTURE form
 *
 * @return string
 */
protected function executeReplacement($replacement, $match)
{
    if (is_callable($replacement)) {
        // the callback wants plain matched text per group, so keep only the
        // first element of every PREG_OFFSET_CAPTURE entry; group keys are
        // kept as they are
        $groups = array();
        foreach ($match as $key => $capture) {
            $groups[$key] = $capture[0];
        }

        return $replacement($groups);
    }

    // not a callback: pass the value through untouched
    return $replacement;
}
|
||
|
|
||
|
/**
 * Queue a pattern, together with its replacement, by appending the pair to
 * $this->patterns.
 *
 * A string $replacement must be plain text: placeholders such as $1 or \2
 * don't work. Use a callback when you need access to the match data.
 *
 * @param string          $pattern     PCRE pattern
 * @param string|callable $replacement Replacement value for matched pattern
 */
protected function registerPattern($pattern, $replacement = '')
{
    // the pattern will be executed more than once, so the "S" modifier is
    // appended to have it studied
    $this->patterns[] = array($pattern.'S', $replacement);
}
|
||
|
}
|