2 # Based on "Pack.js" by Dean Edwards <http://dean.edwards.name/>
\r
3 # Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>
\r
4 # Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>
\r
12 # Package wide variable declarations
\r
13 use vars qw/$VERSION $PM_VERSION
\r
14 $_X_encodePrivate $_JSunpack $_JSdecode %baseLookup
\r
15 $_X_encode10 $_X_encode36 $_X_encode62 $_X_encode95
\r
16 $_JSencode10 $_JSencode36 $_JSencode62 $_JSencode95
\r
18 $_X_script $_X_encoding $_X_fastDecode $_X_specialChars
\r
21 $PM_VERSION = $ParseMaster::VERSION;
\r
23 # Package wide constants
\r
24 my $X_IGNORE = q{$1};
\r
25 my $X_ENCODE = q/\x24encode\(\x24count\)/; # NB: requires g modifier
\r
26 my $PERL = 'perl'; # Flag to indicate whether we need to use one of our "internal" Perl encoding functions
\r
27 my $JSCRIPT = 'jscript'; # or embed a pre-build JScript encoding function
\r
28 ########################################
\r
31 sub pack($$$$) { # require 4 arguments
\r
34 ($_X_script, $_X_encoding, $_X_fastDecode, $_X_specialChars) = @_;
\r
35 # validate parameters (sort of!)
\r
37 $_X_encoding = ($_X_encoding > 95) ? 95 : $_X_encoding;
\r
39 @_X_parsers = (); # Reset parsers
\r
41 ####################
\r
42 sub _X_pack($) { # require 1 argument
\r
43 ####################
\r
44 # apply all parsing routines
\r
45 my $X_script = shift;
\r
46 for (my $i = 0; $i<scalar(@_X_parsers); $i++) {
\r
47 my $X_parse = $_X_parsers[$i];
\r
48 $X_script = &$X_parse($X_script);
\r
53 ######################
\r
54 sub _X_addParser { #
\r
55 ######################
\r
56 # keep a list of parsing functions, they'll be executed all at once
\r
57 my $X_parser = shift;
\r
58 push (@_X_parsers,$X_parser);
\r
61 #############################
\r
62 sub _X_basicCompression { #
\r
63 #############################
\r
64 # zero encoding - just removal of white space and comments
\r
65 my $X_script = shift;
\r
66 my $parser = ParseMaster->new();
\r
68 $parser->escapeChar("\\");
\r
70 $parser->add(q/'[^'\n\r]*'/, $X_IGNORE);
\r
71 $parser->add(q/"[^"\n\r]*"/, $X_IGNORE);
\r
73 $parser->add(q/\/\/[^\n\r]*[\n\r]/);
\r
74 $parser->add(q/\/\*[^*]*\*+([^\/][^*]*\*+)*\//);
\r
75 # protect regular expressions
\r
76 $parser->add(q/\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)/, q{$2}); # IGNORE
\r
77 $parser->add(q/[^\w\x24\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?/, $X_IGNORE);
\r
78 # remove: ;;; doSomething();
\r
79 $parser->add(q/;;[^\n\r]+[\n\r]/) if ($_X_specialChars);
\r
80 # remove redundant semi-colons
\r
81 $parser->add(q/;+\s*([};])/, q{$2});
\r
82 # remove white-space
\r
83 $parser->add(q/(\b|\x24)\s+(\b|\x24)/, q{$2 $3});
\r
84 $parser->add(q/([+\-])\s+([+\-])/, q{$2 $3});
\r
85 $parser->add(q/\s+/, '');
\r
87 return $parser->exec($X_script);
\r
90 ###############################
\r
91 sub _X_encodeSpecialChars { #
\r
92 ###############################
\r
93 my $X_script = shift;
\r
94 my $parser = ParseMaster->new();
\r
95 # replace: $name -> n, $$name -> $$na
\r
96 $parser->add(q/((\x24+)([a-zA-Z\x24_]+))(\d*)/,
\r
100 my $X_length = length($X_match[$X_offset+2]);
\r
101 my $lengthnext = length($X_match[$X_offset+3]);
\r
102 my $X_start = $X_length - ((($X_length - $lengthnext) > 0) ? ($X_length - $lengthnext) : 0);
\r
103 my $str = $X_match[$X_offset+1];
\r
104 $str = substr($str,$X_start,$X_length) . $X_match[$X_offset+4];
\r
107 # replace: _name -> _0, double-underscore (__name) is ignored
\r
108 my $X_regexp = q/\b_[A-Za-z\d]\w*/;
\r
109 # build the word list
\r
110 my %X_keywords = &_X_analyze($X_script, $X_regexp, $_X_encodePrivate);
\r
111 #print Dumper(%X_keywords);
\r
113 my $X_encoded = \$X_keywords{X_encoded}; # eg _private1 => '_0',_private2 => '_1';
\r
114 #print Dumper($X_encoded);
\r
115 $parser->add($X_regexp, sub {my $X_offset = pop; my @X_match = @_; return ${$X_encoded}->{$X_match[$X_offset]};});
\r
117 return $parser->exec($X_script);
\r
120 ###########################
\r
121 sub _X_encodeKeywords { #
\r
122 ###########################
\r
123 my $X_script = shift;
\r
124 # escape high-ascii values already in the script (i.e. in strings)
\r
125 if ($_X_encoding > 62) {$X_script = &_X_escape95($X_script)};
\r
126 # create the parser
\r
127 my $parser = ParseMaster->new();
\r
128 my $X_encode = &_X_getEncoder($_X_encoding,$PERL);
\r
129 # for high-ascii, don't encode single character low-ascii
\r
130 my $X_regexp = ($_X_encoding > 62) ? q/\w\w+/ : q/\w+/;
\r
131 # build the word list
\r
132 my %X_keywords = &_X_analyze($X_script, $X_regexp, $X_encode);
\r
133 #print Dumper(%X_keywords);
\r
134 my $X_encoded = \$X_keywords{X_encoded}; # eg alert => 2, function => 10 etc
\r
136 $parser->add($X_regexp, sub {my $X_offset = pop; my @X_match = @_; return ${$X_encoded}->{$X_match[$X_offset]};});
\r
137 # if encoded, wrap the script in a decoding function
\r
139 return $X_script && _X_bootStrap(\$parser->exec($X_script), \%X_keywords);
\r
142 ####################
\r
144 ####################
\r
146 my ($X_script, $X_regexp, $X_encode) = @_;
\r
148 # retreive all words in the script
\r
149 my @X_all = $X_script =~ m/$X_regexp/g; # Save all captures in a list context
\r
150 my %XX_sorted = (); # list of words sorted by frequency
\r
151 my %XX_encoded = (); # dictionary of word->encoding
\r
152 my %XX_protected = (); # instances of "protected" words
\r
154 my @X_unsorted = (); # same list, not sorted
\r
155 my %X_protected = (); # "protected" words (dictionary of word->"word")
\r
156 my %X_values = (); # dictionary of charCode->encoding (eg. 256->ff)
\r
157 my %X_count = (); # word->count
\r
158 my $i = scalar(@X_all); my $j = 0; my $X_word = '';
\r
159 # count the occurrences - used for sorting later
\r
161 $X_word = '$' . $X_all[--$i];
\r
162 if (!exists($X_count{$X_word})) {
\r
163 $X_count{$X_word} = [0,$i]; # Store both the usage count and original array position (ie a secondary sort key)
\r
164 $X_unsorted[$j] = $X_word;
\r
165 # make a dictionary of all of the protected words in this script
\r
166 # these are words that might be mistaken for encoding
\r
167 $X_values{$j} = &$X_encode($j);
\r
168 my $v = '$'.$X_values{$j};
\r
169 $X_protected{$v} = $j++;
\r
171 # increment the word counter
\r
172 $X_count{$X_word}[0]++;
\r
174 #print Dumper (%X_values);
\r
175 #print Dumper (@X_unsorted);
\r
176 #print Dumper (%X_protected);
\r
177 # prepare to sort the word list, first we must protect
\r
178 # words that are also used as codes. we assign them a code
\r
179 # equivalent to the word itself.
\r
180 # e.g. if "do" falls within our encoding range
\r
181 # then we store keywords["do"] = "do";
\r
182 # this avoids problems when decoding
\r
183 $i = scalar(@X_unsorted);
\r
185 $X_word = $X_unsorted[--$i];
\r
186 if (exists($X_protected{$X_word})) {
\r
187 $XX_sorted{$X_protected{$X_word}} = substr($X_word,1);
\r
188 $XX_protected{$X_protected{$X_word}} = 1; # true
\r
189 $X_count{$X_word}[0] = 0;
\r
192 #print Dumper (%XX_protected);
\r
193 #print Dumper (%XX_sorted);
\r
194 #print Dumper (%X_count);
\r
195 # sort the words by frequency
\r
196 # Sort with count a primary key and original array order as secondary key - which is apparently the default in javascript!
\r
197 @X_unsorted = sort ({($X_count{$b}[0] - $X_count{$a}[0]) or ($X_count{$b}[1] <=> $X_count{$a}[1])} @X_unsorted);
\r
198 #print Dumper (@X_unsorted) . "\n";
\r
201 # because there are "protected" words in the list
\r
202 # we must add the sorted words around them
\r
204 if (!exists($XX_sorted{$i})) {$XX_sorted{$i} = substr($X_unsorted[$j++],1)}
\r
205 $XX_encoded{$XX_sorted{$i}} = $X_values{$i};
\r
206 } while (++$i < scalar(@X_unsorted));
\r
208 #print Dumper(X_sorted => \%XX_sorted, X_encoded => \%XX_encoded, X_protected => \%XX_protected);
\r
209 return (X_sorted => \%XX_sorted, X_encoded => \%XX_encoded, X_protected => \%XX_protected);
\r
212 ######################
\r
213 sub _X_bootStrap { #
\r
214 ######################
\r
215 # build the boot function used for loading and decoding
\r
216 my ($X_packed, $X_keywords) = @_; # Reference arguments!
\r
217 #print Dumper ($X_keywords) . "\n";
\r
219 # $packed: the packed script - dereference and escape
\r
220 $X_packed = "'" . &_X_escape($$X_packed) ."'";
\r
222 my %sorted = %{$$X_keywords{X_sorted}}; # Dereference to local variables
\r
223 my %protected = %{$$X_keywords{X_protected}}; # for simplicity
\r
226 foreach my $key (keys %sorted) {$sorted[$key] = $sorted{$key}}; # Convert hash to a standard list
\r
228 # ascii: base for encoding
\r
229 my $X_ascii = ((scalar(@sorted) > $_X_encoding) ? $_X_encoding : scalar(@sorted)) || 1;
\r
231 # count: number of (unique {RS}) words contained in the script
\r
232 my $X_count = scalar(@sorted); # Use $X_count for assigning $X_ascii
\r
234 # keywords: list of words contained in the script
\r
235 foreach my $i (keys %protected) {$sorted[$i] = ''}; # Blank out protected words
\r
236 #print Dumper(@sorted) . "\n";
\r
238 # convert from a string to an array - prepare keywords as a JScript string->array {RS}
\r
239 $X_keywords = "'" . join('|',@sorted) . "'.split('|')";
\r
241 # encode: encoding function (used for decoding the script)
\r
242 my $X_encode = $_X_encoding > 62 ? $_JSencode95 : &_X_getEncoder($X_ascii,$JSCRIPT); # This is a JScript function (as a string)
\r
243 $X_encode =~ s/_encoding/\x24ascii/g; $X_encode =~ s/arguments\.callee/\x24encode/g;
\r
244 my $X_inline = '$count' . ($X_ascii > 10 ? '.toString($ascii)' : '');
\r
246 # decode: code snippet to speed up decoding
\r
248 if ($_X_fastDecode) {
\r
249 # create the decoder
\r
250 $X_decode = &_X_getFunctionBody($_JSdecode); # ie from the Javascript literal function
\r
251 if ($_X_encoding > 62) {$X_decode =~ s/\\\\w/[\\xa1-\\xff]/g}
\r
252 # perform the encoding inline for lower ascii values
\r
253 elsif ($X_ascii < 36) {$X_decode =~ s/$X_ENCODE/$X_inline/g}
\r
254 # special case: when $X_count==0 there ar no keywords. i want to keep
\r
255 # the basic shape of the unpacking funcion so i'll frig the code...
\r
256 if (!$X_count) {$X_decode =~ s/(\x24count)\s*=\s*1/$1=0/}
\r
260 my $X_unpack = $_JSunpack;
\r
261 if ($_X_fastDecode) {
\r
262 # insert the decoder
\r
263 $X_unpack =~ s/\{/\{$X_decode;/;
\r
265 $X_unpack =~ s/"/'/g;
\r
266 if ($_X_encoding > 62) { # high-ascii
\r
267 # get rid of the word-boundaries for regexp matches
\r
268 $X_unpack =~ s/'\\\\b'\s*\+|\+\s*'\\\\b'//g; # Not checked! {RS}
\r
270 if ($X_ascii > 36 || $_X_encoding > 62 || $_X_fastDecode) {
\r
271 # insert the encode function
\r
272 $X_unpack =~ s/\{/\{\$encode=$X_encode;/;
\r
274 # perform the encoding inline
\r
275 $X_unpack =~ s/$X_ENCODE/$X_inline/;
\r
278 # arguments {RS} Do this before using &pack because &pack changes the pack parameters (eg $fastDecode) in Perl!!
\r
279 my $X_params = "$X_packed,$X_ascii,$X_count,$X_keywords"; # Interpolate to comma separated string
\r
280 if ($_X_fastDecode) {
\r
281 # insert placeholders for the decoder
\r
282 $X_params .= ',0,{}';
\r
285 # pack the boot function too
\r
286 $X_unpack = &pack($X_unpack,0,0,1);
\r
289 return "eval(" . $X_unpack . "(" . $X_params . "))\n";
\r
292 #######################
\r
293 sub _X_getEncoder { #
\r
294 #######################
\r
295 # mmm.. ..which one do i need ?? ({RS} Perl or JScript ??)
\r
296 my ($X_ascii,$language) = @_;
\r
297 my $perl_encoder = ($X_ascii > 10) ? ($X_ascii > 36) ? ($X_ascii > 62) ? $_X_encode95 : $_X_encode62 : $_X_encode36 : $_X_encode10;
\r
298 my $jscript_encoder = ($X_ascii > 10) ? ($X_ascii > 36) ? ($X_ascii > 62) ? $_JSencode95 : $_JSencode62 : $_JSencode36 : $_JSencode10;
\r
299 return ($language eq $JSCRIPT) ? $jscript_encoder : $perl_encoder;
\r
302 #############################
\r
303 # Perl versions of encoders #
\r
304 #############################
\r
305 # base10 zero encoding - characters: 0123456789
\r
306 $_X_encode10 = sub {return &_encodeBase(shift,10)};
\r
307 # base36 - characters: 0123456789abcdefghijklmnopqrstuvwxyz
\r
308 $_X_encode36 = sub {return &_encodeBase(shift,36)};
\r
309 # base62 - characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
\r
310 $_X_encode62 = sub {return &_encodeBase(shift,62)};
\r
311 # high-ascii values - characters: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
\r
312 $_X_encode95 = sub {return &_encodeBase(shift,95)};
\r
313 # Lookup character sets for baseN encoding
\r
314 $baseLookup{10} = [(0..9)[0..9]]; # base 10
\r
315 $baseLookup{36} = [(0..9,'a'..'z')[0..35]]; # base 36
\r
316 $baseLookup{62} = [(0..9,'a'..'z','A'..'Z')[0..61]]; # base 62
\r
317 $baseLookup{95} = (); for (my $i=0; $i<95; $i++) {$baseLookup{95}[$i] = chr($i+161)}; # base95 (high ascii)
\r
318 #print Dumper(%baseLookup);
\r
319 #####################
\r
320 sub _encodeBase { #
\r
321 #####################
\r
322 # Generic base conversion function using defined lookup arrays (perl version only)
\r
323 my ($X_charCode, $base) = @_;
\r
324 my $X_encoded = '';
\r
325 # Do we know this encoding?
\r
326 if (exists ($baseLookup{$base})) {
\r
327 if ($X_charCode == 0) {$X_encoded = $baseLookup{$base}[0]}
\r
328 while($X_charCode > 0) {
\r
329 $X_encoded = $baseLookup{$base}[$X_charCode % $base] . $X_encoded;
\r
330 $X_charCode = int($X_charCode / $base);
\r
333 else {$X_encoded = "$X_charCode"} # default is to return unchanged (ie as for base 10) if no baselookup is available
\r
337 #############################
\r
338 $_X_encodePrivate = sub { #
\r
339 #############################
\r
341 my $X_charCode = shift;
\r
342 return '_' . $X_charCode;
\r
345 ############################
\r
346 sub _X_escape($script) { #
\r
347 ############################
\r
348 # protect characters used by the parser
\r
349 my $X_script = shift;
\r
350 $X_script =~ s/([\\'])/\\$1/g;
\r
354 #####################
\r
355 sub _X_escape95 { #
\r
356 #####################
\r
357 # protect high-ascii characters already in the script
\r
358 my $X_script = shift;
\r
359 $X_script =~ s/([\xa1-\xff])/sprintf("\\x%1x",ord($1))/eg;
\r
363 ############################
\r
364 sub _X_getFunctionBody { #
\r
365 ############################
\r
366 # extract the body of a function (ie between opening/closing {}) - consistent with Dean Edwards approach
\r
367 my $X_function = shift;
\r
368 $X_function =~ m/^.*\{(.*)\}*$/sg; # Multiline, global (greedy)
\r
369 my $start = index($X_function,'{');
\r
370 my $end = rindex($X_function,'}');
\r
371 $X_function = substr($X_function,($start+1),($end-1-$start));
\r
372 return $X_function;
\r
375 ######################
\r
376 sub _X_globalize { #
\r
377 ######################
\r
378 # set the global flag on a RegExp (you have to create a new one) !!! Unused in perl version
\r
379 # my $X_regexp = shift;
\r
382 # build the parsing routine
\r
383 &_X_addParser(\&_X_basicCompression);
\r
384 &_X_addParser(\&_X_encodeSpecialChars) if ($_X_specialChars);
\r
385 &_X_addParser(\&_X_encodeKeywords) if ($_X_encoding);
\r
388 return &_X_pack($_X_script);
\r
391 ########################
\r
392 # Javascript Literals #
\r
393 ########################
\r
395 # JScript function "_unpack" - from DeanEdwards pack.js (NB: No ";" after final "}")
\r
396 ($_JSunpack) = <<'END_JSCRIPT_UNPACK';
\r
397 /* unpacking function - this is the boot strap function */
\r
398 /* data extracted from this packing routine is passed to */
\r
399 /* this function when decoded in the target */
\r
400 function($packed, $ascii, $count, $keywords, $encode, $decode) {
\r
402 if ($keywords[$count])
\r
403 $packed = $packed.replace(new RegExp('\\b' + $encode($count) + '\\b', 'g'), $keywords[$count]);
\r
404 /* RS_Debug = $packed; */ /* {RS} !!!!!!!!! */
\r
409 # JScript function "_decode" - from DeanEdwards pack.js
\r
410 ($_JSdecode) = <<'END_JSCRIPT_DECODE';
\r
411 /* code-snippet inserted into the unpacker to speed up decoding */
\r
413 /* does the browser support String.replace where the */
\r
414 /* replacement value is a function? */
\r
415 if (!''.replace(/^/, String)) {
\r
416 /* decode all the values we need */
\r
417 while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
\r
418 /* global replacement function */
\r
419 $keywords = [function($encoded){return $decode[$encoded]}];
\r
420 /* generic match */
\r
421 $encode = function(){return'\\w+'};
\r
422 /* reset the loop counter - we are now doing a global replace */
\r
428 # JScript versions of encoders
\r
429 ($_JSencode10) = <<'END_JSCRIPT_ENCODE10';
\r
430 /* zero encoding */
\r
431 /* characters: 0123456789 */
\r
432 function($charCode) {
\r
435 END_JSCRIPT_ENCODE10
\r
437 ($_JSencode36) = <<'END_JSCRIPT_ENCODE36';
\r
438 /* inherent base36 support */
\r
439 /* characters: 0123456789abcdefghijklmnopqrstuvwxyz */
\r
440 function($charCode) {
\r
441 return $charCode.toString(36);
\r
443 END_JSCRIPT_ENCODE36
\r
445 ($_JSencode62) = <<'END_JSCRIPT_ENCODE62';
\r
446 /* hitch a ride on base36 and add the upper case alpha characters */
\r
447 /* characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ */
\r
448 function($charCode) {
\r
449 return ($charCode < _encoding ? '' : arguments.callee(parseInt($charCode / _encoding))) +
\r
450 (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
\r
452 END_JSCRIPT_ENCODE62
\r
454 ($_JSencode95) = <<'END_JSCRIPT_ENCODE95';
\r
455 /* use high-ascii values */
\r
456 /* characters: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ */
\r
457 function($charCode) {
\r
458 return ($charCode < _encoding ? '' : arguments.callee($charCode / _encoding)) +
\r
459 String.fromCharCode($charCode % _encoding + 161);
\r
461 END_JSCRIPT_ENCODE95
\r