#!/usr/bin/perl -w # # Zsolt's Perl Compressor & Obfuscator v1 # # This program breaks up a perl script into basic code units # and then changes all the variable names and then puts the # pieces back together with only the minimum number of # spaces needed. # # Written by Zsolt N. Perry in January 2024, Pensacola, Fla. # For questions, bug reports, feature requests, general feedback, # write to Zsolt at zsolt500n@gmail.com or zsnp500@cox.net. # # THIS SOFTWARE IS DISTRIBUTED FOR FREE "AS IS" WITHOUT WARRANTY # OF ANY KIND. PERMISSION IS GRANTED TO ALL PEOPLE EVERYWHERE TO # USE THIS SOFTWARE IN WHOLE OR IN PARTS FOR ANY PURPOSE INCLUDING # COMMERCIAL OR EDUCATIONAL APPLICATIONS, AND TO ALTER IT, LICENSE # IT, REDISTRIBUTE IT, OR SELL IT. THE AUTHOR AND CONTRIBUTORS OF # THIS SOFTWARE SHALL NOT BE HELD LIABLE FOR ANY LOSS RESULTING # FROM THE USE/MISUSE OF THIS SOFTWARE OR ANY DERIVATIVES THEREOF. # #################################################################### use 5.004; $| = 1; # Stop buffering stdout. my $INPUT_FILE = "K:\\HOME\\DESKTOP\\COMPRESS.pl"; my $OUTPUT_FILE = "Z:\\Test.pl"; my $OBFUSCATE_VARIABLES = 1; my $ALIGN_SUBS = 0; print "\n\nZsolt's Perl Compressor & Obfuscator v1 FREEWARE"; # STEP 1: Read the file plain text. my $FILESIZE = -s $INPUT_FILE; print "\nReading file: $INPUT_FILE (", Commify($FILESIZE), ' bytes)'; my $DATA = ReadTheEntireFile($INPUT_FILE); # STEP 2: Get rid of all binary characters from the script: $DATA =~ tr|\r\n\t\x20-\x7E||cd; my $BINCOUNT = $FILESIZE - length($DATA); print "\nBinary characters removed: $BINCOUNT"; # STEP 3: Convert line breaks to Linux format. $DATA =~ s/\r\n/\n/g; # Convert DOS to Linux $DATA =~ tr|\r|\n|; # Convert Mac to Linux my $LINES = $DATA =~ tr|\n|\n|; # Count number of line breaks print "\nNumber of lines: ", ($LINES + 1); # STEP 4: Create a list of obfuscated variable names. # These are special variables which we should not rename: my $SPECIAL_VARIABLES = ' a b _ 0 1 2 3 4 5 6 7 8 9 ENV ARG ARGV ARGVOUT PID GID EGID UID EUID SUBSEP F INC ISA OSNAME SIG BASETIME MATCH PREMATCH POSTMATCH OFS NR RS ORS WARNING ERRNO PERLDB LIST_SEPARATOR PROCESS_ID PROGRAM_NAME REAL_GROUP_ID EFFECTIVE_GROUP_ID REAL_USER_ID EFFECTIVE_USER_ID SUBSCRIPT_SEPARATOR OLD_PERL_VERSION SYSTEM_FD_MAX INPLACE_EDIT PERL_VERSION EXECUTABLE_NAME LAST_PAREN_MATCH LAST_SUBMATCH_RESULT LAST_MATCH_END LAST_PAREN_MATCH LAST_MATCH_START LAST_REGEXP_CODE_RESULT OUTPUT_FIELD_SEPARATOR INPUT_LINE_NUMBER INPUT_RECORD_SEPARATOR OUTPUT_RECORD_SEPARATOR OUTPUT_AUTOFLUSH ACCUMULATOR FORMAT_FORMFEED FORMAT_PAGE_NUMBER FORMAT_LINES_LEFT FORMAT_LINE_BREAK_CHARACTERS FORMAT_LINES_PER_PAGE FORMAT_TOP_NAME FORMAT_NAME EXTENDED_OS_ERROR EXCEPTIONS_BEING_CAUGHT OS_ERROR EVAL_ERROR COMPILING DEBUGGING VERSION '; my @NEWVARS; my $MAXVARS = 2000; if ($OBFUSCATE_VARIABLES) { @NEWVARS = GenerateShortVariableNames($MAXVARS); Shuffle(@NEWVARS); } # STEP 5: Analyze the perl script character by character. print "\nProcessing code..."; my @OUTPUT; # Global Output buffer my $C = ''; # Current character as string my $A = 0; # Current character as integer (ASCII code) my $PREV; # Previous character my $PREVWORD = ''; # Previous bareword captured my $PREVSYMB = ''; # Previous symbol captured my $PREVTYPE = ''; # Previous object type captured ( NUM | VAR | STR | WORD | REGEX | SYM | BLOCK ) my $DEPTH = 0; # Indentation depth my $SPACE = 0; # Was there a whitespace between this and the previous object? ( 0=NO | 1=YES ) my %VARS; # Use this dictionary to rename variables ( $VARS{OLDNAME} => "NEWNAME" ) my $PTR = 0; # Global File Pointer for (; $PTR < length($DATA); $PTR++) # MAIN LOOP { $PREV = $C; $C = substr($DATA, $PTR, 1); $A = vec($DATA, $PTR, 8); # Capture whitespace: if ($A == 32 || $A == 9 || $A == 10) { $SPACE = 1; next; } # Track code depth by capturing the brace characters: { } if ($A == 123) { $PREVTYPE = 'BLOCK'; push(@OUTPUT, '{'); $DEPTH++; next; } if ($A == 125) { $PREVTYPE = 'BLOCK'; push(@OUTPUT, '}'); if ($DEPTH > 0) { $DEPTH--; } else { print "\nWarning: Missing opening brace.\n"; } if ($ALIGN_SUBS && $DEPTH == 0) { push(@OUTPUT, "\n"); } next; } # Capture the < sign and see if it's a heredoc: if ($A == 60) { if ($PREVSYMB eq '=' || ($PREVTYPE ne 'VAR' && $PREVTYPE ne 'NUM')) { my $TEST = substr($DATA, $PTR, 32); if ($TEST =~ m/\<\<\s*['"]{,1}|\<\<[a-zA-Z\_]+/) { CaptureHeredoc() and next; } # False alarm. It was not a heredoc. # Maybe it was just a shift operator. } } # Capture POD documentation if ($C eq '=' && $PREV eq "\n") { CapturePOD() and next; } # Capture regex ( anything that begins with / or =~ or !~ ) if ($A == 47 && ($PREVTYPE eq 'WORD' && index('])}', $PREVSYMB) < 0) || ($C eq '~' && index('=!', $PREV) >= 0)) { CaptureRegex() and next; } if ($A == 36 || $A == 64 || $A == 37) # Capture variables { # We may see a situation such as print FILEHANDLE $STRING; # where a bareword or variable is immediately followed by # another variable, and we must insert a space between the two # (unless there was no space there in the original script). # However when the word "my" appears before a variable, # these two do not need a space in between. if (index('*+-[{(;,=', $PREVSYMB) < 0) { if ($SPACE && ($PREVTYPE eq 'VAR' && $A == 36) || ($PREVTYPE eq 'WORD' && index(' my our defined undef return ', " $PREVWORD ") < 0)) { push(@OUTPUT, ' '); } } CaptureVariable() and next; } if ($C eq '#' && $PREV ne '$') # Capture and eliminate comments { CaptureComment(); next; } # Process misc "leftover" symbols: my $MISC = index('[]()<>;,!?=$@%&:+*^|\\\/~-.', $C); if ($MISC >= 0) { if ($SPACE && $C eq '.' && $PREVTYPE eq 'NUM') { push(@OUTPUT, ' '); } push(@OUTPUT, $C); $PREVSYMB = $C; $SPACE = 0; next; } if ($A > 47 && $A < 58) # Digits found? { # If the number was preceded by period and then space, # we must not erase that space! Also: If the number # was preceded by a bareword or variable name, we # must not erase the space in front of the number. if (index('<>,:;|[(%/*=+-', $PREVSYMB) < 0) { if ($SPACE && ((index('.', $PREVSYMB) >= 0) || ($PREVTYPE eq 'WORD' || $PREVTYPE eq 'VAR'))) { push(@OUTPUT, ' '); } } CaptureNumber(); next; } if ($A == 34 || $A == 39 || $A == 96) # Capture strings { # If there's a variable or even a lonely $ sign by itself, # if it is immediately followed by a space and a string, we # should not eliminate that space in front of the string! if (index('={}[]();,.', $PREVSYMB) < 0) { if ($SPACE && $PREVSYMB eq '$' || $PREVTYPE eq 'STR' || $PREVTYPE eq 'VAR') { push(@OUTPUT, ' '); } } CaptureString(); next; } # Capture bareword: if ($A == 95 || ($A > 96 && $A < 123) || ($A > 64 && $A < 91)) { if (index('*+-[{(;,=', $PREVSYMB) < 0) { if ($SPACE && ($PREVTYPE eq 'WORD' || $PREVTYPE eq 'VAR' || $PREVTYPE eq 'SYM')) { push(@OUTPUT, ' '); } } $SPACE = 1; $PREVSYMB = 'x'; my $WORD = CaptureWord(); if ($A == 113 && index(' qq qw qx qr q ', " $WORD ") >= 0) { $PREVWORD = $WORD; CaptureQQ(); next; } if ($PREVWORD eq 'sub') { print '.'; } if ($WORD eq '__END__') { pop(@OUTPUT); last; } # We exit here. if ($WORD eq '__DATA__') { $PTR++; CaptureTail(); last; } $PREVWORD = $WORD; next; } } undef %VARS; # Free up some memory. undef $DATA; undef @NEWVARS; # STEP 6: Write output. print "\nSaving file: $OUTPUT_FILE "; CreateFile($OUTPUT_FILE, @OUTPUT); $FILESIZE = -s $OUTPUT_FILE; print '(', Commify($FILESIZE), " bytes)\n\n"; exit; ################################################## # Perl | v2024.1.10 # This function captures a regex from start to finish. # # Usage: CaptureRegex() # sub CaptureRegex { my $START = $PTR; my $SEPARATOR = ''; my $OUT = ''; my $TEST = substr($DATA, $PTR, 256); my $LEN = 0; # Identify tr operator: if ($TEST =~ m/^(\~\s*tr\s*)([^\t\n ]{1})/) { $OUT = '~tr'; $LEN = length($1); $SEPARATOR = $2 x 2; } # Identify y operator: elsif ($TEST =~ m/^(\~\s*y\s*)([^\t\n ]{1})/) { $OUT = '~y'; $LEN = length($1); $SEPARATOR = $2 x 2; } # Identify regex replace: elsif ($TEST =~ m/^(\~\s*s\s*)([^\t\n ]{1})/) { $OUT = '~s'; $LEN = length($1); $SEPARATOR = $2 x 2; } # Identify regex match: elsif ($TEST =~ m/^(\~\s*m\s*)([^\t\n ]{1})/) { $OUT = '~m'; $LEN = length($1); $SEPARATOR = $2; } # Identify simple regex match separated by / elsif ($TEST =~ m/^(\~?)(\s*)\//) { $OUT = $1; $LEN = length($OUT . $2); $SEPARATOR = '/'; } else { $PTR = $START; return 0; } # It's not a regex. length($OUT) and push(@OUTPUT, $OUT); $PTR += $LEN; # Skip through the "introduction" $SEPARATOR = GetSeparatorCharacters($SEPARATOR); CapturePattern($SEPARATOR); $PREVTYPE = 'REGEX'; $PREVWORD = ''; $PREVSYMB = 'x'; $SPACE = 0; return 1; } ################################################## # Perl | v2024.1.10 # This function captures a quoted list. # # When calling this function, the $PTR global pointer # must point to the last letter of the word qq qw qx qr or q. # And when this function exits, $PTR will point to the # closing quote character. # # This function correctly captures things like # qr(( )( )); # q/rain\/snow\/water\/vapor/; # qw<\<\<\<>; # qq#apple orange kiwi#; # qx\command\; # # Usage: CaptureQQ() # sub CaptureQQ { my $OPENING = substr($DATA, ++$PTR, 1); my $SEPARATOR = GetSeparatorCharacters($OPENING); CapturePattern($SEPARATOR); $PREVTYPE = 'REGEX'; $PREVWORD = ''; $PREVSYMB = 'x'; $SPACE = 0; return 1; } ################################################## # Perl | v2024.1.10 # This function captures a regex pattern or a quoted list. # # When calling this function, the $PTR pointer must be # pointing to the first separator character of the # regex or quoted list. The only argument this function # requires is the list of separator characters. # When processing a simple regex match such as /abc/ # the SEPARATOR should hold a single / character. # If we're processing regex replace such as =~ s(WAS)(IS)gi; # then the SEPARATOR will be '()()' << that's four letters. # When doing $STR =~ tr|||; the SEPARATOR will be '|||' # # Usage: CapturePattern(SEPARATORS) # sub CapturePattern { my $SEPARATOR = shift; my $OPENING = substr($SEPARATOR, 0, 1); my $CLOSING = substr($SEPARATOR, 1, 1); my $GROUPS = (length($SEPARATOR) == 2) ? 1 : 2; my $SAME = $OPENING eq $CLOSING; if ($SAME) { $GROUPS++; $SEPARATOR = $OPENING; } my $BACKSLASH = 0; my $QDEPTH = 0; my $START = $PTR; my $REGEX = ''; for (; $PTR < length($DATA); $PTR++) { my $C = substr($DATA, $PTR, 1); $REGEX .= $C; # Check for embedded variable names in regex if (($C eq '\\' && ($BACKSLASH & 1) == 0) || $C eq '$' || $C eq '@') { my $TEST = substr($DATA, $PTR, 500); # Replace embedded variable names in regex: if ($TEST =~ m/^([\$\@]{1}[a-zA-Z\_]{1}[a-zA-Z\_0-9]*)/) { my $VAR = $1; $PTR += length($VAR) - 1; $REGEX .= ReplaceVariable($VAR); } # Replace embedded variable names in regex between \Q and \E elsif ($TEST =~ m/^(Q\s*)([\$\@]{1}[a-zA-Z0-9\_]+)(\s*\\E)/) { my $PREFIX = $1; my $VAR = $2; my $SUFFIX = $3; $PTR += length($PREFIX) + length($VAR) + length($SUFFIX) - 1; $REGEX .= ReplaceVariable($VAR) . "\\E"; } } if (($BACKSLASH & 1) == 0) { if ($SAME) { if ($C eq $SEPARATOR) { $GROUPS--; } } else { if ($C eq $OPENING) { $QDEPTH++; } if ($C eq $CLOSING) { $QDEPTH--; if ($QDEPTH == 0) { $GROUPS--; } } } if ($GROUPS == 0) { last; } } if ($C eq '\\') { $BACKSLASH++; } else { $BACKSLASH = 0; } } if ($PTR + 1 == length($DATA)) { print "\nError: Unexpected end of file in the middle of regex or quoted list.\n"; return 0; } push(@OUTPUT, $REGEX); return 1; } ################################################## # Perl | v2024.1.10 # This function helps the regex match to correctly # identify the boundaries of a regex pattern by # returning separator characters to look for. # For example, some regex patterns are separated by # the // characters, and some are separated by () # # This function expects one argument which should be # a separator character or multiple separators and # it determines when the closing character needs to # be the same and when it needs to be different. # For example $STR =~ tr|||; << Here the closing # separator is the same as the opening separator. # But that's not true here: $STR =~ s((asd)(dsa))()gi; # # Usage: STRING = GetSeparatorCharacters(STRING) # sub GetSeparatorCharacters { my $SEPARATOR = defined $_[0] ? $_[0] : ''; length($SEPARATOR) or return ''; my $OPENING = substr($SEPARATOR, 0, 1); my $CLOSING = $OPENING; if ($OPENING eq '(') { $CLOSING = ')'; } if ($OPENING eq '[') { $CLOSING = ']'; } if ($OPENING eq '<') { $CLOSING = '>'; } if ($OPENING eq '{') { $CLOSING = '}'; } if ($OPENING eq $CLOSING) { return (length($SEPARATOR) == 1) ? $OPENING x 2 : $OPENING x 3; } my $PAIR = $OPENING . $CLOSING; return (length($SEPARATOR) == 1) ? $PAIR : $PAIR x 2; } ################################################## # Perl | v2024.1.9 # This function skips through documentations and # moves the $PTR pointer to the letter "t" in the # "=cut" string. # # Usage: CapturePOD() # sub CapturePOD { my $TEST = substr($DATA, $PTR, 64); if ($TEST =~ m/^(\=[a-zA-Z]+)/) { $PTR += length($1); $PTR = index($DATA, "\n=cut", $PTR); if ($PTR > 0) { $PTR += 4; } else { $PTR = length($DATA); } $PREVSYMB = 'x'; $PREVWORD = ''; $SPACE = 1; return 1; } else { return 0; } } ################################################## # Perl | v2024.1.9 # This function captures a number. The $PTR pointer # must point to the first digit of the number. And # when this function exists, $PTR will point to the # last digit of the number. # # The following number formats are recognized # by this function: # 0.123 # 1234567 # 1234.000 # 1234.123456 # 1.234e+5 or 1.234e-5 # 1_000_000_000_000 # 0xA5C9FF07 # 0b01011101 # 0777 # # Note: This function will not capture the period in # front of a decimal when there is no preceding zero # and it will not capture the minus or plus sign in # front of a number. Why? Because this function is # called only when a digit is encountered in the code. # # Usage: CaptureNumber() # sub CaptureNumber { my $TEST = substr($DATA, $PTR, 512); my $NUM = ''; if ($TEST =~ m/([0-9]+)/) { $NUM = $1; } # Simple number: 900 or 0777 elsif ($TEST =~ m/(0x[0-9a-fA-F]+)/) { $NUM = $1; } # Hexadecimal number: 0xC9FF elsif ($TEST =~ m/(0b[01]+)/) { $NUM = $1; } # Binary number: 0b01001011 elsif ($TEST =~ m/([0-9\_]*[0-9]{3})/) { $NUM = $1; } # Big number: 1_000_000_000_000 elsif ($TEST =~ m/([0-9]+\.[0-9]+)/) { $NUM = $1; } # Float: 123.4567 elsif ($TEST =~ m/([0-9]+[0-9.]*[eE]{1}[\-\+]{,1}[0-9]+)/) { $NUM = $1; } # Scientific notation: 1.23456e+19 elsif ($TEST =~ m/([0-9.]+)/) { $NUM = $1; } # Version number: 1.2.3.4 or List: 0..123 if (length($NUM)) { $PTR += length($NUM) - 1; push(@OUTPUT, $NUM); } else { print "\nWarning: Unrecognized number.\n"; return 0; } $PREVTYPE = 'NUM'; $PREVSYMB = 'x'; $SPACE = 0; } ################################################## # Perl | v2024.1.9 # This function captures a bareword starting at the # current position $PTR. ($PTR is a global integer, # pointing to characters within $DATA. And $DATA is # a global string variable which contains the # entire file's contents we're processing.) # # A word is a series of characters made up of # letters, digits, and the underscore character. # A word cannot start with digits. A word cannot # contain space, tab, line break, $ # @ : or any # other special characters. # # This function returns the captured word string # and moves the $PTR pointer forward pointing to # the last letter of the word. When calling # this function, the $PTR pointer should point # to the first letter of the word. This function # also updates the value of $PREVTYPE which is a # global string variable that holds the previous # object's type. Its value can be NUMBER, WORD, # STRING, REGEX, etc... The $PREVTYPE value helps # us determine whether we must insert a space between # two objects or not. For example, two barewords # should be separated by a space. # # This function also resets the global $SPACE variable # to zero, which indicates whether two objects were # originally separated by a space or not. If we encounter # a whitespace after this bareword we just capture, then # the value of $SPACE will be set to 1. # # Usage: STRING = CaptureWord() # sub CaptureWord { # First, let's grab a test string that is long enough # to hold all the characters of the word we may find. # Note: The longest perl function name is 252 bytes. my $TEST = substr($DATA, $PTR, 512); my $WORD = ''; if ($TEST =~ m/^([a-zA-Z0-9\_]+)/) { $WORD = $1; if ($ALIGN_SUBS && $WORD eq 'sub') { # Insert a new line character in front of each sub declaration if (@OUTPUT && $OUTPUT[$#OUTPUT] ne "\n") { push(@OUTPUT, "\n"); } } push(@OUTPUT, $WORD); $PTR += length($WORD) - 1; } $PREVTYPE = 'WORD'; $PREVSYMB = 'x'; $SPACE = 0; return $WORD; } ################################################## # Perl | v2024.1.8 # This function captures a heredoc from start to # finish and sets the $PTR pointer to point to the # new line character that follows the ending pattern # of the heredoc. # # If this function successfully captured a heredoc, # it will return 1. If the text was not a heredoc # but a simple shift operator or less than sign, # then the function returns 0. # # Usage: INTEGER = CaptureHeredoc() # sub CaptureHeredoc { my $TERMINATOR = ''; my $PREFIX = ''; my $START = $PTR; # First we try to identify the terminator pattern. my $TEST = substr($DATA, $PTR, 512); # A heredoc must begin with: # 1) << '_+/Some \'code\' 123'; # 2) << "_+/Some \"code\" 123"; # 3) < To capture variables inside strings, we use the # ReplaceVarNameInsideString() function. # => To capture variables outside of strings, # we use the CaptureVariable() function. # # This function will also replace the variable # with a shorter name. # # Usage: CaptureVariable() # sub CaptureVariable { # Longest perl variable name is 252 bytes, # so we grab a chunk of code that is more than twice as long. # Why? Because we may encounter something like this in the wild: # $A_252_byte_long_variable_name[$Another_252_byte_long_variable_name] my $TEST = substr($DATA, $PTR, 512); my $STR = ''; if ($TEST =~ m/^([\$\@\%\#]{1,2}[a-zA-Z\_]{1}[a-zA-Z0-9\_]*)/) { $STR = $1; } # $AAA or $$AAA or $#AAA or @AAA elsif ($TEST =~ m/^([\$\@\%]{1}[0-9<>()!?^~=:;,.`'%@&#\-\+"\/\]\[\|\\]{1})/) { $STR = $1; } # Special variables: $% elsif ($TEST =~ m/^([\$\@\%]{1,2}[a-zA-Z]*[\{\[]{1}[a-zA-Z0-9\_]+[\}\]]{1})/) { $STR = $1; } # $AAA{BBB} or $AAA[123] elsif ($TEST =~ m/^([\$\@\%]{1,2}\^[a-zA-Z0-9\_]+\{[a-zA-Z0-9\_]+\})/) { $STR = $1; } # $^AAA{AAA} else { return 0; } # Not a variable? $PTR += length($STR) - 1; push(@OUTPUT, ReplaceVariable($STR)); $PREVTYPE = 'VAR'; $PREVSYMB = 'x'; $SPACE = 0; return 1; } ################################################## # Perl | v2024.1.9 # This function comes up with shorter variable name # for each variable name in the source script # unless that variable name happens to be a special # variable that should not be modified. # # The first argument must contain the name of the # variable preceded by its symbol. The following # variable formats are recognized: $ABC @ABC %ABC # $#ABC $$ABC $@ABC, etc... # # The function returns a new variable name with # the same prefix as the incoming variable. # Example: # $$myText => $$o # @list => @p # %state => %q # # Note: This function does not write to output; # it simply returns a new variable substitute # for an old one. # # Usage: NewName = ReplaceVariable(OldName) # sub ReplaceVariable { my $FULLNAME = shift; # Exit here if we don't want to shorten any variables: $OBFUSCATE_VARIABLES or return $FULLNAME; my $NAME = ''; my $PREFIX = ''; if ($FULLNAME =~ m/^([\$\%\@\#]{1,2})([a-zA-Z0-9\_]+)/) { $PREFIX = $1; $NAME = $2; } if (length($NAME) == 0 || length($PREFIX) == 0) { return $FULLNAME; } # It's not a variable. # Is this old variable name already in our dictionary? if (exists($VARS{$NAME})) { return $PREFIX . $VARS{$NAME}; } # Look it up and return it # Is this variable a special variable that should not be renamed? if (index($SPECIAL_VARIABLES, " $NAME ") >= 0) { return $FULLNAME; } # YES - Return unchanged. # Let's pick a new variable name: my $COUNT = scalar keys %VARS; # Let's see how many variables we use rigth now. if (@NEWVARS < $COUNT) # Oops. We ran out of short variable names. { print "\nWarning: Ran out of variable names! Please increase the value of \$MAXVARS\n"; return $FULLNAME; } my $NEW = $NEWVARS[$COUNT]; # Get new variable name $VARS{$NAME} = $NEW; # Write into our dictionary: old name => new name return $PREFIX . $NEW; # Return new variable } ################################################## # Perl | v2024.1.8 # This function captures a variable name inside # a double-quoted string and replaces it with # a shorter variable name. # # The only argument required is the string that # has been captured so far. This function will append # this string with a short variable name and also # increment the global $PTR pointer, so it will # point to the last letter of the variable name. # # Usage: STRING = ReplaceVarNameInsideString(STRING) # sub ReplaceVarNameInsideString { my $TEST = substr($DATA, $PTR, 512); # Detect variable names with an index number such as $var[3] or $$var[3] if ($TEST =~ m/^([\$\@\%]{1,2}[a-zA-Z0-9\_]+)(\[[0-9]+\])/) { my $A = $1; my $X = $2; $PTR += length($A . $X) - 1; $A = ReplaceVariable($A); $_[0] .= $A . $X; return $A . $X; } # Detect variable names with a variable as the index such as $A[$B] or $$A[$$B] if ($TEST =~ m/^([\$\@\%]{1,2}[a-zA-Z0-9\_]+)\[([\$]{1,2}[a-zA-Z0-9\_]+)\]/) { my $A = $1; my $B = $2; $PTR += length($A . $B) + 1; $A = ReplaceVariable($A); $B = ReplaceVariable($B); $_[0] .= $A . '[' . $B . ']'; return $A . '[' . $B . ']'; } # Detect variable names enclosed with braces such as ${ABC} if ($TEST =~ m/^([\$\@\%\#]{1,2})\{([a-zA-Z0-9\_]+)\}/) { my $PREFIX = $1; my $NAME = $2; my $VAR = $PREFIX . $NAME; $PTR += length($VAR) + 1; $NAME = substr(ReplaceVariable($VAR), 1); $_[0] .= $PREFIX . '{' . $NAME . '}'; return $PREFIX . '{' . $NAME . '}'; } # Detect simple variable names such as $ABC or $$ABC or $#ABC if ($TEST =~ m/^([\$\@\%\#]{1,2}[a-zA-Z\_]{1}[a-zA-Z0-9\_]*)/) { my $VAR = $1; $PTR += length($VAR) - 1; $VAR = ReplaceVariable($VAR); $_[0] .= $VAR; return $VAR; } # Detect special 2-letter variable names such as $' or $[ or $3 if ($TEST =~ m/^(\$[0-9\<\>\(\)!?~=:;,.`'%@&#\+\_\[\]\|\/\\]{1})/) { my $VAR = $1; $PTR += length($VAR) - 1; $_[0] .= $VAR; return $VAR; } # Detect special variables that start with $^ if ($TEST =~ m/^(\$\^[a-zA-Z0-9\_]+)/) { my $VAR = $1; $PTR += length($VAR) - 1; $_[0] .= $VAR; return $VAR; } return ''; } ################################################## # Perl | v2024.1.10 # This function captures the end of the script # which begins with the word __DATA__. # # Usage: CaptureTail() # sub CaptureTail { pop(@OUTPUT); push(@OUTPUT, "\n__DATA__"); push(@OUTPUT, substr($DATA, $PTR)); return 1; } ################################################## # Perl | v2024.1.5 # This function captures a comment. The $PTR pointer # must point to the # character when calling this # function. The $PTR pointer will point to the \n # character when this function exits. # # Usage: CaptureComment() # sub CaptureComment { my $End = index($DATA, "\n", $PTR); # Find end of line if ($End < 0) { $PTR = length($DATA); return 1; # End of File } my $COMMENT = substr($DATA, $PTR, $End - $PTR); $PTR = $End; # We delete all comments except the first line. if (@OUTPUT == 0 && $COMMENT =~ m/^\#\!\//) { push(@OUTPUT, $COMMENT . "\n"); } $PREVWORD = ''; $PREVSYMB = 'x'; $SPACE = 1; return 1; } ################################################## # Perl | v2024.1.8 # This function captures a literal string within the # script and outputs one block of string enclosed with # quotation marks or backticks. This function also # increments the global $PTR pointer, so when this # function returns, $PTR will point to the # string's closing quote character. # # Usage: CaptureString() # sub CaptureString { # Grab first character of string which is the quotation mark: my $QT = substr($DATA, $PTR, 1); my $STRING = $QT; my $BACKSLASH = 0; # Count number of consecutive \\ characters for ($PTR++; $PTR < length($DATA); $PTR++) { my $C = substr($DATA, $PTR, 1); # Get next character # Fix line break within a string. If a string stretches across # multiple lines, we'll merge it into a single line and add # "\n" characters where needed. if ($C eq "\n") { # If the line ends with a lonely backslash character, # then we just put "n" after it, but otherwise we add "\n" $STRING .= ($BACKSLASH & 1) ? 'n' : '\\n'; $BACKSLASH = 0; next; } # We also replace variable names inside strings. # First we must verify three things: # 1) The mention of a variable must occur within double-quoted string or backticks # 2) The number of preceding consecutive backslash characters must be an even number # 3) The mention of a variable must begin with one of these characters: $ @ % if (($QT eq '"' || $QT eq '`') && ($BACKSLASH & 1) == 0 && index('$@', $C) >= 0) { ReplaceVarNameInsideString($STRING); next; } $STRING .= $C; # Add this character # We reach the end of string when the current character # matches the opening quote character: if ($C eq $QT) { if (($BACKSLASH & 1) == 0) { # Under normal circumstances, we exit here. push(@OUTPUT, $STRING); $PREVTYPE = 'STR'; $PREVSYMB = 'x'; $SPACE = 0; return 1; } } # Keep count of consecutive \\ characters: if ($C eq '\\') { $BACKSLASH++; } else { $BACKSLASH = 0; } } # If the string was never terminated, then we exit here. push(@OUTPUT, $STRING); print "\nWarning: Unterminated string constant.\n"; return 1; } ################################################## # Perl | v2024.1.8 # This function generates N number of short variable # names and returns the list. These names will be # used to replace longer variable names in the script. # # Usage: LIST = GenerateShortVariableNames(N) # sub GenerateShortVariableNames { my $COUNT = shift; my @VARLIST = ('a'..'z', 'A'..'Z', '_', 0..9); my $S = join('', @VARLIST); my $END2 = length($S); my $END1 = $END2 - 10; my $NAME; @VARLIST = (); for (my $i = 0; $i < $END1; $i++) { $NAME = substr($S, $i, 1); if (index($SPECIAL_VARIABLES, " $NAME ") >= 0) { next; } # Skip push(@VARLIST, $NAME); if (@VARLIST >= $COUNT) { return @VARLIST; } } for (my $i = 0; $i < $END1; $i++) { for (my $j = 0; $j < $END2; $j++) { $NAME = substr($S, $i, 1) . substr($S, $j, 1); if (index($SPECIAL_VARIABLES, " $NAME ") >= 0) { next; } # Skip push(@VARLIST, $NAME); if (@VARLIST >= $COUNT) { return @VARLIST; } } } for (my $i = 0; $i < $END1; $i++) { for (my $j = 0; $j < $END2; $i++) { for (my $k = 0; $k < $END2; $j++) { $NAME = substr($S, $i, 1) . substr($S, $j, 1) . substr($S, $k, 1); if (index($SPECIAL_VARIABLES, " $NAME ") >= 0) { next; } # Skip push(@VARLIST, $NAME); if (@VARLIST >= $COUNT) { return @VARLIST; } } } } return @VARLIST; } ################################################## # File | v2024.1.9 # This function reads an entire file in binary mode # using the sysopen() and sysread() functions. # # Usage: STRING = ReadTheEntireFile(FILENAME) # sub ReadTheEntireFile { my $F = defined $_[0] ? $_[0] : ''; $F =~ tr`<>*?\"\|\x00-\x1F``d; # Remove illegal characters. length($F) or return ''; # Missing file name? -e $F and -f $F or return ''; # Make sure file exists. my $SIZE = -s $F; # Get file size. $SIZE or return ''; # Return zero bytes. my $BUF = ''; # Read buffer local *FILE; sysopen(FILE, $F, 0) or return ''; # Open file for read only. my $L = sysread(FILE, $BUF, $SIZE); # Read the entire file close FILE; return $BUF; } ################################################## # File | v2023.12.27 # This function creates a file in binary mode and # writes some content into it. If the file has # already existed, the old content will be deleted # and replaced with the new content. # Returns 1 on success or 0 if something went wrong. # # This function can have two or more arguments. # The first argument must be the file name, followed # by string(s) to be written to the file in binary # mode starting at the beginning of the file. # # Usage: STATUS = CreateFile(FILENAME, [STRINGS...]) # sub CreateFile { defined $_[0] or return 0; my $F = shift; # Remove illegal characters from file name: $F =~ tr`<>*?\"\|\x00-\x1F``d; length($F) or return 0; # No file name? local *FILE; open(FILE, ">$F") or return 0; # Create the file. binmode FILE; foreach (@_) { defined $_ and length($_) and print FILE $_; } close FILE; -e $F or return 0; # File exists? -f $F or return 0; # It's a plain file? return 1; } ################################################## # Array | v2024.1.9 # This function shuffles an array and returns zero. # Updates the original array! # # Usage: Shuffle(ARRAY) # sub Shuffle { for (my $i = 0; $i < @_; $i++) { my $R = int(rand(@_)); @_[$R, $i] = @_[$i, $R]; } return 0; } ################################################## # String | v2018.6.5 # This function inserts commas into a number at # every 3 digits and returns a string. # # This one-liner was originally written by # Martin Fabiani (strat) and was posted on the PerlMonks # website at www.PerlMonks.org/?node_id=157725 # # Usage: STRING = Commify(INTEGER) # sub Commify { my $N = reverse $_[0]; $N =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $N; } ##################################################