#!/usr/bin/perl

#####################################################################
#
#  COUNTLINES v1.1                             Last Update: 2023.1.26
#
#  This program reads a Perl script and prints the number of lines
#  of code, number of blank lines, number of semicolons, number of
#  subs, number of comment lines, average line length, etc.
#
#  Written by Zsolt N Perry in Dec. 2022. For questions, comments,
#  feature requests, or bug reports, write to zsnp@juno.com.
#  This Perl script was tested with TinyPerl 5.8 under Windows XP.
#  This file was downloaded from http://www.wzsn.net/perl
#
#               THIS ENTIRE SOURCE CODE IS FREEWARE.
#  If you want to incorporate it or parts of it into your program,
#  just copy and paste whatever you need! There is no need to ask
#  for permission. This software is distributed "AS IS." There is
#  no warranty of any kind. The author will not be held liable for
#  any loss resulting from the use or misuse of this software.
#
#####################################################################

use 5.004;
use strict;
use warnings;
$| = 1;

my $F = "countlines.pl";    # Input file name

my $SEMICOLONS      = 0;
my $BLANK_LINES     = 0;
my $COMMENT_LINES   = 0;
my $LINES_OF_CODE   = 0;
my $CODE_SIZE       = 0;
my $NUMBER_OF_SUBS  = 0;
my $TOTAL_LINES     = 0;
my $LONGEST_SUBNAME = 0;

# Longest line of code:
my $LONGEST_LINE      = 0;   # Length
my $LONGEST_LINENUM   = -1;  # Line number
my $LONGEST_LINESTR   = '';  # Actual line (text)

############################################################
# Read entire Perl script.

$F =~ tr/\x00-\x1F\"\|*%$?<>//d;   # Remove illegal characters.
-e $F or ERROR("File doesn't exist - $F");
-f $F or ERROR("Not a plain file - $F");
my $FILESIZE = -s $F;      # Get file size.

local *FILE;
my $BUFFER = '';
sysopen(FILE, $F, 0) or ERROR("Cannot read file - $F");
binmode FILE;
sysread(FILE, $BUFFER, $FILESIZE);
close FILE;

############################################################
# Analyze code...

# First, we remove all binary characters from the input
# file content, and then we count the length again to
# see how many characters were removed:
my $TOTALBYTES = length($BUFFER);
$BUFFER =~ tr|\x0B\x0C\x00-\x08\x0E-\x1F\x7F-\xFF||d;
my $BINLEN = $TOTALBYTES - length($BUFFER);

my @LINES = split(/\n/, $BUFFER);
undef $BUFFER;

print "\n\nFile: $F";
my @ALLSUBS;
foreach (@LINES)
{
  my $LINE = Trim($_);
  $TOTAL_LINES++;

  my $LL = length($LINE);

  # Skip blank lines
  if ($LL == 0)
    { $BLANK_LINES++; next; }

  # Skip comment lines
  if (substr($LINE, 0, 1) eq '#')
    { $COMMENT_LINES++; next; }

  # Find the longest line.
  if ($LONGEST_LINE < $LL)
  {
    $LONGEST_LINE = $LL;
    $LONGEST_LINENUM = $TOTAL_LINES;
    #$LONGEST_LINESTR = $LINE;   # We don't need to know...
  }

  $LINES_OF_CODE++;   # Count lines of code
  $CODE_SIZE += $LL;  # Count bytes of code
  $SEMICOLONS += $LINE =~ tr|;|;|;  # Count semicolons

  # Count sub-routines
  if (substr($LINE, 0, 4) eq 'sub ')
  {
    $NUMBER_OF_SUBS++;
    my $SUBNAME = substr($LINE, 4);
    if ($SUBNAME =~ m/([a-zA-Z0-9\_\:]+)/)
    {
      push(@ALLSUBS, $1);
      if (length($1) > $LONGEST_SUBNAME)
        { $LONGEST_SUBNAME = length($1); }
    }
  }
}

############################################################
# Print all subs

# Sort subs in alphabetical order:
@ALLSUBS = sort(@ALLSUBS);
# Sort subs by name length:
@ALLSUBS = sort { length($a) <=> length($b) } @ALLSUBS;

my $COUNTER = 0;
my $SPACING = 6;
my $COLUMNS = ($LONGEST_SUBNAME < 30) ? 3 : 2;
if ($LONGEST_SUBNAME >= 40) { $COLUMNS = 1; }
my $ROWS = int(@ALLSUBS / $COLUMNS + 0.5);
my @OUTPUT = ('') x $ROWS;
for (my $c = 0; $c < $COLUMNS; $c++)
{
  my @SUBLIST;
  my $LONGEST_OUTPUT_LINE = 0;
  my $SAVE_COUNTER = $COUNTER;
  for (my $r = 0; $r < $ROWS; $r++)
  {
    if ($COUNTER >= @ALLSUBS) { $r = $ROWS; next; }
    my $LINE = $ALLSUBS[$COUNTER++];
    if (length($LINE) > $LONGEST_OUTPUT_LINE)
    { $LONGEST_OUTPUT_LINE = length($LINE); }
    $SUBLIST[$r] = $LINE;
  }
  $COUNTER = $SAVE_COUNTER;
  @SUBLIST = sort(@SUBLIST);
  for (my $r = 0; $r < $ROWS; $r++)
  {
    $COUNTER++;
    my $OFFSET = $SPACING - length($COUNTER);
    $OUTPUT[$r] .= (' ' x $OFFSET) . "$COUNTER. $SUBLIST[$r]" .
            (' ' x ($LONGEST_OUTPUT_LINE - length($SUBLIST[$r])));
    if ($COUNTER >= @ALLSUBS) { $r = $ROWS; next; }
  }
}
print "\n\nList of all subs:\n", '-' x 78, "\n", join("\n", @OUTPUT);

############################################################
# Print code statistics

my $AVG_LEN = int($CODE_SIZE / $LINES_OF_CODE);
my $BINPERCENT = int(($BINLEN / $TOTALBYTES * 100));
my $TXTPERCENT = 100 - $BINPERCENT;

print "\n", '-' x 78, "\n",
      "\n * File size in bytes .......", NUM($FILESIZE),
      "  ( $TXTPERCENT% text + $BINPERCENT% binary )",
      "\n * Bytes of code ............", NUM($CODE_SIZE),
          PercentOf($FILESIZE, $CODE_SIZE),
      "\n\n * Number of total lines ....", NUM($TOTAL_LINES),
          '  (100%)',
      "\n * Number of comment lines ..", NUM($COMMENT_LINES),
          PercentOf($TOTAL_LINES, $COMMENT_LINES),
      "\n * Number of lines of code ..", NUM($LINES_OF_CODE),
          PercentOf($TOTAL_LINES, $LINES_OF_CODE),
      "\n * Number of semicolons .....", NUM($SEMICOLONS),
          PercentOf($TOTAL_LINES, $SEMICOLONS),
      "\n * Number of blank lines ....", NUM($BLANK_LINES),
          PercentOf($TOTAL_LINES, $BLANK_LINES),
      "\n * Number of subs ...........", NUM($NUMBER_OF_SUBS),
      "\n * Average code line length .", NUM($AVG_LEN),
      "\n * Longest line of code .....", NUM($LONGEST_LINE);

print "\n\nThe longest line of code is found on line # ",
      $LONGEST_LINENUM, "\n\n";
exit;

##################################################
sub ERROR
{
  my $MSG = defined $_[0] ? $_[0] : '';
  if (length($MSG)) { print "\nERROR: $MSG\n"; }
  exit(1);
}
##################################################
sub NUM
{
  my $N = defined $_[0] ? Commify($_[0]) : 0;
  $N = ' ' . $N;
  while (length($N) < 11) { $N = '.' . $N; }
  return $N;
}
##################################################
#
# Usage: STRING = PercentOf(TOTAL, N)
#
sub PercentOf
{
  my $TOTAL = defined $_[0] ? $_[0] : 0;
  my $N = defined $_[1] ? $_[1] : 0;
  if ($TOTAL == 0) { return ''; }
  my $OUTPUT = int($N / $TOTAL * 100);
  while (length($OUTPUT) < 3) { $OUTPUT = ' ' . $OUTPUT; }
  return '  (' . $OUTPUT . '%)';
}
##################################################
#                             String | v2022.10.22
# This function removes whitespace from the left
# and right side of STRING. Whitespace is here defined
# as any character whose ASCII value is less than 33.
# This includes SPACE, TAB, ESC, NULL, CR, LF, etc...
# Usage: STRING = Trim(STRING)
#
sub Trim
{
  my $P = 0;
  (my $L = defined $_[0] ? length($_[0]) : 0) || return '';
  while ($P <= $L && ord(substr($_[0], $P++, 1)) < 33) {} $P--;
  while ($P <= $L && ord(substr($_[0], $L--, 1)) < 33) {}
  return substr($_[0], $P, $L - $P + 2);
}
##################################################
#
# This function inserts commas into a number at
# every 3 digits and returns a string.
# Usage: STRING = Commify(INTEGER)
# Copied from www.PerlMonks.org/?node_id=157725
#
sub Commify
{
  my $N = reverse $_[0];
  $N =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $N;
}
##################################################