Files
development/bin/Progress.pm
Clemens Schwaighofer 50db770992 Move the convert config setting to config other
the convert constant setting for inmage magick is now in config.other as
it is no longer core

Also add the base Progress and functions perl modules for central
tagging.

We will add config.pm, import_functions.pm, layout.pm and a basic test
script later too
2019-12-11 18:02:42 +09:00

499 lines
17 KiB
Perl

package Progress;
# AUTHOR: Clemens Schwaighofer
# DATE CREATED: 2009/6/16
# DESCRIPTION: progress percent class
# METHODS
# * init
# my $prg = Progress->new();
# will init a new progress class in the var $prg
# the following parameters can be set directly during a new call
# - verbose (1/0)
# - precision (-1~10)
# - wide_time (0/1)
# - microtime (0/1)
# setting is done via
# my $prg = Progress->new(verbose => 1, microtime = 1);
# * setting methods
# verbose($level int)
# $level has to be int, if not set there is no output show, at least 1 has to be given to see visible output
# precision($decimals int)
# $decimals has to be int, if set to -1 then the steps are done in 10 increase, else it sets how many decimals are visible, 0 for no decimals
# wide_time(0/1 int)
# sets the flag for wide time, if set to 1 the estimated time to end and time run is left prefixed with 15 chars
# microtime(0/1 int)
# sets the flag to always show microtime (1) or only if the previous time was the same (0)
# reset()
# resets all the internal vars for another new run
# SetStartTime(optional timestamp)
# sets the start times for this progress run, the overall start/end time is set, and the time used for the actual progress
# in case there is some processing done before the run starts, it is highly recommended to call SetETAStartTime before the actual processing starts
# if no timestamp is given, internal timestamp is used (this is recommended)
# SetETAStartTime(optional timestamp)
# only sets the start/end time for the actual "estimated time" calculation. It is recommended to call this right before the processing loop starts
# eg if there is a big query running that takes a lot of time, this method should be called before the reading loop
# as with SetStartTime a timestamp can be given, if not then the internal timestamp is used (this is recommended)
# SetEndTime(optional timestamp)
# sets the end time for the overall processing. This should be called at the very end of the script before any final stat data is printed
# linecount($lines int)
# sets the maximum lines that will be processed, used for percentage calculation. If non int is given, will set to 1. This will be only set once, to
# reset used reset() method.
# Either this or filesize NEED to be set
# filesize($bytes int)
# filesize in bytes, if non valid data is given, then it is set to 1.
# filesize() and linecount() can both be set, but at least one of them has to be set.
# if filesize is set a byte data output is added, if only linecount is given, only the linecount output will be given (no bytes per second, etc)
# ShowPosition(optional current byte position int)
# this is the main processing and has to be called at the end of the loop where the data is processed. If no bytes are given the internal counter (linecount)
# is used.
# for bytes it is recommended to use IO::File and $FH->tell to pass on the bytes
#
# VARIABLES
# * internal set
# change: flagged 1 if output is given or would be given. can be used for any post processing after the ShowPosition is called
# precision_ten_step: flagged 1 if the precision was set to -1
# start: overall start time
# end: overall end time
# count: count of processed lines
# [TODO: describe the others too, at the moment only below in %fields]
use strict;
use warnings;
use utf8;
BEGIN {
use POSIX;
use Carp;
use Time::HiRes qw(time);
use File::Basename;
use Number::Format qw(format_number);
use vars qw($AUTOLOAD);
push(@INC, File::Basename::dirname($0).'/');
}
# important includes
use functions;
# variable declarationf or access
# * can be set
# = only for read
# unmarked are internal only, but can be read if they are needed in further processing in the script
my %fields = (
linecount => 0, # * max lines in input
filesize => 0, # * max file size
precision => 1, # * comma after percent
wide_time => 0, # * if flagged 1, then the wide 15 char left bound format is used
verbose => 0, # * verbose status from outside
microtime => 0, # * microtime output for last run time (1 for enable, 0 for auto, -1 for disable)
change => 0, # = flag if output was given
start => undef, # = global start for the full script running time
start_run => undef, # = for the eta time, can be set after a query or long read in, to not create a wrong ETA time
start_time => undef, # loop start
end => undef, # = global end
end_time => undef, # loop end
count_size => undef, # = filesize current
count => 0, # = position current
current_count => 0, # last count (position)
lines_processed => 0, # lines processed in the last run
last_group => 0, # time in seconds for the last group run (until percent change)
lines_in_last_group => 0, # float value, lines processed per second to the last group run
lines_in_global => 0, # float values, lines processed per second to complete run
bytes_in_last_group => 0, # flaot value, bytes processes per second in the last group run
bytes_in_global => 0, # float value, bytes processed per second to complete run
size_in_last_group => 0, # bytes processed in last run (in bytes)
current_size => 0, # current file position (size)
last_percent => 0, # last percent position
precision_ten_step => 0, # if we have normal % or in steps of 10
percent_print => 5, # the default size, this is precision + 4
percent_precision => 1, # this is 1 if it is 1 or 0 for precision, or precision size
eta => undef, # estimated time to finish
full_time_needed => undef, # run time since start
lg_microtime => 0 # last group microtime, this is auto set during process.
);
# class init
sub new
{
my $proto = shift;
my $class = ref($proto) || $proto;
my %data = @_;
my $self = {
_permitted => \%fields,
%fields,
};
# vars to init
bless ($self, $class);
if ($data{'verbose'} && $data{'verbose'} =~ /^\d{1}$/) {
$self->{verbose} = $data{'verbose'};
}
if (exists($data{'precision'}) && (($data{'precision'} || $data{'precision'} == 0) && $data{'precision'} =~ /^\-?\d{1,2}$/)) {
$self->precision($data{'precision'});
}
if ($data{'microtime'} && $data{'microtime'} =~ /^(0|1)$/) {
$self->microtime($data{'microtime'});
}
if ($data{'wide_time'} && $data{'wide_time'} =~ /^(0|1)$/) {
$self->wide_time($data{'wide_time'});
}
return $self;
}
# auto load for vars
sub AUTOLOAD
{
my $self = shift;
my $type = ref($self) || croak "$self is not an object";
my $name = $AUTOLOAD;
$name =~ s/.*://;
unless (exists $self->{_permitted}->{$name}) {
croak "Can't access '$name' field in class $type";
}
if (@_) {
return $self->{$name} = shift;
} else {
return $self->{$name};
}
}
# destructor
sub DESTROY
{
# do nothing, there is nothing to close or finish
}
# SUB: reset
# PARAMS: none
# DESC: resets all the current counters only and current start times
sub reset
{
my $self = shift;
# reset what always gets reset
$self->{count} = 0;
$self->{count_size} = undef;
$self->{current_count} = 0;
$self->{linecount} = 0;
$self->{lines_processed} = 0;
$self->{last_group} = 0;
$self->{lines_in_last_group} = 0;
$self->{lines_in_global} = 0;
$self->{bytes_in_last_group} = 0;
$self->{bytes_in_global} = 0;
$self->{size_in_last_group} = 0;
$self->{filesize} = 0;
$self->{current_size} = 0;
$self->{last_percent} = 0;
$self->{eta} = 0;
$self->{full_time_needed} = 0;
$self->{start_run} = undef;
$self->{start_time} = undef;
$self->{end_time} = undef;
}
# SUB: microtime
# PARAMS: 1/0
# DESC: flag to set microtime on or off in the time output
# if not 1 or 0, set to 0
sub microtime
{
my $self = shift;
my $microtime;
if (@_) {
$microtime = shift;
if ($microtime == 1 || $microtime == 0) {
$self->{microtime} = $microtime;
} else {
$self->{microtime} = 0;
}
}
return $self->{microtime};
}
# SUB: wide_time
# PARAMS: 1/0
# DESC: flag to set wide_time (15 char spacer).
# if not 1 or 0, set to 0
sub wide_time
{
my $self = shift;
my $wide;
if (@_) {
$wide = shift;
if ($wide == 1 || $wide == 0) {
$self->{wide_time} = $wide;
} else {
$self->{wide_time} = 0;
}
}
return $self->{wide_time};
}
# SUB: precision
# PARAMS: precision in int
# DESC: sets the output percent precision calculation and printf width
# if negative, to ten step, if bigger 10, set to one
sub precision
{
my $self = shift;
my $comma;
if (@_) {
$comma = shift;
$comma = 0 if ($comma !~ /^\-?\d{1,}$/);
if ($comma < 0) {
# -2 is 5 step
# -1 is 10 step
if ($comma < -1) {
$self->{precision_ten_step} = 5;
} else {
$self->{precision_ten_step} = 10;
}
$self->{precision} = 0; # no comma
$self->{percent_precision} = 0; # no print precision
$self->{percent_print} = 3; # max 3 length
} else {
$self->{precision} = $comma < 0 || $comma > 10 ? 10 : $comma;
$self->{percent_precision} = $comma < 0 || $comma > 10 ? 10 : $comma;
$self->{percent_print} = ($comma == 0 ? 3 : 4) + $self->{percent_precision};
}
}
return $self->{precision};
}
# SUB: linecount
# PARAMS: max number of lines to be processed
# DESC: sets the max number for lines for the percent calculation, if negative or not number, set to 1
# can only be set ONCE
sub linecount
{
my $self = shift;
my $linecount;
if (!$self->{linecount}) {
if (@_) {
$linecount = shift;
$self->{linecount} = $linecount;
$self->{linecount} = 1 if ($linecount < 0 || $linecount !~ /\d+/)
}
}
return $self->{linecount};
}
# SUB: filesize
# PARAMS: max filesize for the to processed data
# DESC: sets the max filesize for the to processed data, if negative or not number, set to 1
# input data has to be in bytes without any suffix (no b, kb, etc)
# can only be set ONCE
sub filesize
{
my $self = shift;
my $filesize;
if (!$self->{filesize}) {
if (@_) {
$filesize = shift;
$self->{filesize} = $filesize;
$self->{filesize} = 1 if ($filesize < 0 || $filesize !~ /\d+/)
}
}
return $self->{filesize};
}
# SUB: SetStartTime
# PARAMS: time, or nothing
# DESC: sets all the start times
sub SetStartTime
{
my $self = shift;
if (@_) {
$self->{start} = shift;
} else {
$self->{start} = time();
}
$self->{start_time} = $self->{start};
$self->{start_run} = $self->{start};
}
# SUB: SetETAStartTime
# PARAMS: time, or nothing
# DESC: sets the loop & run time, for correct ETA callculation
sub SetETAStartTime
{
my $self = shift;
if (@_) {
$self->{start_time} = shift;
} else {
$self->{start_time} = time();
}
$self->{start_run} = $self->{start_time};
}
# SUB: SetEndTime
# PARAMS: time, or nothing
# DESC: sets the end time for running time calculation
sub SetEndTime
{
my $self = shift;
if (@_) {
$self->{end} = shift;
} else {
$self->{end} = time();
}
}
# SUB: ShowPosition
# PARAMS: optiona; file position (via file pointer)
# RETURN: string for percent position output
# DESC: calculates the current percent position based on the passed parameter, if no parameter uses intneral counter
sub ShowPosition
{
my $self = shift;
# set local vars
my $percent; # current percent
my $full_time_needed; # complete process time
my $full_time_per_line; # time per line
my $eta; # estimated end time
my $string = ''; # percent string that gets output
my $show_filesize = 1;
# microtime flags
my $eta_microtime = 0;
my $ftn_microtime = 0;
my $lg_microtime = 0;
# percent precision calc
my $_p_spf = "%.".$self->{precision}."f";
# output format for percent
my $_pr_p_spf = "%".$self->{percent_print}.".".$self->{percent_precision}."f";
# set the linecount precision based on the final linecount, if not, leave it empty
my $_pr_lc = "%s";
$_pr_lc = "%".length(format_number($self->{linecount}))."s" if ($self->{linecount});
# time format, if flag is set, the wide format is used
my $_pr_tf = "%s";
$_pr_tf = "%-15s" if ($self->{'wide_time'});
# do the smae for file size
# my $_pr_fs = "%s";
# $_pr_fs = "%".length(function::convert_number($self->{filesize}))."s" if ($self->{filesize});
# increase position by one
$self->{count} ++;
# see if we get anything from IO tell
if (@_) {
$self->{file_pos} = shift;
} else {
# we did not, so we set internal value
$self->{file_pos} = $self->{count};
# we also check if the filesize was set now
if (!$self->{filesize}) {
$self->{filesize} = $self->{linecount};
}
# set ignore filesize output (no data)
$show_filesize = 0;
}
# set the count size based on the file pos, is only used if we have filesize
$self->{count_size} = $self->{file_pos};
# do normal or down to 10 (0, 10, ...) %
if ($self->{precision_ten_step}) {
# calc 0 comma precision, so just do a floor
my $_percent = sprintf("%d", ($self->{file_pos} / $self->{filesize}) * 100);
# mod that to 10
my $mod = $_percent % $self->{precision_ten_step};
# either write this one, or write the previous, old one
$percent = $mod == 0 ? $_percent : $self->last_percent;
# print "P: $percent, Last: ".$self->last_percent.", Mod: ".$mod.", Calc: ".$_percent."\n";
} else {
$percent = sprintf($_p_spf, ($self->{file_pos} / $self->{filesize}) * 100);
}
# print "POS: ".$self->{file_pos}.", PERCENT: $percent / ".$self->last_percent."\n";
if ($percent != $self->last_percent) {
$self->{end_time} = time();
# for from the beginning
$full_time_needed = $self->{end_time} - $self->{start_run}; # how long from the start;
$self->{last_group} = $self->{end_time} - $self->{start_time};
$self->{lines_processed} = $self->{count} - $self->{current_count};
# lines in last group
$self->{lines_in_last_group} = $self->{'last_group'} ? ($self->{lines_processed} / $self->{last_group}) : 0;
# lines in global
$self->{lines_in_global} = $full_time_needed ? ($self->{'count'} / $full_time_needed) : 0;
# if we have linecount
if (!$self->{linecount}) {
$full_time_per_line = (($full_time_needed) ? $full_time_needed : 1) / $self->{count_size}; # how long for all
$eta = $full_time_per_line * ($self->{filesize} - $self->{count_size}); # estimate for the rest
} else {
$full_time_per_line = (($full_time_needed) ? $full_time_needed : 1) / $self->{count}; # how long for all
$eta = $full_time_per_line * ($self->{linecount} - $self->{count}); # estimate for the rest
}
# just in case ...
$eta = '0' if ($eta < 0);
# check if to show microtime
# ON: if microtime is flagged as one
$eta_microtime = $ftn_microtime = $lg_microtime = 1 if ($self->{microtime} == 1);
# AUTO: foir microtime
if ($self->{microtime} == 0) {
$eta_microtime = 1 if ($eta > 0 && $eta < 1);
$ftn_microtime = 1 if ($full_time_needed > 0 && $full_time_needed < 1);
# pre check last group: if pre comma part is same add microtime anyway
$lg_microtime = 1 if ($self->{last_group} > 0 && $self->{last_group} < 1);
}
# print out
if ($show_filesize) {
# last group size
$self->{size_in_last_group} = $self->{count_size} - $self->{current_size};
# calc kb/s if there is any filesize data
# last group
$self->{bytes_in_last_group} = $self->{'last_group'} ? ($self->{size_in_last_group} / $self->{last_group}) : 0;
# global
$self->{bytes_in_global} = $full_time_needed ? ($self->{count_size} / $full_time_needed) : 0;
# only used if we run with file size for the next check
$self->{current_size} = $self->{count_size};
$string = sprintf(
"Processed ".$_pr_p_spf."%% [%s / %s] | ".$_pr_lc." / ".$_pr_lc." Lines | ETA: ".$_pr_tf." / TR: ".$_pr_tf." / LR: %s lines (%s) in %s, %s (%s) lines/s, %s (%s) b/s\n",
$percent,
function::convert_number($self->{count_size}),
function::convert_number($self->{filesize}),
format_number($self->{count}),
format_number($self->{linecount}),
function::convert_time($eta, $eta_microtime),
function::convert_time($full_time_needed, $ftn_microtime),
format_number($self->{lines_processed}),
function::convert_number($self->{size_in_last_group}),
function::convert_time($self->{last_group}, $lg_microtime),
format_number($self->{lines_in_global}, 2, 1),
format_number($self->{lines_in_last_group}, 2, 1),
function::convert_number($self->{bytes_in_global}),
function::convert_number($self->{bytes_in_last_group})
) if ($self->{verbose} >= 1);
} else {
$string = sprintf(
"Processed ".$_pr_p_spf."%% | ".$_pr_lc." / ".$_pr_lc." Lines | ETA: ".$_pr_tf." / TR: ".$_pr_tf." / LR: %s lines in %s, %s (%s) lines/s\n",
$percent,
format_number($self->{count}),
format_number($self->{linecount}),
function::convert_time($eta, $eta_microtime),
function::convert_time($full_time_needed, $ftn_microtime),
format_number($self->{lines_processed}),
function::convert_time($self->{last_group}, $lg_microtime),
format_number($self->{lines_in_global}, 2, 1),
format_number($self->{lines_in_last_group}, 2, 1)
) if ($self->{verbose} >= 1);
}
# write back vars
$self->{last_percent} = $percent;
$self->{eta} = $eta;
$self->{full_time_needed} = $full_time_needed;
$self->{lg_microtime} = $lg_microtime;
# for the next run, check data
$self->{start_time} = time();
$self->{current_count} = $self->{count};
# trigger if this is a change
$self->{change} = 1;
} else {
# trigger if this is a change
$self->{change} = 0;
}
return $string;
}
1;