Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Finding file level lexical variables

by johndeighan (Novice)
on May 25, 2016 at 15:35 UTC ( [id://1164089]=perlquestion: print w/replies, xml ) Need Help??

johndeighan has asked for the wisdom of the Perl Monks concerning the following question:

Our mod_perl application is sometimes running out of memory. So, I want to create a function to track down all variables outside of functions and check their memory usage using Devel::Size. I'm using Package::Stash to find all package variables (declared using 'our'), but now I want to also find all lexical variables (declared with 'my') which are at the file, i.e. top, level in all packages. I know about PadWalker but don't know how to use it - or anything else - to find and get the memory used by these top level lexical variables.

UPDATE: Here is a module that I wrote that is giving me the information that I needed. Hope it works for someone:
# SimpleMemoryMap.pm

=head1 NAME

	SimpleMemoryMap - Return map of memory used for all package globals

=head1 SYNOPSIS

	use SimpleMemoryMap;

	# --- $hMap is { <package> => <var> => <nbytes> }
	my $hMap = GetMemoryMap({MinBytes => 40960}, qw(
		PackageA
		PackageB
		));

=head1 AUTHOR

	John Deighan <F<jdeighan@pcgus.com>>

=cut

package SimpleMemoryMap;

use strict;
use warnings;

use Exporter;
use base 'Exporter';
our @EXPORT = qw(GetMemoryMap);

our $hSigil = {
	SCALAR => '$',
	HASH => '%',
	ARRAY => '@',
	CODE => '&',
	};
our $decplaces = 1;

# ----------------------------------------------------------------------

sub GetMemoryMap { my($hOptions, @packages) = @_;
	# --- Returns either a hashref: { <varname> => {
	#                                   Shared => <boolean>,
	#                                   Size => <numBytes>,
	#                                   DeclaredAs => <my_or_our>,
	#                                   }}
	#        or an error message (with embedded \n for multiple errors)
	# --- NOTE: $hOptions is optional. If the first parameter is a string,
	#           it will be added to @packages
	#     Valid $hOptions:
	#        MinBytes - ignore variables with less than this size
	#           Will default to 4096 if not specified
	#        hOwners - { <varname> => <pkg>, ... }
	#           Any variable named <varname> only included for package <pkg>

# --- Make sure we can load all needed introspection packages
#     If any of these packages are missing, return an error string
my @errors = LoadNeededPackages();
return join("\n", @errors) if (@errors > 0);

if (defined($hOptions) && !ref($hOptions)) {
	unshift(@packages, $hOptions);
	$hOptions = undef;
	}

return "No packages specified" if (@packages == 0);

my $minBytes = $hOptions->{MinBytes};
$minBytes = 4096 if !defined($minBytes);
my $hOwners = $hOptions->{hOwners};

my $hMap;   # return value, if no errors

foreach my $pkg (@packages) {
	my $h;

	# --- Get the package's symbol table
	my $stash = Package::Stash->new($pkg);
	foreach my $type (qw(SCALAR HASH ARRAY)) {

		# --- Get all the variable names used in the package
		#     These names do not include the sigil, i.e. $, @, %, etc.
		my @vars = $stash->list_all_symbols($type);
		VAR: foreach my $varname (@vars) {
			my $nameWSigil = $hSigil->{$type} . $varname;

			# --- I don't know why total_size doesn't allow this
			#     but we don't want variables in other packages anyway
			next VAR if ($nameWSigil =~ /::/);

			# --- If a variable is defined and exported in a package
			#     then imported in a 2nd package, it will appear here
			#     for both packages. If you do that, you can prevent it
			#     showing up twice by specifying an owning package in
			#     $hOwners, and it will only show up for that package
			#     (it's a "best practice" to only export functions
			#     from a package, in which case this won't be needed)
			next VAR if $hOwners->{$nameWSigil}
			         && ($pkg ne $hOwners->{$nameWSigil});

			# --- Get a reference to the variable's value
			#     Then, ignore it if the value is undef
			my $valueref = $stash->get_symbol($nameWSigil);

			# --- dereference references to references
			while (ref($valueref) eq 'REF') {
				$valueref = $$valueref;
				}

			my($size, $shared) = calc_size($valueref);
			if ($size >= $minBytes) {
				$h->{$nameWSigil} = {
					Shared => $shared,
					Size => $size,
					DeclaredAs => 'our',
					};
				}
			}
		}

	# --- Find file-level 'my' variables

	my $hPkgMyVars = undef;

	# --- First, find all functions defined in this package
	my @funcs = $stash->list_all_symbols('CODE');
	FUNC: foreach my $funcname (@funcs) {
		my $nameWSigil = '&' . $funcname;
		my $funcref = $stash->get_symbol($nameWSigil);

		# --- Get the package that the function is actually defined in
		#     It won't be the current package if it was imported
		#     from a different package into this package
		my $stash_name = Sub::Identify::stash_name($funcref);
		next FUNC if ($stash_name ne $pkg);

		# --- Get all of the variables that are used in this function
		#     but not defined inside the function
		#     These will usually be file level 'my' variables,
		#     though they might not be if you have nested functions
		my $hMyVars = PadWalker::closed_over($funcref);
		next FUNC if !$hMyVars || (keys(%$hMyVars) == 0);

		MYVAR: foreach my $varname (keys(%$hMyVars)) {
			# --- Check if this variable has already been seen
			#     in which case we can skip it
			if (!exists($hPkgMyVars->{$varname})) {
				$hPkgMyVars->{$varname} = 1;  # mark it as seen
				my $valueref = $hMyVars->{$varname};

				# --- dereference references to references
				while (ref($valueref) eq 'REF') {
					$valueref = $$valueref;
					}

				if (ref($valueref) eq 'CODE') {
					# --- Can't calc size of $varname because it's 'CODE'
					next MYVAR;
					}
				elsif (ref($valueref) eq 'HASH') {
					foreach my $key (keys(%$valueref)) {
						if (ref($valueref->{$key}) eq 'CODE') {
							# --- Can't calc size of $varname because
							#    it contains a reference to 'CODE'
							next MYVAR;
							}
						}
					}
				elsif (ref($valueref) eq 'ARRAY') {
					foreach my $val (@$valueref) {
						if (ref($val) eq 'CODE') {
							# --- Can't calc size of $varname because
							#    it contains a reference to 'CODE'
							next MYVAR;
							}
						}
					}

				my($size, $shared) = calc_size($valueref);
				if ($size >= $minBytes) {
					$h->{$varname} = {
						Shared => $shared,
						Size => $size,
						DeclaredAs => 'my',
						};
					}
				}
			}
		}

	$hMap->{$pkg} = $h if defined($h);
	}
return (@errors == 0) ? $hMap : join("\n", @errors);
} # GetMemoryMap()

# ----------------------------------------------------------------------

sub LoadNeededPackages {

my @errors;
foreach my $pkg (qw(
		Data::Dumper
		threads::shared
		Package::Stash
		Sub::Identify
		PadWalker
		Devel::Size
		)) {
	my $rc = eval("require $pkg; return 1;");
	if (!$rc) {
		push(@errors, "Required package $pkg not found");
		}
	}
no warnings qw(once);
$Devel::Size::warn = 0;
return @errors;
} # LoadNeededPackages()

# ----------------------------------------------------------------------

sub calc_size {

# --- total_size() does not correctly handle thread shared variables
my $shared = threads::shared::is_shared($_[0]);
my $size = !defined($_[0]) ? 0
         : $shared         ? sharedSize($_[0])
         :                   Devel::Size::total_size($_[0]);
return ($size, $shared);
} # calc_size()

# ----------------------------------------------------------------------

sub sharedSize {

require Data::Dumper;
require Devel::Size;

no warnings qw(once);
local $Data::Dumper::Purity = 1;

my $str = Data::Dumper::Dumper($_[0]);
my $VAR1;
my $rc = eval($str . "return 1;");
return 0 if !$rc;
my $nBytes = Devel::Size::total_size($VAR1);
return $nBytes;
} # sharedSize()

# ----------------------------------------------------------------------

1;

And, here is a test script. You'll note that it analyzes 2 of our proprietary modules. To use it you'll have to pass in the names of your own modules. If you want to find all modules, try using the function at the end, but I don't think it will handle module names with '::' in it:

# testSimpleMemoryMap.pl

use strict;
use warnings;
use Data::Dumper;

use SimpleMemoryMap qw(GetMemoryMap);
use UserTypeInfoCache;
use StudentInfoCache;

# --- This may return an error string ---
my $hMemoryMap = GetMemoryMap({MinBytes => 0}, qw(
	UserTypeInfoCache
	StudentInfoCache
	));

if (ref($hMemoryMap)) {
	$Data::Dumper::Indent = 1;
	$Data::Dumper::Sortkeys = 1;
	$Data::Dumper::Useqq = 1;
	print(Dumper($hMemoryMap));
	}
else {
	print("ERROR:\n", $hMemoryMap);
	}

# --------------------------------------------------

sub GetPackages {

my @packages;
foreach my $module (keys(%INC)) {
	if ($module =~ /^(A-Za-z0-9_-+)\.pm$/) {
		push(@packages, $1);
		}
	}
return @packages;
} # GetPackages()


And, Rolf... sorry about being a bit snotty with you below. I was having a bad day and felt offended by your suggestion that I didn't know what a closure was. I'm sure you only meant to help. Actually, what I didn't understand is how Perl starts up, especially module loading. I think I'm a bit clearer about that now.

Replies are listed 'Best First'.
Re: Finding file level lexical variables
by LanX (Saint) on May 25, 2016 at 15:50 UTC
    My first guess its to write a sub calling PadWalker's closed_over() to get a hash of all vars and their refs in the calling scope.

    Untested, bc I'm on mobile now.

    HTH :)

    update

    The docs are a bit unclear, probably you need to use peek_my(1) instead

    Cheers Rolf
    (addicted to the Perl Programming Language and ☆☆☆☆ :)
    Je suis Charlie!

      I think that should be peek_my(0):

      #! perl use strict; use warnings; use feature qw( state ); use PadWalker qw( peek_my ); our $fred = 'Flintstone'; my $foo = 17; my $quux = bar(); printf "Sum is %d\n", $foo + $$quux; sub bar { state $c = 5; my $baz = 42; return \$baz; } my $h_ref = peek_my(0); print "$_\n" for keys %$h_ref;

      Output:

      2:00 >perl 1643_SoPW.pl Sum is 59 $quux $foo 2:00 >

      Hope that helps,

      Athanasius <°(((><contra mundum Iustus alius egestas vitae, eros Piratica,

        well I meant peek_my(1) to be called from a sub to avoid cluttering the top scope with introspection code. :)

        Cheers Rolf
        (addicted to the Perl Programming Language and ☆☆☆☆ :)
        Je suis Charlie!

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: perlquestion [id://1164089]
Approved by Athanasius
Front-paged by LanX
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others taking refuge in the Monastery: (4)
As of 2024-03-19 04:54 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found