Our mod_perl application is sometimes running out of memory. So, I want to create a function to track down all variables outside of functions and check their memory usage using Devel::Size. I'm using Package::Stash to find all package variables (declared using 'our'), but now I want to also find all lexical variables (declared with 'my') which are at the file, i.e. top, level in all packages. I know about PadWalker but don't know how to use it - or anything else - to find and get the memory used by these top level lexical variables.
UPDATE: Here is a module that I wrote that is giving me the information that I needed. Hope it works for someone:
# SimpleMemoryMap.pm
=head1 NAME
SimpleMemoryMap - Return map of memory used for all package globals
=head1 SYNOPSIS
use SimpleMemoryMap;
# --- $hMap is { <package> => <var> => <nbytes> }
my $hMap = GetMemoryMap({MinBytes => 40960}, qw(
PackageA
PackageB
));
=head1 AUTHOR
John Deighan <F<jdeighan@pcgus.com>>
=cut
package SimpleMemoryMap;
use strict;
use warnings;
use Exporter;
use base 'Exporter';
our @EXPORT = qw(GetMemoryMap);
our $hSigil = {
SCALAR => '$',
HASH => '%',
ARRAY => '@',
CODE => '&',
};
our $decplaces = 1;
# ----------------------------------------------------------------------
sub GetMemoryMap { my($hOptions, @packages) = @_;
# --- Returns either a hashref: { <varname> => {
# Shared => <boolean>,
# Size => <numBytes>,
# DeclaredAs => <my_or_our>,
# }}
# or an error message (with embedded \n for multiple errors)
# --- NOTE: $hOptions is optional. If the first parameter is a string,
# it will be added to @packages
# Valid $hOptions:
# MinBytes - ignore variables with less than this size
# Will default to 4096 if not specified
# hOwners - { <varname> => <pkg>, ... }
# Any variable named <varname> only included for package <pkg>
# --- Make sure we can load all needed introspection packages
# If any of these packages are missing, return an error string
my @errors = LoadNeededPackages();
return join("\n", @errors) if (@errors > 0);
if (defined($hOptions) && !ref($hOptions)) {
unshift(@packages, $hOptions);
$hOptions = undef;
}
return "No packages specified" if (@packages == 0);
my $minBytes = $hOptions->{MinBytes};
$minBytes = 4096 if !defined($minBytes);
my $hOwners = $hOptions->{hOwners};
my $hMap; # return value, if no errors
foreach my $pkg (@packages) {
my $h;
# --- Get the package's symbol table
my $stash = Package::Stash->new($pkg);
foreach my $type (qw(SCALAR HASH ARRAY)) {
# --- Get all the variable names used in the package
# These names do not include the sigil, i.e. $, @, %, etc.
my @vars = $stash->list_all_symbols($type);
VAR: foreach my $varname (@vars) {
my $nameWSigil = $hSigil->{$type} . $varname;
# --- I don't know why total_size doesn't allow this
# but we don't want variables in other packages anyway
next VAR if ($nameWSigil =~ /::/);
# --- If a variable is defined and exported in a package
# then imported in a 2nd package, it will appear here
# for both packages. If you do that, you can prevent it
# showing up twice by specifying an owning package in
# $hOwners, and it will only show up for that package
# (it's a "best practice" to only export functions
# from a package, in which case this won't be needed)
next VAR if $hOwners->{$nameWSigil}
&& ($pkg ne $hOwners->{$nameWSigil});
# --- Get a reference to the variable's value
# Then, ignore it if the value is undef
my $valueref = $stash->get_symbol($nameWSigil);
# --- dereference references to references
while (ref($valueref) eq 'REF') {
$valueref = $$valueref;
}
my($size, $shared) = calc_size($valueref);
if ($size >= $minBytes) {
$h->{$nameWSigil} = {
Shared => $shared,
Size => $size,
DeclaredAs => 'our',
};
}
}
}
# --- Find file-level 'my' variables
my $hPkgMyVars = undef;
# --- First, find all functions defined in this package
my @funcs = $stash->list_all_symbols('CODE');
FUNC: foreach my $funcname (@funcs) {
my $nameWSigil = '&' . $funcname;
my $funcref = $stash->get_symbol($nameWSigil);
# --- Get the package that the function is actually defined in
# It won't be the current package if it was imported
# from a different package into this package
my $stash_name = Sub::Identify::stash_name($funcref);
next FUNC if ($stash_name ne $pkg);
# --- Get all of the variables that are used in this function
# but not defined inside the function
# These will usually be file level 'my' variables,
# though they might not be if you have nested functions
my $hMyVars = PadWalker::closed_over($funcref);
next FUNC if !$hMyVars || (keys(%$hMyVars) == 0);
MYVAR: foreach my $varname (keys(%$hMyVars)) {
# --- Check if this variable has already been seen
# in which case we can skip it
if (!exists($hPkgMyVars->{$varname})) {
$hPkgMyVars->{$varname} = 1; # mark it as seen
my $valueref = $hMyVars->{$varname};
# --- dereference references to references
while (ref($valueref) eq 'REF') {
$valueref = $$valueref;
}
if (ref($valueref) eq 'CODE') {
# --- Can't calc size of $varname because it's 'CODE'
next MYVAR;
}
elsif (ref($valueref) eq 'HASH') {
foreach my $key (keys(%$valueref)) {
if (ref($valueref->{$key}) eq 'CODE') {
# --- Can't calc size of $varname because
# it contains a reference to 'CODE'
next MYVAR;
}
}
}
elsif (ref($valueref) eq 'ARRAY') {
foreach my $val (@$valueref) {
if (ref($val) eq 'CODE') {
# --- Can't calc size of $varname because
# it contains a reference to 'CODE'
next MYVAR;
}
}
}
my($size, $shared) = calc_size($valueref);
if ($size >= $minBytes) {
$h->{$varname} = {
Shared => $shared,
Size => $size,
DeclaredAs => 'my',
};
}
}
}
}
$hMap->{$pkg} = $h if defined($h);
}
return (@errors == 0) ? $hMap : join("\n", @errors);
} # GetMemoryMap()
# ----------------------------------------------------------------------
sub LoadNeededPackages {
my @errors;
foreach my $pkg (qw(
Data::Dumper
threads::shared
Package::Stash
Sub::Identify
PadWalker
Devel::Size
)) {
my $rc = eval("require $pkg; return 1;");
if (!$rc) {
push(@errors, "Required package $pkg not found");
}
}
no warnings qw(once);
$Devel::Size::warn = 0;
return @errors;
} # LoadNeededPackages()
# ----------------------------------------------------------------------
sub calc_size {
# --- total_size() does not correctly handle thread shared variables
my $shared = threads::shared::is_shared($_[0]);
my $size = !defined($_[0]) ? 0
: $shared ? sharedSize($_[0])
: Devel::Size::total_size($_[0]);
return ($size, $shared);
} # calc_size()
# ----------------------------------------------------------------------
sub sharedSize {
require Data::Dumper;
require Devel::Size;
no warnings qw(once);
local $Data::Dumper::Purity = 1;
my $str = Data::Dumper::Dumper($_[0]);
my $VAR1;
my $rc = eval($str . "return 1;");
return 0 if !$rc;
my $nBytes = Devel::Size::total_size($VAR1);
return $nBytes;
} # sharedSize()
# ----------------------------------------------------------------------
1;
And, here is a test script. You'll note that it analyzes 2 of our proprietary modules. To use it you'll have to pass in the names of your own modules. If you want to find all modules, try using the function at the end, but I don't think it will handle module names with '::' in it:
# testSimpleMemoryMap.pl
use strict;
use warnings;
use Data::Dumper;
use SimpleMemoryMap qw(GetMemoryMap);
use UserTypeInfoCache;
use StudentInfoCache;
# --- This may return an error string ---
my $hMemoryMap = GetMemoryMap({MinBytes => 0}, qw(
UserTypeInfoCache
StudentInfoCache
));
if (ref($hMemoryMap)) {
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Useqq = 1;
print(Dumper($hMemoryMap));
}
else {
print("ERROR:\n", $hMemoryMap);
}
# --------------------------------------------------
sub GetPackages {
my @packages;
foreach my $module (keys(%INC)) {
if ($module =~ /^(A-Za-z0-9_-+)\.pm$/) {
push(@packages, $1);
}
}
return @packages;
} # GetPackages()
And, Rolf... sorry about being a bit snotty with you below. I was having a bad day and felt offended by your suggestion that I didn't know what a closure was. I'm sure you only meant to help. Actually, what I didn't understand is how Perl starts up, especially module loading. I think I'm a bit clearer about that now.