Re (tilly) 3: GD::Graph Issues

in reply to Re: Re (tilly) 1: GD::Graph Issues
in thread GD::Graph Issues

The bug issue was that the two_axes option would, if you don't choose data ranges that are extremely "nice" relative to each other, dynamically make very bad choices and insert a huge number of ticks. My change makes it dynamically choose a much smarter range in parallel.

The patch that I am posting here is to GD/Graph/axestype.pm. It improves this functionality, but not does not include corresponding documentation patches. (Those should be made in GD/Graph.pm.) It also enhances the y_tick_number option to allow it to take an anonymous array of numbers - it chooses the number of ticks that gives the best fit. The idea for this is based on the existing (undocumented) 'auto' option which corresponds to a range of 3 to 6 ticks. Allowing this results in massively better choices of ranges! (Try it, you will see.) You may wish to change the default value of y_tick_number accordingly.

One warning note. The indentation style here is not what I would choose to use normally. I chose it only so I could match the existing code. Also the version number change should be taken with a huge grain of salt. Both of us get version numbers out of CVS. Since the two copies are checked into independent CVS trees, the version numbers have no relationship.

--- /usr/local/lib/site_perl/GD/Graph/axestype.pm    Sat Oct  7 01:52:
+41 2000
+++ axestype.pm    Thu Jul 19 12:53:24 2001
@@ -5,16 +5,16 @@
 #    Name:
 #        GD::Graph::axestype.pm
 #
-# $Id: axestype.pm,v 1.29 2000/10/07 05:52:41 mgjv Exp $
+# $Id: axestype.pm,v 1.4 2001/07/19 16:53:24 tilly Exp $
 #
 #====================================================================
+======
 
 package GD::Graph::axestype;

-$GD::Graph::axestype::VERSION = '$Revision: 1.29 $' =~ /\s([\d.]+)/;
+$GD::Graph::axestype::VERSION = '$Revision: 1.4 $' =~ /\s([\d.]+)/;
 
 use strict;
- 
+
 use GD::Graph;
 use GD::Graph::utils qw(:all);
 use Carp;
@@ -127,6 +127,10 @@
     y_max_value     => undef,
     y1_max_value     => undef,
     y2_max_value     => undef,
+        y_min_range     => undef,               # CONTRIB Ben Tilly
+        y1_min_range     => undef,
+        y2_min_range     => undef,
+
 
     borderclrs        => undef,
 
@@ -1007,13 +1011,22 @@
     # First, calculate some decent values
     if ( $self->{two_axes} ) 
     {
-        for my $i (1 .. 2) 
-        {
-            my ($y_min, $y_max) = $self->{_data}->get_min_max_y($i);
-            ($self->{y_min}[$i], $self->{y_max}[$i], $self->{y_tick_n
+umber}) =
-                _best_ends($y_min, $y_max, $self->{y_tick_number});
-        }
-    } 
+                my $min_range_1 = defined($self->{min_range_1})
+                        ? $self->{min_range_1}
+                        : $self->{min_range};
+                my $min_range_2 = defined($self->{min_range_2})
+                        ? $self->{min_range_2}
+                        : $self->{min_range};
+        (
+                $self->{y_min}[1], $self->{y_max}[1],
+                $self->{y_min}[2], $self->{y_max}[2],
+                        $self->{y_tick_number}
+                ) = _best_dual_ends(
+                        $self->{_data}->get_min_max_y(1), $min_range_
+1,
+                        $self->{_data}->get_min_max_y(2), $min_range_
+2,
+                        $self->{y_tick_number}
+                );
+    }
     else
     {
         my ($y_min, $y_max);
@@ -1028,7 +1041,7 @@
             ($y_min, $y_max) = $self->{_data}->get_min_max_y_all;
         }
         ($self->{y_min}[1], $self->{y_max}[1], $self->{y_tick_number}
+) =
-            _best_ends($y_min, $y_max, $self->{y_tick_number});
+            _best_ends($y_min, $y_max, @$self{'y_tick_number','y_min_
+range'});
     }
 
     if (defined($self->{x_tick_number}))
@@ -1043,8 +1056,8 @@
             ($self->{true_x_min}, $self->{true_x_max}) = 
                 $self->{_data}->get_min_max_x;
             ($self->{x_min}, $self->{x_max}, $self->{x_tick_number}) 
+=
-                _best_ends($self->{true_x_min}, $self->{true_x_max},
-                            $self->{x_tick_number});
+                _best_ends($self->{true_x_min}, $self->{true_x_max},
+                        @$self{'y_tick_number','y_min_range'});
         }
     }
 
@@ -1136,24 +1149,43 @@
 #
 # Usage:
 #        ($nmin,$nmax,$nint) = _best_ends(247, 508);
-#        ($nmin,$nmax) = _best_ends(247, 508, 5); 
+#        ($nmin,$nmax) = _best_ends(247, 508, 5);
 #             use 5 intervals
-#        ($nmin,$nmax,$nint) = _best_ends(247, 508, 4..7);    
+#        ($nmin,$nmax,$nint) = _best_ends(247, 508, [4..7]);
 #             best of 4,5,6,7 intervals
-sub _best_ends 
+#        ($nmin,$nmax,$nint) = _best_ends(247, 508, 'auto');
+#             best of 3,4,5,6 intervals
+#        ($nmin,$nmax,$nint) = _best_ends(247, 508, [2..5]);
+#             best of 2,3,4,5 intervals
+
+sub _best_ends
 {
-    my ($min, $max, @n) = @_;
+    my ($min, $max, $n_ref, $min_range) = @_;
+
+        # Adjust for the min range if need be
+        ($min, $max) = _fit_vals_range($min, $max, $min_range);
+
     my ($best_min, $best_max, $best_num) = ($min, $max, 1);

-    # Check that min and max are not the same, and not 0
-    ($min, $max) = ($min) ? ($min * 0.5, $min * 1.5) : (-1,1) 
-        if ($max == $min);
     # mgjv - Sometimes, for odd values, and only one data set, this w
+ill be
     # necessary _after_ the previous step, not before. Data sets of o
+ne
     # long with negative values were causing infinite loops later on.
     ($min, $max) = ($max, $min) if ($min > $max);

-    @n = (3..6) if @n <= 0 || $n[0] =~ /auto/i;
+    # Check that min and max are not the same, and not 0
+    ($min, $max) = ($min) ? ($min * 0.5, $min * 1.5) : (-1,1)
+        if ($max == $min);
+    
+    my @n = ref($n_ref) ? @$n_ref : $n_ref;
+
+    if (@n <= 0)
+        {
+                @n = (3..6);
+        }
+        else
+        {
+            @n = map { ref($_) ? @$_ : /(\d+)/i ? $1 : (3..6) } @n;
+        }

     my $best_fit = 1e30;
     my $range = $max - $min;
@@ -1164,32 +1196,165 @@
     while ($s > $range) { $s /= 10 }
     my @step = map {$_ * $s} (0.2, 0.5, 1, 2, 5);
 
-    for my $n (@n) 
-    {
+    for my $n (@n)
+    {
         # Try all numbers of intervals
         next if ($n < 1);
 
-        for my $step (@step) 
+        for my $step (@step)
         {
             next if ($n != 1) && ($step < $range/$n); # $step too sma
+ll
 
-            my $nice_min   = $step * int($min/$step);
-            $nice_min  -= $step if ($nice_min > $min);
-            my $nice_max   = ($n == 1) 
-                ? $step * int($max/$step + 1) 
-                : $nice_min + $n * $step;
-            my $nice_range = $nice_max - $nice_min;
+            my ($nice_min, $nice_max, $fit)
+                    = _fit_interval($min, $max, $n, $step);
 
-            next if ($nice_max < $max);    # $nice_min too small
-            next if ($best_fit <= $nice_range - $range); # not closer
+ fit
+                        next if $best_fit <= $fit;
 
             $best_min = $nice_min;
             $best_max = $nice_max;
-            $best_fit = $nice_range - $range;
+            $best_fit = $fit;
             $best_num = $n;
         }
     }
-    return ($best_min, $best_max, $best_num)
+    return ($best_min, $best_max, $best_num);
+}
+
+
+# CONTRIB Ben Tilly
+#
+# Calculate best endpoints and number of intervals for a pair of axes
+# where it is trying to line up the scale of the two intervals.  It
+# returns ($nice_min_1, $nice_max_1, $nice_min_2, $nice_max_2, $n),
+# where $n is the number of intervals and
+#
+#    $nice_min_1 <= $min_1 < $max_1 <= $nice_max_1
+#    $nice_min_2 <= $min_2 < $max_2 <= $nice_max_2
+#
+# and 0 will appear at the same point on both axes.
+#
+# Usage:
+#       ($nmin_1,$nmax_1,$nmin_2,$nmax_2,$nint) = _best_dual_ends(247
+, 508, undef, -1, 5, undef, [2..5]);
+# etc.  (The usage of the last arguments just parallels _best_ends.)
+#
+sub _best_dual_ends
+{
+        my ($min_1, $max_1) = _fit_vals_range(splice @_, 0, 3);
+        my ($min_2, $max_2) = _fit_vals_range(splice @_, 0, 3);
+        my @rem_args = @_;
+
+    my $scale_1 = _max(abs($min_1), abs($max_1));
+        my $scale_2 = _max(abs($min_2), abs($max_2));
+
+        $scale_1 = defined($scale_2) ? $scale_2 : 1 unless defined($s
+cale_1);
+        $scale_2 = $scale_1 unless defined($scale_2);
+
+        my $ratio = $scale_1 / $scale_2;
+        my $fact_1 = my $fact_2 = 1;
+
+        while ($ratio < sqrt(0.1))
+        {
+                $ratio *= 10;
+                $fact_2 *= 10;
+        }
+        while ($ratio > sqrt(10))
+        {
+                $ratio /= 10;
+                $fact_1 *= 10;
+        }
+
+    my ($best_min_1, $best_max_1, $best_min_2, $best_max_2, $best_n, 
+$best_fit)
+                = ($min_1, $max_1, $min_2, $max_2, 1, 1e10);
+
+        # Now try all of the ratios of "simple numbers" in the right 
+size-range
+        foreach my $frac
+        (
+                [1,1], [1,2], [1,3], [2,1], [2,3], [2,5],
+                [3,1], [3,2], [3,4], [3,5], [3,8], [3,10],
+                [4,3], [4,5], [5,2], [5,3], [5,4], [5,6],
+        [5,8], [6,5], [8,3], [8,5], [10,3]
+        )
+        {
+                my $bfact_1 = $frac->[0] * $fact_1;
+                my $bfact_2 = $frac->[1] * $fact_2;
+
+        my $min = _min( $min_1/$bfact_1, $min_2/$bfact_2 );
+        my $max = _max( $max_1/$bfact_1, $max_2/$bfact_2 );
+
+        my ($bmin, $bmax, $n) = _best_ends($min, $max, @rem_args);
+                my ($bmin_1, $bmax_1) = ($bfact_1*$bmin, $bfact_1*$bm
+ax);
+                my ($bmin_2, $bmax_2) = ($bfact_2*$bmin, $bfact_2*$bm
+ax);
+
+                my $fit = _measure_interval_fit($bmin_1, $min_1, $max
+_1, $bmax_1)
+                        + _measure_interval_fit($bmin_2, $min_2, $max
+_2, $bmax_2);
+
+                next if $best_fit < $fit;
+
+                (
+                $best_min_1, $best_max_1, $best_min_2, $best_max_2,
+                $best_n,   $best_fit
+        ) = (
+                    $bmin_1,    $bmax_1,      $bmin_2,     $bmax_2,
+                     $n,       $fit
+        );
+        }
+
+        return ($best_min_1, $best_max_1, $best_min_2, $best_max_2, $
+best_n);
+}
+
+# Takes $min, $max, $step_count, $step_size.  Assumes $min <= $max an
+d both
+# $step_count and $step_size are positive.  Returns the fitted $min, 
+$max,
+# and a $fit statistic (where smaller is better).  Failure to fit the
+# interval results in a poor fit statistic. :-)
+sub _fit_interval
+{
+        my ($min, $max, $step_count, $step_size) = @_;
+
+        my $nice_min = $step_size * int($min/$step_size);
+        $nice_min  -= $step_size if ($nice_min > $min);
+        my $nice_max   = ($step_count == 1)
+                ? $step_size * int($max/$step_size + 1)
+                : $nice_min + $step_count * $step_size;
+
+        my $fit = _measure_interval_fit($nice_min, $min, $max, $nice_
+max);
+
+        return ($nice_min, $nice_max, $fit);
+}
+
+# Takes 2 values and a minimum range.  Returns a min and max which ho
+lds
+# both values and is at least that minimum size
+sub _fit_vals_range
+{
+        my ($min, $max, $min_range) = @_;
+
+    ($min, $max) = ($max, $min) if $max < $min;
+
+        if (defined($min_range) and $min_range > $max - $min)
+        {
+                my $nice_min = $min_range * int($min/$min_range);
+                $nice_min = $nice_min - $min_range if $min < $nice_mi
+n;
+                my $nice_max = $max < $nice_min + $min_range
+                        ? $nice_min + $min_range
+                        : $max;
+                ($min, $max) = ($nice_min, $nice_max);
+    }
+        return ($min, $max);
+}
+
+# Takes $bmin, $min, $max, $bmax and returns a fit statistic for how 
+well
+# ($bmin, $bmax) encloses the interval ($min, $max).  Smaller is bett
+er,
+# and failure to fit will be a very bad fit.  Assumes that $min <= $m
+ax
+# and $bmin < $bmax.
+sub _measure_interval_fit
+{
+        my ($bmin, $min, $max, $bmax) = @_;
+        return 1000 if $bmin > $min or $bmax < $max;
+
+    my $range = $max - $min;
+        my $brange = $bmax - $bmin;
+
+        return $brange < 10 * $range
+                ? ($brange / $range)
+                : 10;
 }
 
 sub _get_bottom
@@ -1210,7 +1375,7 @@
 #
 # Convert value coordinates to pixel coordinates on the canvas.
 #
-sub val_to_pixel    # ($x, $y, $i) in real coords ($Dataspace), 
+sub val_to_pixel    # ($x, $y, $i) in real coords ($Dataspace),
 {                        # return [x, y] in pixel coords
     my $self = shift;
     my ($x, $y, $i) = @_;
@@ -1218,7 +1383,7 @@
     my $y_min = ($self->{two_axes} && $i == 2) ?
         $self->{y_min}[2] : $self->{y_min}[1];
 
-    my $y_max = ($self->{two_axes} && $i == 2) ? 
+    my $y_max = ($self->{two_axes} && $i == 2) ?
         $self->{y_max}[2] : $self->{y_max}[1];
 
     my $y_step = abs(($self->{bottom} - $self->{top})/($y_max - $y_mi
+n));
@@ -1230,7 +1395,7 @@
      }
      else
      {
-        $ret_x = ($self->{x_tick_number} ? $self->{x_offset} : 
+        $ret_x = ($self->{x_tick_number} ? $self->{x_offset} :
             $self->{left}) + $x * $self->{x_step};
     }
     my $ret_y = $self->{bottom} - ($y - $y_min) * $y_step;
[download]

(Lemme guess. Utterly obvious, you would have come up with this on your own, right? :-)

In Section Seekers of Perl Wisdom