Hello all!
I often deal with random data that is to be put into an SQL table. This data usually comes to me already in tabular structure, or as XML. But it almost certainly always lacks the type information needed to create a proper SQL table for it.
Lazy as I am, I wrote me a short and stupid module, which I can hand all values for a column and which then gives me back the "best" SQL type that fits all values in the column.
The usage of the code is something like the following:
use Type::FromData;
my $dt= Type::FromData->new();
my @data= (
{
fool => 1,
when => '20140401',
greeting => 'Hello',
value => '1,05'
},
{
fool => 0,
when => '20140402',
greeting => 'World',
value => '99,05'
},
{
fool => 0,
when => '20140402',
greeting => 'World',
value => '9,005'
},
);
$dt->guess( @$data );
print $dt->as_sql( table => 'test' );
# CREATE TABLE test
# fool decimal( 1,0 ),
# when date,
# greeting varchar(5),
# value decimal(5,2)
# )
Before I release that code onto CPAN, I have three questions:
- Certainly, somebody has already written the class to describe an SQL type. Maybe from the Moo* followers, or some other people who write Type modules. But I couldn't find anything like that, and plain strings suit my (limited) use case quite fine, except if a date could be "null" (that is, the empty string). So where are the classes that implement the appropriate attributes (length for varchar resp. precision/decimals for decimal) and the stringification?
- As always, I am very bad at naming things. What would be a good name for this module? So far, I've stuck with Type::FromData, but the only kind of type it returns will be an SQL declaration string or a CREATE TABLE statement, and never an abstract type that a different SQL dialect might handle.
- Curently, the data type recognition is done by three regular expressions that deal with the three kinds of data I get, "dates", "numbers" and "strings". Maybe it would be interesting to expand
package Type::FromData;
use strict;
sub new {
my( $class, %options )= @_;
$options{ column_type } ||= {};
$options{ column_map } ||= {
";date" => 'date',
";decimal" => 'decimal(%2$d,%3$d)',
";varchar" => 'varchar(%1$d)',
"date;" => 'date',
"decimal;" => 'decimal(%2$d,%3$d)',
"varchar;" => 'varchar(%1$d)',
"varchar;date" => 'varchar(%1$d)',
"varchar;decimal" => 'varchar(%1$d)',
"varchar;varchar" => 'varchar(%1$d)',
"date;decimal" => 'decimal(%2$d,%3$d)',
"date;varchar" => 'varchar(%1$d)',
"date;date" => 'date',
"decimal;date" => 'decimal(%2$d,%3$d)',
"decimal;varchar" => 'varchar(%1$d)',
"decimal;decimal" => 'decimal(%2$d,%3$d)',
";" => '',
};
bless \%options => $class;
}
sub column_type { $_[0]->{column_type} };
sub column_map { $_[0]->{column_map} };
sub guess_data_type {
my( $self, $type, @values )= @_;
my $column_map= $self->column_map;
for my $value (@values) {
my $old_type= $type;
my $this_value_type= '';
my $pre= 0;
my $post= 0;
my $length= length $value || 0;
if( ! defined $value or $value =~ /^$/) {
# ... nothing to guess here
} elsif( $value =~ /^((?:19|20)\d\d)-?(0\d|1[012])-?([012]\d|3
+[01])$/) {
$this_value_type= 'date';
$pre= 8;
} elsif( $value =~ /^[+-]?(\d+)$/) {
$this_value_type= 'decimal';
$pre= length( $1 );
$post= 0;
} elsif( $value =~ /^[+-]?(\d+)\.(\d+)$/) {
$this_value_type= 'decimal';
$pre= length( $1 );
$post= length( $2 );
} else {
$this_value_type= 'varchar';
};
if( $type ) {
if( $type =~ s/\s*\((\d+)\)// ) {
$length= $1 > $length ? $1 : $length;
} elsif( $type =~ s/\s*\((\d+),(\d+)\)// ) {
my( $new_prec, $new_post )= ($1,$2);
my $new_pre= $new_prec - $new_post;
$pre= $new_pre > $pre ? $new_pre : $pre;
$post= $2 > $post ? $2 : $post;
};
} else {
$type= '';
};
if( $type ne $this_value_type ) {
if( not exists $column_map->{ "$type;$this_value_type" })
+{
die "Unknown transition '$type' => '$this_value_type'"
+;
};
};
$type= sprintf $column_map->{ "$type;$this_value_type" }, $len
+gth, $pre+$post, $post;
};
$type
};
sub guess {
my( $self, @records )= @_;
my $column_type= $self->column_type;
for my $row (@records) {
for my $col (keys %$row) {
my( $new_type )= $self->guess_data_type($column_type->{$co
+l}, $row->{ $col });
if( $new_type ne ($column_type->{ $col } || '')) {
#print sprintf "%s: %s => %s ('%s')\n",
# $col, ($column_type{ $col } || 'unknown'), ($new_
+type || 'unknown'), $info->{$col};
$column_type->{ $col }= $new_type;
};
}
}
}
sub as_sql {
my( $self, %options )= @_;
my $table= $options{ table };
my $user= defined $options{ user }
? "$options{ user }."
: ''
;
my $column_type= $self->column_type;
$options{ columns }||= [ sort keys %{ $column_type } ];
my $columns= join ",\n ", map { "$_ $column_type->{ $_ }" }
+ @{ $options{ columns }};
my($sql)= <<SQL;
create table $user$table (
$columns
);
SQL
return $sql;
}
1;