in reply to
Truely Unique Code Set?
Here's your program with some annotations (#-- annotation):
#!/usr/bin/perl
#-- shebang completed: the original '#!/usr/bin/' named no interpreter
#-- (alternative: #!/usr/bin/env perl)
#program to generate 6 Million unique codes, Oct 15, 2011.
#-- 'use strict;' - strongly recommended
#-- 'use warnings;' - strongly recommended
#-- (not switched on here, because the rest of this script still relies on
#   undeclared package globals)
open(MYOUTFILE, '>', "codesSixMil.txt")
    or die "cannot open codesSixMil.txt - $!"; #open for write, overwrite (>)
#-- 1.) now a 3-argument open(): the mode is separate from the filename
#-- 2.) filehandle could be e.g. $codesfile instead of MYOUTFILE, but it's
#       okay for a small script.
#-- 3.) a failing open() is now checked and aborts with the reason ($!)
#-- 4.) maybe use a variable for filename:
#       'my $code_filename = "codesSixMil.txt";'
#-- 'use strict;' will tell you to rewrite the following lines
#   as 'my $count = 0;' etc.
$count = 0; #keep track of how many codes generated
$count1 = 0; #keep track of how many codes were written out
#-- bad variable names, esp. $count1 --> e.g. $codes_generated, $codes_unique
$desiredQuantity = 6000001;
#-- 'off by one' smell - if you want exactly 6M then write 6000000 and
#   adjust loop
#-- (or more readable: 6_000_000)
@arrayOfResults =();
#-- bad variable name: suggestion: @unique_codes or @raw_codes or ...
# generate_random_string($stringsize)
#   Returns a string of exactly $stringsize characters, each drawn at
#   random (via rand) from 'a'..'z' plus the digits 3,4,6,7,9.
#-- fixed: the original took the slice
#   '(map {...} @alphanumeric)[0 .. $stringsize]', which yields
#   $stringsize+1 characters and can never return more than 31 (one per
#   element of @alphanumeric). Mapping over (1..$stringsize) produces
#   exactly the requested length.
sub generate_random_string {
    my $stringsize = shift;
    my @alphanumeric = ('a'..'z', 3,4,6,7,9);
    my $randstring = join '',
        map { $alphanumeric[rand @alphanumeric] } (1..$stringsize);
    return $randstring;
}
# Generate $desiredQuantity random codes and collect them in
# @arrayOfResults; duplicates are still possible at this stage.
for ($i=0;$i<$desiredQuantity;$i++) {
    #-- C-style loop. More Perlish: 'for (1..$desiredQuantity) {'
    $returnvalue = generate_random_string(10);
    #-- bad name. Suggestion: $code_key or the like...
    #-- maybe use a $code_length variable instead of '10'?
    push(@arrayOfResults, $returnvalue);
    $count++; #-- not very useful: after the loop: '$count == $desiredQuantity;'
    #-- Can be replaced by $desiredQuantity (maybe).
}
#remove any potential duplicates
#-- %count records how often each code was seen; only the first
#   occurrence of a code passes the grep.
@unique = grep { ++$count{$_} < 2 } @arrayOfResults;
#-- waste of memory, but should work
#loop through array to output each code on its own line
#with a carriage return as required.
#-- fixed: iterate over @unique itself. The original counted up to
#   $desiredQuantity, so whenever duplicates had been removed it ran past
#   the end of @unique and wrote empty lines; it also used the slice
#   '@unique[$i]' where the element '$unique[$i]' was meant.
for my $code (@unique) {
    print $code, "\n"; #-- for debugging? Increases runtime.
    print MYOUTFILE $code, "\r\n";
    $count1++; #-- counts the unique codes actually written
}
#do some rudimentary checking and output result.
print "\n---Code generation report--- \n\n";
print "First array: $count \n";
print " Unique array: $count1 \n\n";
print "---There are ", $count-$count1, " code duplicates in this listing of ", $desiredQuantity, " ---\n\n\n";
#-- fixed: $count1 now reflects the size of @unique, so the difference
#   printed above is the real number of duplicates. Previously
#   $count1 == $count == $desiredQuantity always held, giving the false
#   impression of having $count unique codes.
#*** Close the file ***
#-- buffered write errors only show up at close(), so check it
close(MYOUTFILE) or die "cannot close output file - $!";
Maybe it is easier to store the codes as hash keys and generate random codes until the desired quantity
is reached? This one runs approx. 3x faster.
#!/usr/bin/perl
#program to generate 6 Million unique codes, Oct 15, 2011.
use strict;
use warnings;
# Optional positional command-line arguments, taken from @ARGV in this
# order. Because of '||', a missing or false argument (undef, '', 0)
# falls back to the default on the right:
#   1. name of the output file
#   2. number of unique codes to generate
#   3. length of each code in characters
my $codes_filename = shift || "codesSixMil2.txt";
my $desiredQuantity = shift || 6_000_001;
my $code_length = shift || 10;
# Characters a code may be built from: 'a'..'z' plus the digits 3,4,6,7,9.
my @code_set = ('a'..'z', 3,4,6,7,9);
# Build a random code of the requested length. Each character is picked
# independently (with rand) from the file-level @code_set.
sub generate_random_string {
    my ($length) = @_;
    my $code = '';
    for ( 1 .. $length ) {
        $code .= $code_set[ rand @code_set ];
    }
    return $code;
}
# Codes are stored as hash keys: a hash cannot hold the same key twice,
# so a repeated code just bumps its counter and the number of keys is
# the number of distinct codes generated so far.
my %unique_codes;

# Guard against a request that can never be satisfied. At most
# (number of characters) ** (code length) distinct codes exist; asking
# for more would make the loop below run forever.
my $possible_codes = scalar(@code_set)**$code_length;
die "Cannot generate $desiredQuantity unique codes: at most "
  . "$possible_codes codes of length $code_length exist\n"
  if $desiredQuantity > $possible_codes;

# Keep generating until enough distinct codes have been collected.
while ( scalar keys %unique_codes < $desiredQuantity ) {
    $unique_codes{ generate_random_string( $code_length ) }++;
}

# Sanity check before anything is written to disk.
my $unique_codes_count = scalar keys %unique_codes;
die "Mismatch: Codes generated: $unique_codes_count, but wanted $desiredQuantity!"
  if $unique_codes_count != $desiredQuantity;
# Write every code on its own line, terminated by CR+LF, then report
# how many codes were written.
open( my $out_fh, '>', $codes_filename )
  or die "cannot open $codes_filename - $!";
for my $code ( keys %unique_codes ) {
    print {$out_fh} $code, "\r\n";
}
# Buffered write errors (e.g. a full disk) only surface at close(), so
# its result is checked as well.
close($out_fh) or die "cannot close $codes_filename - $!";
print "Unique codes (size=$code_length) written to '$codes_filename': $unique_codes_count\n";