#!/usr/bin/perl
# -*- cperl -*-
#
# Dry-run importer for the gjiten EDICT dictionary.
#
# Looks up the 'gjiten' pack and the 'word' category through findrecord()
# (provided by ./db.pl), then reads the dictionary file line by line.  Lines
# whose first character is at or below '~' (i.e. ASCII) are reported and
# skipped; every other line is split into spelling, reading, definitions and
# individual characters, and the parsed structure is dumped to STDOUT.
# Nothing is written to the database by this script; it only prints what it
# parsed and a summary of the counters.

use strict;
use warnings;
use utf8;
use Data::Dumper;

require "./db.pl";

# Debugging limits: stop after this many dictionary words / input lines so a
# trial run does not dump the whole dictionary.
my $MAX_WORDS = 50;
my $MAX_LINES = 100;

my ($pack) = findrecord('pack', 'packname', 'gjiten');
die "You must create the gjiten pack first (and make sure its language is set correctly).\n"
    if not ref $pack;
my $packid = $$pack{id};
my $langid = $$pack{language};
my $userid = $$pack{user} || 1;   # We guess that the first user is probably the sysadmin.

my ($category) = findrecord('category', 'name', 'word');
die "How can there be no word category?\n" if not ref $category;
my $catid = $$category{id};

my $dicfile = '/usr/share/gjiten/dics/edict';

# Decode on input so that "." and \P{M}\p{M}* below operate on characters
# rather than on raw bytes.
# NOTE(review): assumes the gjiten copy of EDICT is UTF-8 (the upstream EDICT
# distribution is EUC-JP) -- confirm for this installation.
open my $dic, '<:encoding(UTF-8)', $dicfile
    or die "Cannot open dictionary: $dicfile: $!";

# The input is now decoded text, so STDOUT needs an encoding layer too;
# otherwise printing a line containing Japanese warns "Wide character".
binmode STDOUT, ':encoding(UTF-8)';

my ($total, $skipped, $japanese, $already, $inserted) = (0, 0, 0, 0, 0);

LINE:
while (my $line = <$dic>) {
    # Leave the loop (instead of dying) once a debug limit is hit, so that
    # the summary at the bottom is still printed.
    last LINE if $japanese >= $MAX_WORDS or $total >= $MAX_LINES;

    # Without the chomp, the trailing newline would survive split() below
    # as a bogus final "definition".
    chomp $line;
    ++$total;

    my ($firstchar) = $line =~ /^(.)/;

    # An empty line has no first character; treat it like any other
    # non-dictionary line.
    if (not defined $firstchar or $firstchar le '~') {
        ++$skipped;
        my $shown = defined $firstchar ? $firstchar : '';
        print "Skipping (starts with low character '$shown'): $line\n";
        next LINE;
    }

    ++$japanese;

    # The line is split on "/": the first field holds the headword (with an
    # optional bracketed reading), the remaining fields are definitions.
    my ($word, @def) = split m{/}, $line;

    # Spelling is the leading run of characters up to a space or "[";
    # the reading, when present, is whatever sits between "[" and "]".
    my ($spelling, $reading) =
        $word =~ m{ ( [^\[\x20]+ ) \s* (?: \[ (.*?) \] )? }x;

    # One element per character: a non-mark code point followed by any
    # combining marks that attach to it.
    my @char = defined $spelling ? $spelling =~ m/(\P{M}\p{M}*)/g : ();

    print Dumper(+{
        word     => $word,
        spelling => $spelling,
        reading  => $reading,
        defs     => \@def,
        char     => \@char,
    });
}

close $dic or warn "Error closing $dicfile: $!";

print "Of $total lines, skipped $skipped.\nFound $japanese Japanese words. $already already had cards.\n$inserted new cards would be created.\n";