Beefy Boxes and Bandwidth Generously Provided by pair Networks
good chemistry is complicated,
and a little bit messy -LW
 
PerlMonks  

Bookmark maintenance

by Mungbeans (Pilgrim)
on Jun 22, 2001 at 14:18 UTC ( [id://90669]=perlcraft: print w/replies, xml ) Need Help??

   1: #!/usr/bin/perl -w
   2: 
   3: =head1 NAME
   4: 
   5: linkmanager.pm - Package for maintaining a hierarchical bookmark file
   6: 
   7: =head1 SYNOPSIS
   8: 
   9:     use linkmanager;
  10:     my $links = linkmanager->new;
  11:     $links->read_html('links.htm');  # Read in existing bookmark file
  12:                                      # ... then add some new links
  13:     $links->addlink(
  14:       'News', 'IT', 'Slashdot', 'http://slashdot.org');
  15:     $links->addlink(
  16:       'News', 'PERL', 'Perl Monks', 'http://www.perlmonks.org');
  17:     $links->output_html(links.htm'); # .. then save the new bookmarks
  18:                                      # file
  19: 
  20: =head2 Advice
  21: 
  22: The purpose of this module was to provide a flexible and portable
  23: method of maintaining bookmarks of URLs.  The intention is to retain
  24: the data in a generated HTML file, which can be easily read and
  25: appended to.
  26: 
  27: This module is NOT suitable for use as a CGI script, but could be made
  28: so with appropriate taint checking (FUTURE DEVELOPMENT).
  29: 
  30: Methods for deleting or relocating links do not exist.  Simplest
  31: method is to edit the table rows in the html bookmark file before
  32: reading it in.
  33: 
  34: =head1 METHODS
  35: 
  36: =over
  37: 
  38: =cut
  39: 
  40: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
  41: package linkmanager;
  42: use strict;
  43: use Carp;
  44: use vars qw(
  45:             @ISA 
  46:             $VERSION 
  47:             @EXPORT_OK
  48:            );
  49: 
  50: $VERSION   = "1.0";
  51: @ISA       = qw(Exporter);
  52: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
  53: 
  54: 
  55: sub new {
  56:     my $class = shift;
  57:     my $self = {};
  58:     bless $self, $class;
  59:     return $self;
  60: }
  61: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
  62: 
  63: =head2 addlink method
  64: 
  65:     Parameters
  66:       Parent - The parent section for this link if any, use ''
  67:         for a root node
  68:       Section - Which section the link should appear in
  69:       Title - A brief description of the link
  70:       Link - URL for the link
  71: 
  72:     Returns
  73:       Nothing.
  74: 
  75:     Adds a link to the internal hash array indexed by parent,
  76:     section and title.  Duplicates are overwritten.
  77: 
  78: =cut
  79: 
  80: sub addlink {
  81:     my $self = shift;
  82:     croak "bad method call" unless ref $self;
  83: 
  84:     my ($parent, $section, $title, $link) = @_;
  85:     $self->{$parent}{$section}{$title}=$link;
  86: }
  87: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
  88: 
  89: 
  90: =head2 getparents method
  91: 
  92:     Parameters
  93:       None.
  94: 
  95:     Returns
  96:       Array containing all parent nodes.
  97: 
  98: =cut
  99: 
 100: sub getparents {
 101:     my $self = shift;
 102:     croak "bad method call" unless ref $self;
 103: 
 104:     my $parent;
 105:     my @parents;
 106: 
 107:     foreach $parent (sort keys %{ $self }){
 108:         push @parents, $parent;
 109:     }
 110: 
 111:     return @parents;
 112: }
 113: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 114: 
 115: 
 116: 
 117: =head2 getsections method
 118: 
 119:     Parameters
 120:       Parent node.
 121: 
 122:     Returns
 123:       Array containing all sections under the specified parent node.
 124: 
 125: =cut
 126: 
 127: sub getsections {
 128:     my $self = shift;
 129:     croak "bad method call" unless ref $self;
 130:     my $parent = shift;
 131: 
 132:     my $section;
 133:     my @sections;
 134: 
 135:     foreach $section (sort keys %{ $self->{$parent} }){
 136:         push @sections, $section;
 137:     }
 138: 
 139: 
 140:     return @sections;
 141: }
 142: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 143: 
 144: 
 145: =head2 countkids method
 146: 
 147:     Parameters
 148:       Parent node.
 149: 
 150:     Returns
 151:       Scalar count of all titles under parent node, regardless of
 152:       section.
 153: 
 154: =cut
 155: 
 156: sub countkids {
 157:     my $self = shift;
 158:     croak "bad method call" unless ref $self;
 159:     my $parent = shift;
 160: 
 161:     my $section;
 162:     my @sections;
 163:     my $section_total;
 164:     my $parent_total;
 165: 
 166:     foreach $section (sort keys %{ $self->{$parent} }){
 167:         $parent_total+=$self->gettitles($parent, $section);
 168:     }
 169: 
 170:     return $parent_total;
 171: }
 172: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 173: 
 174: 
 175: =head2 getlinks method
 176: 
 177:     Parameters
 178:       Parent node.
 179:       Section node.
 180: 
 181:     Returns
 182:       Hash containing all titles and links for section and parent
 183:       node.
 184: 
 185: =cut
 186: 
 187: sub getlinks {
 188:     my $self = shift;
 189:     croak "bad method call" unless ref $self;
 190:     my $parent = shift;
 191:     my $section = shift;
 192: 
 193:     return %{ $self->{$parent}{$section}};
 194: }
 195: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 196: 
 197: 
 198: 
 199: =head2 gettitles method
 200: 
 201:     Parameters
 202:       Parent node.
 203:       Section node.
 204: 
 205:     Returns
 206:       Array containing all titles for the section and parent node.
 207: 
 208: =cut
 209: 
 210: sub gettitles {
 211:     my $self = shift;
 212:     croak "bad method call" unless ref $self;
 213:     my $parent = shift;
 214:     my $section = shift;
 215: 
 216:     my $title;
 217:     my @titles;
 218: 
 219:     foreach $section (sort keys %{ $self->{$parent}{$section} }){
 220:         push @titles, $title;
 221:     }
 222: 
 223:     return @titles;
 224: 
 225: }
 226: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 227: 
 228: 
 229: 
 230: =head2 output_html method
 231: 
 232:     Parameters
 233:       None.  Links must already have been created before outputting.
 234: 
 235:     Returns
 236:       Nothing.
 237: 
 238:     Creates a file with html tables containing the links.  Backs up
 239:     existing file to .bak.
 240: 
 241: =cut
 242: 
 243: 
 244: sub output_html {
 245:     my $self = shift;
 246:     croak "bad method call" unless ref $self;
 247: 
 248:     my $file = shift;
 249:     $file = 'links.htm' unless defined $file;
 250: 
 251:     use CGI qw/:form :html param header *table *TR/;
 252:     use File::Copy;
 253:     my $date = scalar localtime;
 254:     my ($section_already_started,
 255:         $parent_already_started,
 256:         @sections,
 257:         $number_of_sections,
 258:         @titles,
 259:         $number_of_titles,
 260:         $parent,
 261:         @parents,
 262:         $section,
 263:        );
 264: 
 265:     move($file, "$file.bak"); # backup existing links file
 266:                               # ... MAY not exist so ignore
 267:                               # errors.
 268: 
 269:     open(BOOKMARK, ">$file")
 270:         or die "Unable to write to $file because: $!, Stopped";
 271:     select BOOKMARK;
 272: 
 273:     ### Start the file
 274:     print
 275:         comment(header),
 276:             start_html("Links"),
 277:                 h1("Links created as at $date"),
 278:                     "\n";
 279: 
 280:     @parents = (sort $self->getparents);
 281: 
 282:     ### Index first
 283:     for $parent (@parents){
 284:         print a({-href=>"$file#$parent"}, $parent), "\n";
 285:     }
 286: 
 287:     ### Then tables
 288:     for $parent (@parents){
 289: 
 290:         print  a({-name=>"$parent"}, h3($parent)), "\n",
 291:             "\n",
 292:                 start_table({-border=>1}), "\n";
 293: 
 294:         ### Use column headings so that html::tableextract can read
 295:         ### this later as structured data.
 296:         print
 297:             td(b("Section")),
 298:                 td(b("Subsection")),
 299:                     td(b("Title")),
 300:                         td(b("Link")),
 301:                             end_TR(), "\n";
 302: 
 303:         $parent_already_started=0;
 304: 
 305:         @sections = (sort $self->getsections($parent));
 306:         $number_of_sections = $self->countkids($parent);
 307:         for $section (@sections){
 308: 
 309:             $section_already_started=0;
 310: 
 311:             ### Span rows based on number of entries under parent to
 312:             ### reduce visual clutter
 313:             print td({-rowspan=>$number_of_sections},$parent)
 314:                 unless $parent_already_started;
 315:             $parent_already_started=1;
 316: 
 317:             my %self = $self->getlinks($parent, $section);
 318:             @titles = (sort keys %self);
 319:             $number_of_titles = $self->gettitles($parent, $section);;
 320:             foreach my $title (@titles){
 321: 
 322:             ### Ditto, span rows based on titles under section to
 323:             ### reduce visual clutter
 324:                 print td({-rowspan=>$number_of_titles},$section) 
 325:                     unless $section_already_started;
 326:                 $section_already_started=1;
 327:                 print
 328:                     td($title),
 329:                         td(a{-href=>"$self{$title}"}, $self{$title}),
 330:                             end_TR(), "\n";
 331:             }
 332:         }
 333:         print end_table(),p();
 334: 
 335:     }
 336: 
 337:     close BOOKMARK;
 338:     select STDOUT;
 339: }
 340: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 341: 
 342: 
 343: 
 344: =head2 read_html method
 345: 
 346:     Parameters
 347:       File to read.  Table rows expected to have headings:
 348:         Section Subsection Title Link
 349: 
 350:     Returns
 351:       Nothing.
 352: 
 353:     Internally stores the links found in the tables indexed by parent,
 354:     section and title.
 355: 
 356: =cut
 357: 
 358: 
 359: sub read_html{
 360:     my $self = shift;
 361:     croak "bad method call" unless ref $self;
 362:     my $file = shift;
 363: 
 364:     ### Prime table extract to grab all data with Section Subsection
 365:     ### Title Link headings
 366:     use HTML::TableExtract;
 367:     my $te = new HTML::TableExtract( headers => [qw(Section Subsection Title Link)] );
 368:     my $html;
 369: 
 370:     ### Grab the html and parse it
 371:     {
 372:         local undef $/;        #Slurp up the whole file into one html string
 373:         open(LINKS, "<$file") or die "unable to open file: $file because: $!,";
 374:         $html = <LINKS>;
 375:         close LINKS;
 376:     }
 377:     $te->parse($html);
 378:     my ($parent, $section, $title,$link, $last_parent, $last_section);
 379: 
 380:     ### Load the links into $self
 381:     foreach my $ts ($te->table_states) {
 382: 
 383:         foreach my $row ($ts->rows) {
 384: 
 385:             ($parent,$section,$title,$link)=(@$row);
 386: 
 387:             if ($parent eq ''){
 388:                 $parent = $last_parent;
 389:             } else {
 390:                 $last_parent=$parent;
 391:             }
 392: 
 393:             if ($section eq ''){
 394:                 $section = $last_section;
 395:             } else {
 396:                 $last_section=$section;
 397:             }
 398: 
 399:             $self->addlink($parent,$section,$title,$link);
 400:         }
 401:     }
 402: 
 403: }
 404: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
 405: 
 406: =head1 REQUIRES
 407: 
 408: HTML::TableExtract Carp File::Copy CGI
 409: 
 410: 
 411: =cut
 412: 
 413: 
 414: ### UPDATE: untabify

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others making s'mores by the fire in the courtyard of the Monastery: (5)
As of 2024-04-19 23:44 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found