Beefy Boxes and Bandwidth Generously Provided by pair Networks kudra
Your skill will accomplish
what the force of many cannot
 
PerlMonks

Gtk+ HTML Tree Viewer

by mdillon (Priest)
 | Log in | Create a new user | The Monastery Gates | Super Search | 
 | Seekers of Perl Wisdom | Meditations | PerlMonks Discussion | 
 | Obfuscation | Reviews | Cool Uses For Perl | Perl News | Q&A | Tutorials | 
 | Poetry | Recent Threads | Newest Nodes | Donate | What's New | 

on Sep 20, 2000 at 21:17 UTC ( #33351=sourcecode: print w/ replies, xml ) Need Help??

Category: HTML Utility/GUI Programming
Author/Contact Info mike dillon <mdillon@standmed.com>
Description:

this is a rewrite of a utility i did for a job where i was using HTML::TreeBuilder and XML::XPath to parse and search normal HTML documents using the powerful XPath query language.

this utility uses HTML::TreeBuilder to parse an HTML document from a URL specified on the command line or from an internal browser location line and displays it as a Gtk+ Tree in a window. only subtrees with text nodes or anchors are expanded.

there are (simple) XPath queries displayed in the status bar that could be used to extract that node from the document (for example, by converting it to XHTML with HTML::TreeBuilder and then using XML::XPath, or by traversing the TreeBuilder parse tree and programmatically constructing an XPath parse tree).

it's probably not a bad example of simple Gtk+ GUI programming. more may be yet to come in the way of functionality (and comments).

this was written and tested against Gtk 0.7003.

there is support for using GtkHTML as well, if your installation is functional (mine was partially functional when i wrote the code, but stopped working after i upgraded from GtkHTML 0.4 to 0.6.1 and recompiled Gtk::HTML)

most recently updated: 24 Sep 2000

#!/usr/bin/perl -w

use Data::Dumper;

use Gtk;

#use Gtk::HTML;

use HTML::TreeBuilder;

use HTTP::Request::Common;

use LWP::UserAgent;

use Tie::RefHash;

use URI;

use strict;

init Gtk;
#init Gtk::Gdk::Rgb;
#init Gtk::HTML;

##
##
##

my $TITLE = 'gHTMLTree';
my $false = 0;
my $true = 1;

my $show_tooltips = $false;

##
##
##

my $tooltips;
my $filedialog;
my $window;
my $menubar;
my $pane;
my $vbox;
my $combo;
my $tree_scroller;
my $tree;
my $notebook;
my $raw_text_scroller;
my $raw_text;
my $text_scroller;
my $text;
my $attrlist_scroller;
my $attrlist;
my $html_scroller;
my $html;
my $status;
my $status_context;

##
##
##

my $p;
my $ua = new LWP::UserAgent();

##
##
##

#Gtk::Widget->set_default_colormap(Gtk::Gdk::Rgb->get_cmap());
#Gtk::Widget->set_default_visual(Gtk::Gdk::Rgb->get_visual());

$tooltips = new Gtk::Tooltips();

$window = new Gtk::Window('toplevel');
$window->set_usize(800, 600);
$window->set_title($TITLE);
$window->signal_connect(delete_event => sub { Gtk->exit(0) });

$vbox = new Gtk::VBox($false, 5);
$window->add($vbox);
$vbox->show();

{
    my @menuitems = do {
        my $dummy = new Gtk::Entry();
        $dummy->set_usize(0, 0);
        $vbox->pack_start($dummy, $false, $false, 0);

        ( {
            path => '/_File',
            type => '<Branch>',
        },
        {
            path => '/File/_Open Location...',
            accelerator => '<control>O',
            callback => sub {
                $filedialog->show();
            },
        },
        {
            path => '/File/_Quit',
            accelerator => '<control>Q',
            callback => sub { Gtk->exit(0) },
        },
        {
            path => '/_Edit',
            type => '<Branch>',
        },
        {
            path => '/Edit/_Copy',
            accelerator => '<control>C',
            callback => sub {
                if ($combo->entry->has_focus())
                {
                    $combo->entry->copy_clipboard();
                }
                elsif ($text->has_focus())
                {
                    $text->copy_clipboard();
                }
                elsif ($raw_text->has_focus())
                {
                    $raw_text->copy_clipboard();
                }
                elsif ($tree->selection() && $tree->selection()->has_f
+ocus())
                {
                    $dummy->set_text(${$tree->selection()->get_user_da
+ta()}[0]);
                    $dummy->select_region(0, length($dummy->get_text()
+));
                    $dummy->copy_clipboard();
                }
            },
        }, );
    };

    my $factory;

    my $accel = new Gtk::AccelGroup();

    $factory = new Gtk::ItemFactory('Gtk::MenuBar', '<main>', $accel);

    $factory->create_items(@menuitems);

    $window->add_accel_group($accel);

    $menubar = $factory->get_widget('<main>');
}
$vbox->pack_start($menubar, $false, $true, 0);
$menubar->show();

$combo = new Gtk::Combo();
$vbox->pack_start($combo, $false, $false, 5);
$combo->disable_activate();
$combo->set_use_arrows($true);
$combo->set_use_arrows_always($true);
$combo->set_case_sensitive($false);
$combo->entry->signal_connect(activate => \&change_location);
$combo->list->set_selection_mode('single');
$combo->list->signal_connect(selection_changed => sub {
    $combo->entry->select_region(0, length($combo->entry->get_text));
    $combo->grab_focus();
});
$combo->show();

$pane = new Gtk::HPaned();
$vbox->pack_start($pane, $true, $true, 0);
$pane->set_handle_size(10);
$pane->set_gutter_size(8);
$pane->show();

$tree_scroller = new Gtk::ScrolledWindow();
$pane->add1($tree_scroller);
$tree_scroller->set_usize(300, 500);
$tree_scroller->set_policy('automatic', 'always');
$tree_scroller->show();

$tree = new Gtk::Tree();
$tree_scroller->add_with_viewport( $tree );
$tree->set_selection_mode('browse');
$tree->set_view_mode('line');
$tree->show();

$notebook = new Gtk::Notebook();
$pane->add2($notebook);
$notebook->set_tab_pos('bottom');
$notebook->show();

$text_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($text_scroller, new Gtk::Label('HTML Source (Pr
+ocessed)'));
$text_scroller->set_policy('automatic', 'automatic');
$text_scroller->show();

$text = new Gtk::Text();
$text_scroller->add($text);
$text->set_editable($false);
$text->realize();
$text->show();

$raw_text_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($raw_text_scroller, new Gtk::Label('HTML Source
+ (Raw)'));
$raw_text_scroller->set_policy('automatic', 'automatic');
$raw_text_scroller->show();

$raw_text = new Gtk::Text();
$raw_text_scroller->add($raw_text);
$raw_text->set_editable($false);
$raw_text->realize();
$raw_text->show();

$attrlist_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($attrlist_scroller, new Gtk::Label('Node Attrib
+utes'));
$attrlist_scroller->set_policy('automatic', 'always');
$attrlist_scroller->show();

{
    my @titles = qw(Name Value);
    $attrlist = new_with_titles Gtk::CList(@titles);
    $attrlist_scroller->add($attrlist);
    $attrlist->set_column_width(0, 75);
    $attrlist->set_column_auto_resize($_, $true) for (0 .. $#titles);
    $attrlist->set_selection_mode('single');
    $attrlist->show();
}

#$html_scroller = new Gtk::ScrolledWindow();
#$notebook->append_page($html_scroller, new Gtk::Label('HTML Viewer'))
+;
#$html_scroller->set_policy('automatic', 'automatic');
#$html_scroller->show();

#$html = new Gtk::HTML();
#$html_scroller->add($html);
#$html->set_editable($false);
#$html->realize();
#$html->show();

$status = new Gtk::Statusbar();
$vbox->pack_end($status, $false, $false, 0);
$status_context = $status->get_context_id($TITLE);
$status->show();

$filedialog = new Gtk::FileSelection(qq{$TITLE: File Browser});
$filedialog->set_modal($true);
$filedialog->ok_button->signal_connect(clicked => sub {
    $filedialog->hide();
    change_location($combo->entry, 'file:'.$filedialog->get_filename()
+);
});
$filedialog->cancel_button->signal_connect(clicked => sub { $filedialo
+g->hide() });

unless (fork)
{
    $window->show();
    change_location($combo->entry, shift) if @ARGV;
    main Gtk;
    exit(0);
}

Gtk->_exit(0);

##
##
##

{
    my $node_message;
    my $popup_node;
    my $attrlist_handle;

    sub select_node
    {
        my ($widget) = @_;

        my @data= @{$widget->get_user_data};

        $status->remove($status_context, $node_message) if defined $no
+de_message;

        $node_message = $status->push($status_context, $data[0]);

        if (@data >= 2)
        {
            $attrlist->clear() if $attrlist->rows();

            my @attrs = @{$data[1]};

            while (@attrs and @attrs % 2 == 0)
            {
                my ($key, $value) = splice(@attrs, 0, 2);

                $attrlist->append($key, $value);
            }

            Gtk->main_iteration while (Gtk->events_pending);
        }

        return 1;
    }
}

sub change_location
{
    my ($widget, @data) = @_;

    my $url = @data ? shift(@data) : $widget->get_text;

    $url = "file:$url" unless $url =~ /^[a-z_0-9-]+:/i;

    $p->delete() if defined $p;

    $p = new HTML::TreeBuilder;
    $p->ignore_unknown($false);
    $p->store_comments($true);

    load_url($url);
}

{
    my %history;

    sub load_url
    {
        my $url = shift;

        $window->set_title($TITLE);

        $combo->entry->set_text($url);

        $history{$url} = time();

        $combo->set_popdown_strings(sort
            { $history{$b} <=> $history{$a} } keys %history);

        my $children = $tree->children;
        $tree->clear_items(0, $children - 1) if $children;

        $text->set_point(0);
        $text->forward_delete($text->get_length);

        $raw_text->set_point(0);
        $raw_text->forward_delete($raw_text->get_length);

        Gtk->main_iteration while (Gtk->events_pending);

        my $fetch_msg = $status->push($status_context, "Fetching: $url
+ - 0 kB");

        Gtk->main_iteration while (Gtk->events_pending);

        my $req = GET $url;

        #my $loader = $html->signal_connect('url_requested', get_html_
+resource_loader($url));

        #my $handle = $html->begin();

        my $handle_chunk = do {
            my $last_message;
            my $received = 0;

            sub
            {
                my ($data, $response, $proto) = @_;

                my $message;

                if ($data)
                {
                    $received += length $data;

                    $raw_text->freeze();
                    $raw_text->set_point($raw_text->get_length());
                    $raw_text->insert(undef, $raw_text->style->black, 
+undef, $data);
                    $raw_text->set_position(0);
                    $raw_text->thaw();

                    Gtk->main_iteration while (Gtk->events_pending);

                    #$html->write($handle, $data);

                    #Gtk->main_iteration while (Gtk->events_pending);

                    $p->parse($data);

                    $message = $status->push($status_context,
                        sprintf("Fetching: %s - %0.1f", $url,
                        $received / 1024).'kB');
                }

                $status->remove($status_context, $last_message)
                    if defined $last_message;

                $last_message = $message;

                Gtk->main_iteration while (Gtk->events_pending);
            };
        };

        my $res = $ua->request($req, $handle_chunk, 128);

        #$html->signal_disconnect($loader);

        # clear out the status bar
        $handle_chunk->();

        if ($res->is_success)
        {
            my $build_msg = $status->push($status_context, "Building G
+TK tree");

            $status->remove($status_context, $fetch_msg);

            Gtk->main_iteration while (Gtk->events_pending);

            build_html_tree($p, $tree);

            $status->remove($status_context, $build_msg);

            Gtk->main_iteration while (Gtk->events_pending);

            $p->eof();
            #$html->end($handle, 'ok');

            $text->set_point($text->get_length());
            $text->insert(undef, $text->style->black, undef, $p->as_HT
+ML(undef, "\t", {}));
            $text->set_position(0);

            Gtk->main_iteration while (Gtk->events_pending);
        }
        else
        {
            $status->remove($status_context, $fetch_msg);

            $p->eof();
            #$html->end($handle, 'error');

            error_dialog(qq{Failed fetching "$url"});
        }

    }
}

sub get_html_resource_loader
{
    my $url = shift;

    $url =~ s{(?<=/)[^/]*$}//;

    my $base_uri = new URI($url);

    sub
    {
        my ($html, $url, $handle) = @_;

        my $uri = new_abs URI($url, $base_uri);

        $url = $uri->as_string;

        my $req = GET $url;

        my $res = $ua->request($req, sub
            {
                local $_ = shift;
                my ($response, $proto) = @_;

                Gtk->main_iteration while (Gtk->events_pending);

                if (defined and length)
                {
                    $html->write($handle, $_);
                }
                else
                {
                    die("Error fetching $url$/");
                }
            }, 8192);

        if ($res->is_success)
        {
            $html->end($handle, 'ok');
        }
        else
        {
            warn "Error fetching $url$/";
            $html->end($handle, 'error');
        }
    };
}

sub error_dialog
{
    my ($message) = @_;

    my $dialog = new Gtk::Dialog();
    $dialog->set_modal($true);

    my $label = new Gtk::Label($message);
    $dialog->vbox->pack_start($label, $true, $true, 40);
    $label->show();

    my $button = new Gtk::Button("Close");
    $dialog->action_area->pack_start($button, $true, $true, 0);
    $button->signal_connect(clicked => sub { $dialog->hide; $dialog = 
+undef; 1; });
    $button->show();

    $dialog->show();
}

sub build_html_tree
{
    my $node = shift;
    my $branch = shift;
    my $path = shift;
    my $index = shift || 1;
    my $pos = shift || 0;

    $path .= '/';
    $path .= ref $node ? ($node->tag eq '~comment'
        ? 'comment()' : $node->tag) : 'text()';
    $path .= "[$index]";

    my $leaf;
    my $expand = 0; 

    if (ref $node)
    {
        $expand = 1 if $node->tag eq 'a';

        my $label = $node->tag;

        if ($node->tag eq 'link'
            and defined(my $rel = $node->attr('rel')))
        {
            $label .= " ($rel)";
        }

        if (defined (my $href = $node->attr('href')))
        {
            $label .= ": $href";
        }
        elsif (defined (my $src = $node->attr('src')))
        {
            $label .= ": $src";
        }

        if ($node->tag eq '~comment')
        {
            my $text = create_label($node->attr('text'));

            $label = qq{<!-- $text -->};
        }

        $leaf = new Gtk::TreeItem($label);
        $leaf->set_user_data([ $path, [ $node->all_external_attr() ] ]
+);
        $branch->append($leaf);

        if (my @kids = $node->content_list)
        {
            my $subtree = new Gtk::Tree();
            $leaf->set_subtree($subtree);

            my %child_types;
            for my $child (@kids)
            {
                my $tag;

                if (ref $child)
                {
                    $tag = $child->tag;
                }
                else
                {
                    $tag = "";

                    if ($node->tag eq 'title')
                    {
                        $window->set_title("$TITLE: $child");
                    }
                }

                $expand = build_html_tree($child, $subtree, $path,
                    ++$child_types{$tag}) || $expand;
            }

            $leaf->expand() if $expand;
        }
    }
    else
    {
        my $label = create_label($node);

        $leaf = new Gtk::TreeItem(qq{"$label"});
        $leaf->set_user_data([ $path, [] ]);
        $branch->append($leaf);

        $expand = 1;
    }

    $leaf->show();
    $leaf->signal_connect('select', \&select_node);
    $tooltips->set_tip($leaf, $path, "") if $show_tooltips;

    return $expand;
}

sub create_label
{
    my $label = shift || '';

    $label =~ s/\s+/ /g;
    $label = substr($label, 0, 97).'...'
        unless length($label) < 100;    

    $label;
}

Comment on Gtk+ HTML Tree Viewer
Download Code

Back to Code Catacombs

Login:
Password
remember me
What's my password?
Create A New User

Node Status
node history
Node Type: sourcecode [id://33351]
help
Community Ads
Chatterbox
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users
Others chilling in the Monastery: (12)
GrandFather
wfsp
atcroft
herveus
Spooty
Eyck
NodeReaper
spx2
Sisyphus
vishi83
gnosti
im2
As of 2009-11-21 09:19 GMT
Sections
The Monastery Gates
Seekers of Perl Wisdom
Meditations
PerlMonks Discussion
Categorized Q&A
Tutorials
Obfuscated Code
Perl Poetry
Cool Uses for Perl
Perl News
Information
PerlMonks FAQ
Guide to the Monastery
What's New at PerlMonks
Voting/Experience System
Tutorials
Reviews
Library
Perl FAQs
Other Info Sources
Find Nodes
Nodes You Wrote
Super Search
List Nodes By Users
Newest Nodes
Recently Active Threads
Selected Best Nodes
Best Nodes
Worst Nodes
Saints in our Book
Leftovers
The St. Larry Wall Shrine
Offering Plate
Awards
Craft
Snippets Section
Code Catacombs
Quests
Editor Requests
Buy PerlMonks Gear
PerlMonks Merchandise
Planet Perl
Perlsphere
Use Perl
Perl.com
Perl 5 Wiki
Perl Jobs
Perl Mongers
Perl Directory
Perl documentation
CPAN
Random Node
Voting Booth

Future historians will find that the material characteristic of the current era is...

Aluminium
Plastic
Oil
Water
Carbon dioxide
Copper
Iron
Silicon
Salt
Uranium
Hydrogen
Other

Results (729 votes), past polls