Category: Text Processing
Author/Contact Info
Description: If you're working with RTF, sometimes you'll want to compare two RTF files to see whether they differ. A traditional diff falls down here, because RTF can contain all sorts of crazy whitespace, some of which is significant and some of which isn't. rtfdiff, below, renders the token streams from two RTF files as plain-text listings and then diffs those, allowing you to easily see whether the two RTF files are the same :-) Tada!

# Compares a tokenized view of two RTF files

use strict;
use RTF::Tokenizer;
use Text::Diff;

# Both file names are required. Stop with a usage message instead of handing
# an undefined file name to pretty_print().
die "Usage: $0 first.rtf second.rtf\n" if @ARGV < 2;

# Render each file's token stream as text ...
my $first_file  = pretty_print( $ARGV[0] );
my $second_file = pretty_print( $ARGV[1] );

# ... and diff the two renderings; scalar references are passed so the strings
# are compared as content.
print diff \$first_file, \$second_file;

# Render the token stream of one RTF file as text, appending one
# "(type) token argument" entry followed by a newline for every token, so that
# two files can be compared with a line-based diff.
#
# Takes the path of the RTF file to tokenize.
# Returns the rendered listing as a single string; the string is empty when
# the tokenizer reports 'eof' before yielding any other token.
sub pretty_print {

    my $filename = shift;

    # Start from an empty string so a file without tokens yields '' rather
    # than undef.
    my $output = '';

    my $tokenizer = RTF::Tokenizer->new( file => $filename );

    while (1) {

        my ( $type, $token, $argument ) = $tokenizer->get_token();

        # The tokenizer signals the end of the input with an 'eof' token.
        last if $type eq 'eof';

        # Make sure the substitutions and the interpolation below always work
        # on a defined value.
        $argument = '' unless defined $argument;

        # Show newlines, tabs and carriage returns inside the argument as
        # visible markers so they cannot disturb the layout of the listing.
        $argument =~ s/\n/[n]/g;
        $argument =~ s/\t/[t]/g;
        $argument =~ s/\r/[r]/g;

        $output .= "($type) $token $argument\n";

    }

    return $output;

}