#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;

# Find pairs of entities without/with separating space.
#
# Usage: SCRIPT INFILE [PARAM]
#
#   INFILE  file to scan; it is read into memory in one piece.
#   PARAM   optional; any true value (e.g. 'mod', for modified files)
#           selects the alternative pattern set below.
#
# Every match of every active pattern is written to 'pairs.txt' in the
# current directory, one per line, as "<pattern>: <matched text>".

print "Find pair of entities without/with separating space\n";

# read input file and param ('mod' = for modified files)
my ($infile, $param) = @ARGV;
die "Usage: $0 INFILE [mod]\n" unless defined $infile;

# Pattern sources are kept as plain strings so the exact text can be used
# as the label in the report; they are compiled with qr// just before use.
# NOTE(review): the three literals in the 'mod' branch look like
# mis-encoded characters -- confirm them against the markers that are
# really present in the modified files.
my @patterns = $param
    ? ('–§', '–Ü', 'ߧ')
    : ('&[^;]+;\s&[^;]+;');

# read the whole input file into $xml
open my $in, '<', $infile
    or die "Cannot open $infile for reading: $!";
my $xml = do { local $/ = undef; <$in> };
close $in;

# define output file
open my $out, '>', 'pairs.txt'
    or die "Cannot open pairs.txt for writing: $!";

# report header
print {$out} "Find pair of entities without/with separating space\n\ninput file: ";
print {$out} "$infile";
print {$out} "\n========================================================================\n\n";

# one output line per match, labelled with the pattern that produced it
for my $pattern (@patterns) {
    my $regex = qr/$pattern/;

    # The patterns contain no capture groups of their own, so the whole
    # pattern is wrapped in one here; $1 then holds the matched text.
    while ($xml =~ /($regex)/g) {
        print {$out} "$pattern: $1\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $out or die "Cannot close pairs.txt: $!";