$/ = \384; # 2K blocks my $blocksz = 384; my $final = ''; my $block = ''; my $lastfound = 0; my $blocklen = 0; open IN, "perlmonks66_tmp.txt"; open OUT, ">>pm66out.txt"; #more efficient when parsing more than one block #Extract the necessary data bits my $block01 = ; my $block02 = ''; while ($block02 = ) { # if pattern matches my $tmphold = ''; do{ $block = $block01.$block02; $block02 = ''; pos($block) = 0; # position pointer at beginning $blocklen = length($block); $block =~ m/\G # pick up where you left off (.*) # match zero or more chars up to ... 11110100 # match byte marker (0x06D4) ( 1 byte) .{8} # match any seq of 8 chars ( 1 byte) (.{1520}) # capture the next 1520 chars (190 bytes) 11110100 # match byte marker (0x06D4) ( 1 byte) .{8} # match any seq of 8 chars ( 1 byte) (.{464}) # capture next 464 chars ( 58 bytes) .{1056} # matching any seq of 1056 chars (132 bytes) /xg ; if(defined(pos($block))){ my ($tmp1,$tmp2,$tmp3) = ($1,$2,$3); # match found ==> use parts $tmphold .= join('',map { $_ ||= '' } ($tmp1,$tmp2,$tmp3)); $block01 = substr($block,-($blocklen-$blocksz)); }else{ if(length($block) >= 2*$blocksz){ $tmphold .= substring($block,0,$blocksz); $block01 = substr($block,-($blocklen-$blocksz)); }else{ $block01 = $block; $block = ''; } } }while(length($block) >= $blocksz); # save the remaining unmatched/unchecked chars $final = $tmphold; print OUT "$final"; $final = ''; # this is strictly unnecssary but does keep variable clean } $final = pack("B*", $block01); # get final block print OUT "$final"; close OUT; close IN; exit;