Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

Re: Regex to strip comments

by clueless newbie (Hermit)
on Oct 01, 2012 at 01:01 UTC ( #996564=note: print w/ replies, xml ) Need Help??


in reply to Regex to strip comments

I'm afraid that it's hardly single regex but the following code works on nested /* /* */ */ and " ... */ ...", etc.

#!/usr/bin/perl use Data::Dumper; use Params::Validate qw(:types); use strict; use warnings; use 5.10.0; local $/="\n\n"; for my $in (<DATA>) { chomp($in); my ($out,$comment)=RemoveComments($in); say $out; # Now putting the comments back for my $pos (sort { $b <=> $a } keys %$comment) { substr($out,$pos,$comment->{$pos}{length})=$comment->{$pos}{b +ody}; }; print $out."\n\n"; }; exit; sub RemoveComments { # Now handles nested /* */ and -- @_=Params::Validate::validate_pos(@_,{ type=>SCALAR }); my ($in)=@_; my (%comment,$comment_begins); my $stackptr=0; local *foo=sub { @_=Params::Validate::validate_pos(@_,{ type=>SCALAR }, { type +=>SCALAR,default=>0 }); my ($string,$forced)=@_; if ($forced || $stackptr > 0) { $comment{$comment_begins}{length}+=length($string); $comment{$comment_begins}{body}.=$string; $string=~ s{.}{ }mg; }; return $string; }; # foo:; my $out=''; my $pos=0; while ($in !~ m{\G$}cg) { if ($in =~ m{\G((?:/\*)+)}cg) { # /* $comment_begins=$pos if ($stackptr == 0); $stackptr+=length($1)/2; $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G((?:\*/)+)}cg) { # */ $out.=foo($1); $stackptr-=length($1)/2; $pos=pos($in); die "Too many closing '*/'! \$stackptr($stackptr) has gon +e negative!\n" if ($stackptr < 0); } elsif ($stackptr == 0 && $in =~ m{\G(--+.*$)}cgm) { # -- co +mment not in a /* */ comment $comment_begins=$pos; $out.=foo($1,1); $pos=pos($in); } elsif ($stackptr > 0 && $in =~ m{\G(--+)}cgs) { # might be +a -- comment but it's in a /* */ comment $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G('(?:[^']|'')*'|"(?:[^"]|"")*")}cgs) { #' +# ' or " quoted string $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G([^'"]+?(?=\*/|/\*|--|'|"|$))}cgs) { # up + to /*,*/,--,',",\z $out.=foo($1); $pos=pos($in); } else { # Everything should be caught in one of the cases be +fore! warn "WTF!"; my $pos=pos($in); my $residue=substr($in,$pos); die Data::Dumper->Dump([\$pos,\$residue],[qw(*pos *residu +e)]); }; }; return $out,\%comment; }; # RemoveComments: __DATA__ 0/*3--6*/90123456789 01234/*789012*/56789 0/*3456*/90123456789 01--4567890123456789 01234/*789012*/56789 -- /*567890123456789 01234567890123456789 -- */567890123456789 012/*567890123456789 01234/*7890123456789 01234567890123456789 01234*/7890123456789 01*/4567890123456789 '123456/**/12345678' 0'234567--01234567'9 01234567890123456789 '123456/**/12345678' 01234567890123456789 -- /* /* -- */ code code -- */ /* bah */ /* /* */ */ yada /* x */


Comment on Re: Regex to strip comments
Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://996564]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others browsing the Monastery: (22)
As of 2015-07-07 15:15 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (90 votes), past polls