I'm afraid that it's hardly single regex but the following code works on nested /* /* */ */ and " ... */ ...", etc.
#!/usr/bin/perl
use Data::Dumper;
use Params::Validate qw(:types);
use strict;
use warnings;
use 5.10.0;
local $/="\n\n";
for my $in (<DATA>) {
chomp($in);
my ($out,$comment)=RemoveComments($in);
say $out;
# Now putting the comments back
for my $pos (sort { $b <=> $a } keys %$comment) {
substr($out,$pos,$comment->{$pos}{length})=$comment->{$pos}{b
+ody};
};
print $out."\n\n";
};
exit;
sub RemoveComments { # Now handles nested /* */ and --
@_=Params::Validate::validate_pos(@_,{ type=>SCALAR });
my ($in)=@_;
my (%comment,$comment_begins);
my $stackptr=0;
local *foo=sub {
@_=Params::Validate::validate_pos(@_,{ type=>SCALAR }, { type
+=>SCALAR,default=>0 });
my ($string,$forced)=@_;
if ($forced || $stackptr > 0) {
$comment{$comment_begins}{length}+=length($string);
$comment{$comment_begins}{body}.=$string;
$string=~ s{.}{ }mg;
};
return $string;
}; # foo:;
my $out='';
my $pos=0;
while ($in !~ m{\G$}cg) {
if ($in =~ m{\G((?:/\*)+)}cg) { # /*
$comment_begins=$pos
if ($stackptr == 0);
$stackptr+=length($1)/2;
$out.=foo($1);
$pos=pos($in);
} elsif ($in =~ m{\G((?:\*/)+)}cg) { # */
$out.=foo($1);
$stackptr-=length($1)/2;
$pos=pos($in);
die "Too many closing '*/'! \$stackptr($stackptr) has gon
+e negative!\n"
if ($stackptr < 0);
} elsif ($stackptr == 0 && $in =~ m{\G(--+.*$)}cgm) { # -- co
+mment not in a /* */ comment
$comment_begins=$pos;
$out.=foo($1,1);
$pos=pos($in);
} elsif ($stackptr > 0 && $in =~ m{\G(--+)}cgs) { # might be
+a -- comment but it's in a /* */ comment
$out.=foo($1);
$pos=pos($in);
} elsif ($in =~ m{\G('(?:[^']|'')*'|"(?:[^"]|"")*")}cgs) { #'
+# ' or " quoted string
$out.=foo($1);
$pos=pos($in);
} elsif ($in =~ m{\G([^'"]+?(?=\*/|/\*|--|'|"|$))}cgs) { # up
+ to /*,*/,--,',",\z
$out.=foo($1);
$pos=pos($in);
} else { # Everything should be caught in one of the cases be
+fore!
warn "WTF!";
my $pos=pos($in);
my $residue=substr($in,$pos);
die Data::Dumper->Dump([\$pos,\$residue],[qw(*pos *residu
+e)]);
};
};
return $out,\%comment;
}; # RemoveComments:
__DATA__
0/*3--6*/90123456789
01234/*789012*/56789
0/*3456*/90123456789
01--4567890123456789
01234/*789012*/56789
-- /*567890123456789
01234567890123456789
-- */567890123456789
012/*567890123456789
01234/*7890123456789
01234567890123456789
01234*/7890123456789
01*/4567890123456789
'123456/**/12345678'
0'234567--01234567'9
01234567890123456789
'123456/**/12345678'
01234567890123456789
-- /*
/* -- */ code
code
-- */
/* bah */
/*
/*
*/
*/
yada
/*
x
*/