Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

Re: Regex to strip comments

by clueless newbie (Curate)
on Oct 01, 2012 at 01:01 UTC ( [id://996564]=note: print w/replies, xml ) Need Help??


in reply to Regex to strip comments

I'm afraid that it's hardly single regex but the following code works on nested /* /* */ */ and " ... */ ...", etc.

#!/usr/bin/perl use Data::Dumper; use Params::Validate qw(:types); use strict; use warnings; use 5.10.0; local $/="\n\n"; for my $in (<DATA>) { chomp($in); my ($out,$comment)=RemoveComments($in); say $out; # Now putting the comments back for my $pos (sort { $b <=> $a } keys %$comment) { substr($out,$pos,$comment->{$pos}{length})=$comment->{$pos}{b +ody}; }; print $out."\n\n"; }; exit; sub RemoveComments { # Now handles nested /* */ and -- @_=Params::Validate::validate_pos(@_,{ type=>SCALAR }); my ($in)=@_; my (%comment,$comment_begins); my $stackptr=0; local *foo=sub { @_=Params::Validate::validate_pos(@_,{ type=>SCALAR }, { type +=>SCALAR,default=>0 }); my ($string,$forced)=@_; if ($forced || $stackptr > 0) { $comment{$comment_begins}{length}+=length($string); $comment{$comment_begins}{body}.=$string; $string=~ s{.}{ }mg; }; return $string; }; # foo:; my $out=''; my $pos=0; while ($in !~ m{\G$}cg) { if ($in =~ m{\G((?:/\*)+)}cg) { # /* $comment_begins=$pos if ($stackptr == 0); $stackptr+=length($1)/2; $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G((?:\*/)+)}cg) { # */ $out.=foo($1); $stackptr-=length($1)/2; $pos=pos($in); die "Too many closing '*/'! \$stackptr($stackptr) has gon +e negative!\n" if ($stackptr < 0); } elsif ($stackptr == 0 && $in =~ m{\G(--+.*$)}cgm) { # -- co +mment not in a /* */ comment $comment_begins=$pos; $out.=foo($1,1); $pos=pos($in); } elsif ($stackptr > 0 && $in =~ m{\G(--+)}cgs) { # might be +a -- comment but it's in a /* */ comment $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G('(?:[^']|'')*'|"(?:[^"]|"")*")}cgs) { #' +# ' or " quoted string $out.=foo($1); $pos=pos($in); } elsif ($in =~ m{\G([^'"]+?(?=\*/|/\*|--|'|"|$))}cgs) { # up + to /*,*/,--,',",\z $out.=foo($1); $pos=pos($in); } else { # Everything should be caught in one of the cases be +fore! warn "WTF!"; my $pos=pos($in); my $residue=substr($in,$pos); die Data::Dumper->Dump([\$pos,\$residue],[qw(*pos *residu +e)]); }; }; return $out,\%comment; }; # RemoveComments: __DATA__ 0/*3--6*/90123456789 01234/*789012*/56789 0/*3456*/90123456789 01--4567890123456789 01234/*789012*/56789 -- /*567890123456789 01234567890123456789 -- */567890123456789 012/*567890123456789 01234/*7890123456789 01234567890123456789 01234*/7890123456789 01*/4567890123456789 '123456/**/12345678' 0'234567--01234567'9 01234567890123456789 '123456/**/12345678' 01234567890123456789 -- /* /* -- */ code code -- */ /* bah */ /* /* */ */ yada /* x */

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://996564]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others cooling their heels in the Monastery: (4)
As of 2024-04-24 20:16 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found