Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Recursive file processing from a path and printing output in a file

by Vijay81 (Acolyte)
on Dec 08, 2012 at 18:02 UTC ( #1007912=perlquestion: print w/ replies, xml ) Need Help??
Vijay81 has asked for the wisdom of the Perl Monks concerning the following question:

Hi All, Am trying to write a perl script which reads the path and searches file recursive from a directory and sub directories and process the file print the xml output in a file.

Basically, 1. Read the path and search through directories/sub directories for a file, if file found then process the file and pass the file content to xml format and print them in a file. I have all the perl scripts as a bit's and pieces but don't know how to put them in a in single script and get the expected output.

In details : path give say : "C:\test" Dir1 -- inside Dir1 there is one more sub directories say Dir2 Dir2 -- inside Dir2 there 2 more sub directories say Dir2a/Dir2b (Each of the sub directories Dir2a/b has 2 files each) Dir3 -- inside Dir3 there is 2 more sub directories say Dir3a (Each of the sub directories Dir3a has 2 files each)

now perl script should read directories/sub directories and file names, as names will be passed to xml format before printing. when ever files found, content of file(space delimted) should be replaced by comma delimeter before passing the values of file content to xml format.

I really appreaciate if someone will show me how get to expected result shown below. i really want to know more about for loop/while loop in perl. am still in learing phase so i need this to be done but as i said i am really putting effort to learn perl. Many Thanks in advance

script which i have so far :
#! /usr/bin/perl use strict; use warnings; use File::Basename; use File::Find; use Data::Dumper; use Class::CSV; use Text::CSV; use XML::TreeBuilder; use Data::Dumper; my ($csv,$inputfilename,$resultfile,$fh,$line,$dir,$fp,$base_dir,$dh,$ +file); my (@dir,@dir_names); $base_dir = 'C:\test'; @dir = $base_dir; # logic to travel through path and gets the directory names(just name) while (@dir) { $dir = pop(@dir); opendir($dh, $dir); while($file = readdir($dh)) { next if $file eq '.'; next if $file eq '..'; $file = "$dir/$file"; if (-d $file) { push(@dir, $file); push(@dir_names, basename($file)); } else { } } } #print scalar @dir_names; my $dir_count = scalar @dir_names; print "The no of folder persent in the path $dir_count"; print "\n"; #logic to travel through given same path and gets the file names (just + name of the file) print "<!-- ********************* Printing files names *************** +***********-->\n"; print "\n"; my $dirfile = 'C:\Test'; find({ wanted => \&process_file, no_chdir => $dir_count }, $dirfile); sub process_file { if (-f $_) { #print "This is a file: $_\n"; $file = $_; $file =~ s/.*\///; #$file =basename($file); $file =~ s/\.[^.]+$//; #print "FileName without path [$file]\n"; my @filenames =$file; print @filenames; print "\n"; } } print "<!-- ********************************************************** +******-->\n"; #logic to replace all space to comma delimited from a file,but this lo +gic should be placed once if file is found from above directory searc +h... # below $dir path is just to for checking purpose.. $dir = $base_dir.'/'.$dir_names[0].'/'.$dir_names[1].'/'.$dir_names[2] +; #print $dir; open $resultfile, '>>', 'C:\Test' or die "Can't open file: $!"; #foreach (@dir_names){ foreach $fp (glob("$dir/*.bat")) { open ($fh, "<", $fp) or die "can't read open '$fp':"; while ($line = <$fh>) { $line=~ s/ /,/g; print $resultfile $line; print $resultfile "\n"; } close $fh or die "can't read close '$fp':"; }

All the input files will have the same format and same number of column's

EX: input file :
start putty -ssh -W 13633 1.0.7.1 -l maxi -qaq sooter

Expected output will be:

# finally the xml structure to print/output in a file <configuration version="0.7.1.136" savepassword="True"> <root type="database" name="DIR1 NAME" expanded="True"> <container type="folder" name="DIR2 NAME" expanded="True"> <container type="folder" name="DIR2A NAME" expanded="True"> <connection type="PuTTY" name="DIR2A's-FILE1 NAME"> <connection_info> <name>DIR2A's-FILE NAME</name> <protocol>SSH</protocol> <host>FILE1's COLUMN[6]</host> <port>FILE1's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE1's COLUMN[10]</commandline> <description /> </connection_info> </connection> <connection type="PuTTY" name="DIR2A's-FILE2 NAME"> <connection_info> <name>DIR2A's-FILE NAME</name> <protocol>SSH</protocol> <host>FILE1's COLUMN[6]</host> <port>FILE1's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE1's COLUMN[10]</commandline> <description /> </connection_info> </connection> <container type="folder" name="DIR2B's NAME" expanded="False"> <connection type="PuTTY" name="DIR2B's-FILE1 NAME"> <connection_info> <name>DIR2B's-FILE1 NAME</name> <protocol>SSH</protocol> <host>FILE1's COLUMN[6]</host> <port>FILE1's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE1's COLUMN[10]</commandline> <description /> </connection_info> </connection> <connection type="PuTTY" name="DIR2B's-FILE2 NAME"> <connection_info> <name>DIR2B's-FILE NAME</name> <protocol>SSH</protocol> <host>FILE2's COLUMN[6]</host> <port>FILE2's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE2's COLUMN[10]</commandline> <description /> </connection_info> </connection> <container type="folder" name="DIR3's NAME" expanded="False"> <connection type="PuTTY" name="DIR3B's-FILE1 NAME"> <connection_info> <name>DIR3B's-FILE1 NAME</name> <protocol>SSH</protocol> <host>FILE1's COLUMN[6]</host> <port>FILE1's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE1's COLUMN[10]</commandline> <description /> </connection_info> </connection> <connection type="PuTTY" name="DIR3B's-FILE2 NAME"> <connection_info> <name>DIR3B's-FILE NAME</name> <protocol>SSH</protocol> <host>FILE2's COLUMN[6]</host> <port>FILE2's COLUMN[5]</port> <session>Default Settings</session> <commandline>FILE2's COLUMN[10]</commandline> <description /> </connection_info> </connection> </container> </container> </root> </configuration>

Comment on Recursive file processing from a path and printing output in a file
Select or Download Code
Re: Recursive file processing from a path and printing output in a file
by CountZero (Bishop) on Dec 08, 2012 at 19:36 UTC
    Examine File::Find::Rule and more specifically the start and match methods.

    The logic of your program can then be simplified as follows:

    use Modern::Perl; use File::Find::Rule; my $start_dir = 'c:/Test'; my $rule = File::Find::Rule->file->start($start_dir); while ( defined( my $file = $rule->match ) ) { # process $file ... }
    Why do you need to replace the spaces in the files you found to comma's? I cannot see any good reason for that.

    To transform your files into XML format, several XML modules exist, such as XML::Writer. Writing XML is not simple, there are many rules to follow and these modules take care of it.

    CountZero

    A program should be light and agile, its subroutines connected like a string of pearls. The spirit and intent of the program should be retained throughout. There should be neither too little or too much, neither needless loops nor useless variables, neither lack of structure nor overwhelming rigidity." - The Tao of Programming, 4.1 - Geoffrey James

    My blog: Imperial Deltronics
Re: Recursive file processing from a path and printing output in a file
by davido (Archbishop) on Dec 09, 2012 at 01:02 UTC

    Reading your directory structure with readdir and with File::Find is not quite what I had in mind when I answered your previous question. Everything you're doing in the while loop could be attached to your wanted function, reducing the overall work and complexity of your script. If your 'wanted' is getting to complicated, just break it out into smaller chunks of logic. Have you read the documentation for File::Find yet?

    You (the programmer) are working twice as hard as you need to (and so is your script).


    Dave

Re: Recursive file processing from a path and printing output in a file
by Vijay81 (Acolyte) on Dec 09, 2012 at 21:14 UTC

    Thanks guys for you inputs.

    I now able to read directories and sub directoires and able to print the xml output file.

    made some cleanup work on my inital script so it will be much more eaiser to read and follow.

    problem now is it's not recusively going through sub-directoires and printing th xml output. It just reads first sub-directories file1 content and print just that as output

    Can anyone tell me which loop i am missing out

    Script:
    #! /usr/bin/perl use strict; use warnings; use File::Basename; use File::Find; use Class::CSV; use Text::CSV; use XML::TreeBuilder; use Data::Dumper; #variable/arrays declartions my ($csv,$inputfilename,$resultfile,$fh,$line,$dir,$fp,$base_dir,$dh,$ +file,$mycontainerless,$paramsfile,$connection,$resultxmlfile,$config) +; my ($tree,$container,$row,$contree,$index,$tag,$tagname); my (@dir,@dir_names,@filenames,@taglist,@columns); #path declartions $base_dir = 'C:\Test'; $paramsfile = 'C:\Test\input.csv'; @dir = $base_dir; #read through the given directory path to look for dir/subdir's while (@dir) { $dir = pop(@dir); opendir($dh, $dir); while($file = readdir($dh)) { next if $file eq '.'; next if $file eq '..'; $file = "$dir/$file"; if (-d $file) { push(@dir, $file); push(@dir_names, basename($file)); } elsif (-f $file) { $file =~ s/.*\///; $file =~ s/\.[^.]+$//; push(@filenames, $file); } } } &write_datato_csvfile; #sleep(25); print "<!-- ************************************************** +**************-->\n"; print @dir_names; print "\n"; print "<!-- ************************************************** +**************-->\n"; print @filenames; print "\n"; print "<!-- ************************************************** +**************-->\n"; &write_output_xml; # sub functions #sub function to write input file data into csv format sub write_datato_csvfile{ open $resultfile, '>>', 'C:\Test\input.csv' or die "Can't open file: $ +!"; foreach $fp (glob("$dir/*.bat")) { print $fp; print "\n"; open ($fh, "<", $fp) or die "can't read open '$fp':"; while ($line = <$fh>) { $line=~ s/ /,/g; print $resultfile $line; print $resultfile "\n"; } close $fh or die "can't read close '$fp':"; } #close $fh or die "can't read close '$fp':"; } #sub function to write xml format in output file sub write_output_xml{ open ($resultxmlfile, '>>', 'C:\Test\resultxml.xml') or die "Can't ope +n file: $!"; print $resultxmlfile '<?xml version="1.0" encoding="utf-16"?>'; print $resultxmlfile "\n<!-- ************************************* +***************************-->\n"; print $resultxmlfile "<!-- * + *-->\n"; print $resultxmlfile "<!-- * PuTTY Configuration Manager save file + - All right reserved. *-->\n"; print $resultxmlfile "<!-- * + *-->\n"; print $resultxmlfile "<!-- *************************************** +*************************-->\n"; print $resultxmlfile "<!-- The following lines can be modified at +your own risks. -->\n"; #&write_datato_csvfile; #sleep(30); #my $time = 15; #while($time--){ #sleep(1); #} $resultfile = 'C:\Test\input.csv'; my $csv = Text::CSV->new(); open (CSV, "<", $resultfile) or die $!; print "Reading input csv file.... \n"; while (<CSV>) { next if ($. == 0); if ($csv->parse($_)) { @columns = $csv->fields(); #print "$columns[1] \n"; } else { my $err = $csv->error_input; print "Failed to parse line: $err"; } print $resultxmlfile <<EOF <configuration version="0.1.1.2" savepassword="True"> <root type="database" name="$dir_names[0]" expanded="True"> <container type="folder" name="$dir_names[1]" expanded="True"> <container type="folder" name="$dir_names[2]" expanded="Tr +ue"> <connection type="PuTTY" name="$filenames[0]"> <connection_info> <name>$filenames[0]</name> <protocol>SSH</protocol> <host>$columns[5]</host> <port>$columns[4]</port> <session>Default Settings</session> <commandline>$columns[9]</commandline> <description /> </connection_info> <login> </login> <password /> <prompt /> </login> <timeout> <connectiontimeout>1000</connectiontimeout> <logintimeout>750</logintimeout> <passwordtimeout>750</passwordtimeout> <commandtimeout>750</commandtimeout> </timeout> <command> <command1 /> <command2 /> <command3 /> <command4 /> <command5 /> </command> <options> <loginmacro>False</loginmacro> <postcommands>False</postcommands> <endlinechar>10</endlinechar> </options> </connection> EOF ; } close CSV; print $resultxmlfile "</root>\n"; print $resultxmlfile "</configuration>\n"; #close CSV; print "Output xml file has been generated successfully.. \n"; }

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://1007912]
Approved by davido
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others contemplating the Monastery: (7)
As of 2014-12-27 15:06 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (177 votes), past polls