#!/usr/bin/perl
#
# Interactive helper that converts a document to a UTF-8 text file with the
# docx2txt tool bundled under "<script folder>/scripts/docx2txt".
#
# Steps:
#   1. Identify the operating system (asks the user if detection fails).
#   2. Locate the folder that holds this script and its "scripts/docx2txt"
#      subfolder (asks the user if the automatic guess is wrong).
#   3. Ask for the input file (drag and drop, or a typed/pasted path).
#   4. Run the converter, writing "<name>.txt" next to the input file.
#   5. Abort if the generated file is missing or empty.

use strict;
use warnings;
use File::Spec;

# Read one answer from STDIN with the line ending and surrounding whitespace
# removed. Dies at end of input so the prompt loops below cannot spin forever
# when STDIN is closed or redirected from an exhausted file.
sub read_answer {
    my $line = <STDIN>;
    die "\nNo more input available. ABORTING.\n" unless defined $line;
    chomp $line;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    return $line;
}

# Split a dragged/pasted path into (folder, file name). One optional quote
# character on either side is ignored, and both "/" and "\" count as
# separators. Returns an empty list when the text holds no separator, so
# callers never read stale regex captures.
sub split_pasted_path {
    my ($text) = @_;
    return unless $text =~ m{\A["']?(.*)[/\\]([^"']*)["']?\z};
    return ($1, $2);
}

# IDENTIFY OS
my $OS;
if ($^O =~ /mswin/i) {
    $OS = "Windows";
    print "OS detected: Windows\n";
}
elsif ($^O =~ /linux/i) {
    $OS = "Linux";
    print "OS detected: Linux\n";
}
elsif ($^O =~ /darwin/i) {
    $OS = "Mac";
    print "OS detected: Mac OS X\n";
}
else {
    print "\nUnable to detect OS type, choose your OS:\n\nWindows Any version of Microsoft Windows\nMac Any flavour of Mac OS X\nLinux Linux of some sort\n\n";
    my %is_known_os = map { $_ => 1 } qw(Windows Mac Linux);
    while (1) {
        $OS = read_answer();
        last if $is_known_os{$OS};
        print "\nIncorrect OS type. Try again.\n\n";
    }
}

# IDENTIFY SCRIPT PATH
my $script = File::Spec->rel2abs(__FILE__);

# Everything before the last path separator is the script's folder. The
# match is checked so $1 is only read when it was actually set.
my $scriptpath = $script =~ m{\A(.*)[/\\]} ? $1 : File::Spec->curdir;

# Keep asking until the chosen folder really contains scripts/docx2txt.
until (-d "$scriptpath/scripts/docx2txt") {
    print "\nThe script path found automatically (${scriptpath}) is not correct.\nPlease drag and drop the aligner script here and press enter. (If your OS doesn't support drag & drop, copy-paste the path here. \n"
        . "You can paste by right clicking in the window or right clicking the icon in the top left corner of this window.)\n";

    $script = read_answer();

    # Quotes are optional: a pasted path usually has none, while a dropped
    # path is often quoted by the terminal.
    if (my ($candidate) = split_pasted_path($script)) {
        $candidate =~ s/^\s+//;    # strip leading whitespace
        $candidate =~ s/\s+$//;    # strip trailing whitespace
        $scriptpath = $candidate;
    }

    if (-d "$scriptpath/scripts/docx2txt") {
        print "\nScript folder identified correctly.\n";
    }
}

# DRAG AND DROP INPUT FILE
my ($folder, $file1);
print "\n\nDrag and drop your input file here and press enter.\n";
while (1) {
    my $file1_full = read_answer();
    ($folder, $file1) = split_pasted_path($file1_full);

    # Only accept a path that could be parsed and points to a real file;
    # otherwise the converter would run on garbage and the error would
    # surface much later as an "empty file" abort.
    last if defined $file1 and -f "$folder/$file1";
    print "\nCould not find a file at that path. Try again.\n\n";
}

# Base name without the last extension; a name without a dot is used as is.
my $f1 = $file1 =~ /\A(.*)\./s ? $1 : $file1;

my $input_path  = "$folder/$file1";
my $output_path = "$folder/${f1}.txt";

# CONVERT DOCX TO UTF-8 TXT
# The list form of system() bypasses the shell, so quotes, "$" or backticks
# in a path cannot break or alter the command.
my @converter_cmd;
if ($OS eq "Windows") {
    # create config file, run docx2txt.exe modded to use win config file
    my $config_dir   = "$scriptpath/scripts/docx2txt";
    my $config_path  = "$config_dir/docx2txt.config";
    my $win_cfg_path = "$config_dir/docx2txt_win.config";

    # Replacement for the "unzip => ..." line: point at the bundled unzip.exe.
    my $unzip_line = "unzip => '$scriptpath\\scripts\\docx2txt\\unzip\\unzip.exe',";

    open(my $config_in, "<", $config_path)
        or die "Can't open file $config_path: $!";
    # ">" truncates any config left over from a previous run.
    open(my $config_out, ">", $win_cfg_path)
        or die "Can't open file $win_cfg_path: $!";

    while (my $config_line = <$config_in>) {
        $config_line =~ s/^unzip *=>.*$/$unzip_line/;
        print {$config_out} $config_line;
    }

    close $config_in;
    # Buffered write errors only surface when the handle is closed.
    close $config_out or die "Can't write file $win_cfg_path: $!";

    @converter_cmd = (
        "$scriptpath\\scripts\\docx2txt\\docx2txt_win.exe",
        $input_path, $output_path,
    );
}
else {
    # linux and mac both use the original docx2txt.pl and both have unzip
    # at /usr/bin/unzip
    @converter_cmd = (
        "perl", "$scriptpath/scripts/docx2txt/docx2txt.pl",
        $input_path, $output_path,
    );
}

my $status = system(@converter_cmd);
if ($status == -1) {
    warn "\nThe converter could not be started: $!\n";
}
elsif ($status != 0) {
    warn "\nThe converter reported a failure (status $status).\n";
}

# work with the txt file from now on
$file1 = "${f1}.txt";

# CHECK FILE SIZE, ABORT IF 0
# -s yields a false value both for an empty file and for a missing one.
my $file_1_size = -s "$folder/$file1";
if (!$file_1_size) {
    print "\n\nThe file conversion seems to have failed: the generated file is empty. \nABORTING.\n\n";
    sleep 3;
    die;
}

# DONE
print "\n$file1 created ($file_1_size bytes).\nPress enter to quit.\n";
<STDIN>;    # keep the console window open until the user presses enter