Beefy Boxes and Bandwidth Generously Provided by pair Networks
Just another Perl shrine
 
PerlMonks  

Read a bin file and extract data

by hiX0r (Acolyte)
on Mar 22, 2012 at 14:29 UTC ( #961032=perlquestion: print w/ replies, xml ) Need Help??
hiX0r has asked for the wisdom of the Perl Monks concerning the following question:

I have a binary file, generated in Windows XP (not 64-bit), and copied it (via script, no modifications done) to a Linux machine. Here I try to read in the file via this program, which I found on the net:
my ($orf_file) = @_; my $record_length = 230; open ORF_DATA, "<$orf_file" or die "Can't open $orf_file: $!\n"; binmode ORF_DATA; # Tell Perl this isn't a text file my $buffer; while (read ORF_DATA, $buffer, $record_length) { my ($orf_name, $orf_left, $orf_right, $orf_direction, $orf_descr +) = unpack("x5x4A11x4x4x1a8x1a8x4A1x4x14x4x5x1x8x4A50", $buffer +); foreach my $coordinate ($orf_left, $orf_right) { $coordinate = unpack("d", reverse($coordinate)); } print "$orf_name, $orf_left, $orf_right, $orf_direction, $orf_de +scr\n"; } read ORF_DATA, $buffer, 5; close ORF_DATA;
But it gives me this output:
, -8.57210024934386e+303, 5.05923221341436e-321, , ^@&#65533;&#65533;& +#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6553 +3;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533; +&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#655 +33;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533; &#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6553 +3;&#65533;&#65533;, -nan, -nan, &#65533;, &#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533; +&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#655 +33;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533 +;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65 +533;&#65533;&#65533;&#65533; &#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6553 +3;&#65533;&#65533;, -nan, -nan, &#65533;, &#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533; +&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#655 +33;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533 +;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65 +533;&#65533;&#65533;&#65533; &#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6553 +3;&#65533;&#65533;, -nan, -nan, &#65533;, &#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533; +&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#655 +33;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533 
+;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65 +533;&#65533;&#65533;&#65533; &#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6553 +3;&#65533;&#65533;, -nan, -nan, &#65533;, &#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533; +&#65533;R^@o^@o^@t^@ ^@E^@n^@t^@r^@y , 0, 0, , , 0, 0, , ^@^@^@^@^@&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533;&#65533;&#65533; , 4.17271145058687e-317, 4.26518227762202e-135, , ^@^@^@^@^@^@^@^@^@^@ +^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^X^@^A^@&#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;^@^@^@Zp&#65533;3&#65533 +;"&#65533; ^@^@^@^@^@^@^@*^@^A^A, 3.10794187231649e-312, -1.4360283086302e+92, , +^@a^@ ^@P^@r^@o^@c^@e^@s^@s^@i^@n^@g^@-^@2 ^@ ^@^@^@^R^@^@^@&#65533;, 1.94693962667949e-308, 1.2882298355029 +3e-231, ^S, ^@^@^@^^^@^@^@^_^@^@^@ ^@^@^@!^@^@^@"^@^@^@#^@^@^@$^@^@^@ +%^@^@^@&^@^@^@'^@^@^@(^@^@^@) ^@^@^@C^@^@^@&#65533;&#65533;&#65533;&#65533;, 2.14321574923559e-312, +4.01990882735322e-310, , ^@W^@^@^@X^@^@^@Y^@^@^@Z^@^@^@[^@^@^@\^@^@^@ +]^@^@^@^^@^@^@_^@^@^@`^@^@^@a^@^@^@b^@^@^@c ^@|^@^@^@}^@^@^@~, 2.55894867708661e-307, 1.80715861907036e+159, o, ^@ +^@^@*^@^A^A^C^@^@^@^S^@^@^@&#65533;^@^@^@&#65533;v&#65533;&#65533;&#6 +5533;5&#65533;^Q&#65533;&#65533;^@&#65533;O&#65533;&#65533;&#65533;^@ +^@^@^@&#65533;&#65533;&#65533;I&#65533;^A&#65533;&#65533;&#65533; &#65533;&#65533;&#1153;$&#65533;^Q&#65533;^O^@^D, -9.68134237028327e+1 +48, -1.56966257731664e+146, , ^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^ +@^@^@^@^@^@^@^@^@^@^\^@^A^A&#65533;&#65533;&#65533;&#65533;&#65533;&# +65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533 +;&#65533;&#1153; ^@^@^@^@^@^@^@^@^@^\, 7.29112201821461e-304, -3.79584680120728e-303, , + ^@a^@t^@u^@s^@ ^@I^@n^@f^@o^@r^@m^@a^@t^@i^@o^@n ^@n, 0, 4.30034738140221e-320, , I&#65533;^A^@^@^@^@^@^@^@^@^@^@^@^@H^ +@S^@4^@0^@ ^@P^@a^@r^@a^@m^@e^@t^@e^@r^@s 
^@S^@-^@X^@ ^@P, 1.7801962549761e-306, 0, , Y&#65533;^Q&#65533;v^@^D&# +65533;Q&#65533;&#65533;^@^@^@^@&#65533;&#65533;&#65533;I&#65533;^A&#6 +5533;&#65533;&#65533;I&#65533;^A^@^@^@^@^@^@^@^@^@^@^@^@A^@O^@C ^H&#65533;^A&#65533;&#65533;&#65533;I&#65533;^A, 1.5066387057954e-312, + 7.94196901705514e+115, r, ^@^@^@^@^@6^@^A^A&#65533;&#65533;&#65533;& +#65533;&#65533;&#65533;&#65533;&#65533;_^@^@^@&#65533;&#65533;&#65533 +;^Qrg&#65533;^Q&#65533;;^@^D&#65533;Q&#65533;i^@^@^@^@&#65533;&#65533 +;&#65533;I&#65533;^A&#65533; ^@a&#65533;&#935;R8&#65533;^Q&#65533;^K, 3.00306280016903e-306, -1.758 +31693470781e-304, , ^@g^@ ^@O^@r^@i^@g^@i^@n^@a^@l^@^@^@^@^@^@^@^@^@^ +@^@8^@^A^A^M^@^@^@^Q^@^@^@E^@^@^@a&#65533;&#65533; ^@-^@1, -nan, -7.08341916115841e-227, &#65533;, ^@a^@t^@a^@ ^@P^@r^@o^ +@c^@e^@s^@s^@i^@n^@g^@ ^@O^@r^@i^@g^@i^@n^@a^@l^@-^@2 ^@s^@i^@n^@g, 1.3351101829727e-306, 4.86173068582902e-63, &#65533;, &# +65533;&#65533;I&#65533;^A^@^@^@^@^@^@^@^@^@^@^@^@C^@h^@r^@o^@m^@a^@t^ +@o^@g^@r^@a^@m^@ ^@P^@a^@r ^@C^@ ^@B^@a^@t, 8.0109992628362e-307, 0, , sA(&#65533;^Q&#65533;&#655 +33;^@^D&#65533;Q&#65533;&#65533;^@^@^@^@&#65533;&#65533;&#65533;I&#65 +533;^A&#65533;&#65533;&#65533;I&#65533;^A^@^@^@^@^@^@^@^@^@^@^@^@G^@C I&#65533;^A&#65533;&#65533;&#65533;I, 2.65513496623009e-317, 3.1891827 +709762e+231, o, ^@^@^@^@^@^@^@^\^@^A^@&#65533;&#65533;&#65533;&#65533 +;&#65533;&#65533;&#65533;&#65533;^\^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@ +^@^@^@^@^@^@&#65533;&#65533;&#65533;I&#65533; &#65533;&#65533;&#65533;, 0, 0, , ^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@ +^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^Z^@^B^A^A^@^@^@^W^@^@^@&#65533;&#65533 +;&#65533;&#65533; , -nan, 0, , ^@e^@d^@d^@i^@n^@g^@ ^@3^@3 , 0, 0, ^A, ^A&#65533;&#65533;&#65533;I&#65533;^A^@^@^@^@^@^@^@^@^@^@^ +@^@&#65533;&#65533;&#65533;&#65533;^B^@^@^@^C^@^@^@^D^@^@^@^E^@^@^@^F +^@^@^@^G^@^@^@^H ^@!^@^@^@"^@^@^@#, 6.11895311342037e-308, 5.07588444990605e-116, +, ^@ +^@^@&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;8 
+^@^@^@9^@^@^@:^@^@^@;^@^@^@<^@^@^@=^@^@^@&#65533;&#65533;&#65533;&#65 +533;&#65533;&#65533;&#65533;&#65533;@^@^@^@A ^@^@^@[^@^@^@\, 2.03711595980591e-312, 5.32366304162584e-310, , &#6553 +3;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#6 +5533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;&#65533;s^@^@^@t +^@^@^@u^@^@^@&#65533;&#65533;&#65533;&#65533;w^@^@^@x^@^@^@y^@^@^@&#6 +5533;&#65533;&#65533;&#65533;{ SU6^@^@^@^@^@Ad, 0, 0, >, &#65533;&#65533;I&#65533;^A^B^@^@^@2.30.00 +SU6^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^Q , 4.1995579896506e-322, 1.39783876650346e+54, , ^@^@^@^@^@^@^@^@^@^@^@ +&#65533;?^@^@&#65533;?^@^@&#65533;?^@^@&#65533;?^@^@&#65533;?^@^@&#65 +533;?^@^@&#65533;?^@^@&#65533;?^@^@&#65533;?^@^@&#65533;? @^A, 0, 1.63096010348654e-319, &#65533;, etra_UNK-0018_16.03.2012_1.gc +d , 0, 0, , ion\Data\Project1\gcm\GC1_Hydrogen Method_100m_sp2 , 0, 0, , , 0, 0, , ^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@ +^@^@^@^@^@^@^@^@^@^@^@^@C:\GCsol , 0, 0, , , 0, 0, , , 0, 0, , gcm\GC1_Hyd, 1.14579591627385e+195, 1.13242004846932e+248, r, \GCsolution, 3.35295120276734e+204, 1.02942023076379e-71, , , 0, 0, , , 0, 0, , etc.
If I convert the bin file with a Windows program, I get output like this:
[Header] Data File Name C:\Dokumente und Einstellungen\fsc\Desktop\20110919\ +LaborHoppe_P1_UNK-0010_09.09.2011_1.gcd Output Date 20.09.2011 Output Time 10:45:33 [File Information] Type Data File Generated 09.09.2011 19:22:14 Generated by Admin Modified 20.09.2011 10:42:10 Modified by Admin [Sample Information] Operator Name Admin Acquisition Date 09.09.2011 19:24:27 Type Unknown Level 1 Sample Name LaborHoppe_P1 Sample ID UNK-0010 ISTD Amount1 1 ISTD Amount2 1 ISTD Amount3 1 ISTD Amount4 1 ISTD Amount5 1 ISTD Amount6 1 ISTD Amount7 1 ISTD Amount8 1 ISTD Amount9 1 ISTD Amount10 1 ISTD Amount11 1 ISTD Amount12 1 ISTD Amount13 1 ISTD Amount14 1 ISTD Amount15 1 ISTD Amount16 1 Sample Amount 1 Dilution Factor 1 Vial# 10 Injection Volume 2 Injection Count 1 Bar Code [Original Files] Data File C:\GCsolution\Data\Project1\LaborHoppe_P1_UNK-0010_09.09. +2011_1.gcd Method File C:\GCsolution\Data\Project1\gcm\GC1_Hydrogen Method_100 +m_sp2560_integration_UNKNOWN_782.gcm Batch File C:\GCsolution\Data\Project1\g110909.gcb Report Format File C:\GCsolution\System\DEFAULT.gcr [File Comment] [Configration] Instrument Name Instrument1 Instrument # 1 Line # 1 [Peak Table (Ch1)] # of Peaks 26 Peak# R.Time I.Time F.Time Area Height A/H Conc. + Mark ID# Name k' Plate # Plate Ht. 
Tailing Re +solution Sep.Factor 1 5,332 5,292 5,399 1048 462 0,02 0,00000 + 1 C14:0 0,000 131066,080 0,000 1,208 0,000 0, +000 2 6,238 6,178 6,378 72242 26891 0,03 0,00000 + 2 C16:0 0,170 144557,069 0,000 1,449 14,585 + 0,000 3 6,498 6,479 6,595 419 93 0,04 0,00000 + 3 C16:1n7t 0,219 0,000 0,000 0,000 0,000 1,288 4 6,734 6,689 6,769 2583 1039 0,02 0,00000 + 4 C16:1n7 0,263 159918,329 0,000 0,000 0,000 + 1,202 5 7,549 7,489 7,752 56859 16389 0,03 0,00000 + 5 C18:0 0,416 141705,713 0,000 1,787 11,077 + 1,582 6 7,953 7,905 8,062 971 258 0,04 0,00000 + 6 C18:1t 0,492 88990,255 0,000 1,516 4,337 1,1 +82 7 8,135 8,072 8,319 42929 11611 0,04 0,00000 + 7 C18:1n9 0,526 174140,874 0,000 1,965 1,976 + 1,069 8 8,496 8,452 8,618 910 186 0,05 0,00000 + 8 C18:2n6tt 0,593 38673,668 0,000 2,000 2,886 +1,129 9 8,661 8,618 8,692 16 9 0,02 0,00000 9 + C18:2n6ct 0,624 464832,815 0,000 0,792 1,481 1, +052 10 8,853 8,732 8,889 448 88 0,05 0,00000 + 10 C18:2n6tc 0,660 302521,825 0,000 0,648 3,337 + 1,058 11 8,987 8,918 9,135 20278 6647 0,03 0,00000 + 11 C18:2n6 0,685 229386,429 0,000 1,475 1,935 + 1,038 12 9,191 9,145 9,258 205 80 0,03 0,00000 + 12 C20:0 0,724 316287,435 0,000 1,400 2,913 1, +056 13 9,634 9,598 9,698 198 77 0,03 0,00000 + 13 C18:3n6 0,807 283128,526 0,000 1,271 6,448 +1,115 14 9,784 9,739 9,838 472 175 0,03 0,00000 + 14 C20:1n9 0,835 295677,357 0,000 1,210 2,077 + 1,035 15 9,971 9,932 10,012 180 78 0,02 0,00000 + 15 C18:3n3 0,870 419534,533 0,000 1,043 2,810 + 1,042 16 10,639 10,589 10,712 587 203 0,03 0,00000 + 16 C20:2n6 0,995 341241,962 0,000 1,285 9,964 + 1,144 17 10,866 10,822 10,895 321 138 0,02 0,00000 + 17 C22:0 1,038 502890,145 0,000 0,890 3,402 + 1,043 18 11,257 11,198 11,352 4474 1522 0,03 0,00000 + 18 C20:3n6 1,111 375323,728 0,000 1,451 5,8 +15 1,071 19 11,727 11,645 11,929 59239 19809 0,03 0,00000 + 19 C20:4n6 1,199 430565,779 0,000 1,566 6 +,502 1,079 20 12,466 12,419 12,569 2208 618 0,04 0,00000 + 20 C24:0 1,338 311187,077 0,000 1,614 9,211 + 1,116 21 
12,726 12,662 12,809 2990 1049 0,03 0,00000 + 21 C20:5n3 1,387 486334,875 0,000 1,320 3,2 +00 1,036 22 13,023 12,969 13,179 3383 825 0,04 0,00000 + 22 C24:1n9 1,442 302863,559 0,000 1,941 3,56 +3 1,040 23 13,466 13,399 13,585 7656 2247 0,03 0,00000 + 23 C22:4n6 1,526 417011,386 0,000 1,530 4,9 +90 1,058 24 13,921 13,839 13,992 1577 458 0,03 0,00000 + 24 C22:5n6 1,611 416308,716 0,000 1,000 5,37 +2 1,056 25 14,579 14,509 14,739 9154 2520 0,04 0,00000 + 25 C22:5n3 1,734 450041,035 0,000 1,679 7,6 +12 1,077 26 15,109 15,039 15,255 17589 4969 0,04 0,00000 + 26 C22:6n3 1,834 475309,805 0,000 1,437 6, +095 1,057 [Compound Results (Ch1)] # of IDs 26 ID# Name R.Time Area Height Conc. Curve 3rd 2n +d 1st Constant 1 C14:0 5,332 1048 462 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 2 C16:0 6,238 72242 26891 0,00000 Default 0,00000 +00 0,0000000 0,0000000 0,0000000 3 C16:1n7t 6,498 419 93 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 4 C16:1n7 6,734 2583 1039 0,00000 Default 0,00000 +00 0,0000000 0,0000000 0,0000000 5 C18:0 7,549 56859 16389 0,00000 Default 0,00000 +00 0,0000000 0,0000000 0,0000000 6 C18:1t 7,953 971 258 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 7 C18:1n9 8,135 42929 11611 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 8 C18:2n6tt 8,496 910 186 0,00000 Default 0,00000 +00 0,0000000 0,0000000 0,0000000 9 C18:2n6ct 8,661 16 9 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 10 C18:2n6tc 8,853 448 88 0,00000 Default 0,00000 +00 0,0000000 0,0000000 0,0000000 11 C18:2n6 8,987 20278 6647 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 12 C20:0 9,191 205 80 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 13 C18:3n6 9,634 198 77 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 14 C20:1n9 9,784 472 175 0,00000 Default 0,000000 +0 0,0000000 0,0000000 0,0000000 15 C18:3n3 9,971 180 78 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 16 C20:2n6 10,639 587 203 0,00000 Default 
0,00000 +00 0,0000000 0,0000000 0,0000000 17 C22:0 10,866 321 138 0,00000 Default 0,0000000 + 0,0000000 0,0000000 0,0000000 18 C20:3n6 11,257 4474 1522 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 19 C20:4n6 11,727 59239 19809 0,00000 Default 0,0 +000000 0,0000000 0,0000000 0,0000000 20 C24:0 12,466 2208 618 0,00000 Default 0,000000 +0 0,0000000 0,0000000 0,0000000 21 C20:5n3 12,726 2990 1049 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 22 C24:1n9 13,023 3383 825 0,00000 Default 0,0000 +000 0,0000000 0,0000000 0,0000000 23 C22:4n6 13,466 7656 2247 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 24 C22:5n6 13,921 1577 458 0,00000 Default 0,0000 +000 0,0000000 0,0000000 0,0000000 25 C22:5n3 14,579 9154 2520 0,00000 Default 0,000 +0000 0,0000000 0,0000000 0,0000000 26 C22:6n3 15,109 17589 4969 0,00000 Default 0,00 +00000 0,0000000 0,0000000 0,0000000 [Group Results (Ch1)] # of Groups 0 etc.
I have made the file available for download here, including the TXT file: www.spectramab.com/example.zip Does anyone know how I can achieve a similar thing in Perl? Thank you very much in advance! Adrian

Comment on Read a bin file and extract data
Select or Download Code
Re: Read a bin file and extract data
by bulk88 (Priest) on Mar 22, 2012 at 15:11 UTC
    What generated your GCD file? The file format of 7120db.dat in http://www.people.vcu.edu/~elhaij/IntroBioinf/Programs/context/binary6.html and your format look very different and incompatible. 7120db.dat looks like fixed-length records, whereas your file looks like binary tree data, or something similar to an MS Word document. I'm going to say you probably want to use OLE or a DLL supplied by the vendor of your lab equipment to decode this, unless someone in the FOSS/Linux world has already reverse-engineered the file format, or the format is an open standard.

    edit: your GCD file looks very similar to an MS Word file: the beginning of the first line of a DOC file is "ࡱ >  " and the beginning of the first line of your GCD file is "ࡱ >  ". Hmm, your file is a "Compound Binary File" (http://en.wikipedia.org/wiki/Compound_File_Binary_Format). CPAN does have a couple of modules for parsing this that you can try: http://cpan.uwinnipeg.ca/dist/OLE/ and Finnigan::OLE2File. Again, there is probably an OLE library supplied with whatever made your GCD file that will let you programmatically analyze it, which is the easiest way to do it.
      Great! Thank you very much, that indeed looks like a good hint. I don't mind doing the work & research, I just needed some starters :) And I do know it's an OLE file. I will try a little... Best regards, Adrian
Re: Read a bin file and extract data
by jmcnamara (Monsignor) on Mar 22, 2012 at 16:06 UTC

    What is a gcd file and what data are you trying to extract from it?

    As bulk88 points out the file that you are trying to parse is an OLE compound document (of which Word documents are an example but your file isn't a Word document).

    The smplls utility that comes with OLE::Storage_Lite shows the following in the file format:

    $ perl sample/smplls.pl ../LaborHoppe_P1_UNK-0010_09.09.2011_1.gcd 00 1 'Root Entry' (pps 0) ROOT 20.09.2011 +08:42:10 01 1 'Audit Trail' (pps 1) DIR 09.09.2011 +17:22:14 02 1 'Audit Trail Property' (pps af) FILE 50 +bytes 03 2 'File Comment' (pps 18) FILE 1 +bytes 04 3 'File Property' (pps 17) FILE c65 +bytes 05 4 'GC Raw Data 1' (pps 5) DIR 14.09.2011 +11:15:27 06 1 'Status' (pps e3) FILE 36 +bytes 07 2 'Status Data' (pps e6) FILE 0 +bytes 08 3 'Intensity Data' (pps e4) FILE 9f60 +bytes 09 4 'Intensity Data Flag' (pps e5) FILE 13ec +bytes 10 5 'GC Raw Data 2' (pps 6) DIR 09.09.2011 +17:41:29 ... 217 7 'Grouping Results' (pps c9) FILE 32 + bytes 218 8 'Peak Picking Param' (pps ca) FILE 28 + bytes 219 9 'Quantitation Param' (pps cb) FILE 30 + bytes 220 10 'Time Program For Data' (pps cc) FILE 18 + bytes 221 11 'Time Program For Method' (pps cd) FILE 18 + bytes 222 12 'Column Performance Param' (pps ce) FILE 68 + bytes 223 13 'Compound Calib Peak Info' (pps cf) FILE d0 + bytes 224 14 'Grouping Calib Peak Info' (pps d0) FILE d0 + bytes 225 15 'Compound Calib Curve Info' (pps d1) FILE 0 + bytes 226 16 'Compound Calib Peak Info2' (pps d2) FILE 30 + bytes 227 17 'Grouping Calib Curve Info' (pps d3) FILE 0 + bytes 228 18 'Grouping Calib Peak Info2' (pps d4) FILE 30 + bytes 229 27 'GC Data Processing Original 2' (pps 11) DIR 09.09.2011 + 17:22:14

    At first glance this doesn't seem to match the unpack statement in your program. Are you sure it is meant to parse the same file format?

    --
    John.

      John & bulk88, thank you again for the help!
      I have already done the OLE::Storage_Lite thing and got the same info!
      It all looks fine, but isn't it a "file in a file" that these entries point to?
      What I am trying to get is info similar to that in the TXT file provided in the example.zip.
      To be more precise: I need the info in the "Peak Table" as well as the Graph data at the end.
      Thank you so much!
      Adrian

        So far I got this:
        207 1 'Peak Table' (pps 30) FILE f4 + bytes 208 2 'Slice Data' (pps 31) FILE 1a + bytes 209 3 'Compound Table' (pps 32) FILE 2ef4 + bytes 210 4 'Grouping Table' (pps 33) FILE 1ca + bytes 211 5 'Calib Data File' (pps 34) FILE a9c + bytes 212 6 'Compound Results' (pps 35) FILE 3e + bytes 213 7 'Grouping Results' (pps 36) FILE 32 + bytes 214 8 'Peak Picking Param' (pps 37) FILE 28 + bytes 215 9 'Quantitation Param' (pps 38) FILE 30 + bytes 216 10 'Time Program For Data' (pps 39) FILE 18 + bytes 217 11 'Time Program For Method' (pps 3a) FILE 18 + bytes 218 12 'Column Performance Param' (pps 3b) FILE 68 + bytes 219 13 'Compound Calib Peak Info' (pps 3c) FILE d0 + bytes 220 14 'Grouping Calib Peak Info' (pps 3d) FILE d0 + bytes 221 15 'Compound Calib Curve Info' (pps 3e) FILE 0 + bytes 222 16 'Compound Calib Peak Info2' (pps 3f) FILE 30 + bytes 223 17 'Grouping Calib Curve Info' (pps 40) FILE 0 + bytes 224 18 'Grouping Calib Peak Info2' (pps 41) FILE 30 + bytes 225 23 'GC Data Processing Original 2' (pps 11) DIR 22.03.2012 + 12:07:01 226 24 'GC Data Processing Original 3' (pps 12) DIR 22.03.2012 + 12:07:01

        That is from OLE::Storage_Lite, but how can I access the data within a FILE pps object?
        Thankxxx again...

Re: Read a bin file and extract data
by GrandFather (Cardinal) on Mar 22, 2012 at 20:19 UTC

    XP is an OS and doesn't create user files, it facilitates their creation by an application. It is much more important to know the application involved than to know the OS. It would be even better if you could tell us the file format used because if it is fairly well known there may be a Perl module on CPAN that handles the format for you.

    True laziness is hard work

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://961032]
Approved by ww
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others musing on the Monastery: (7)
As of 2014-07-29 04:15 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    My favorite superfluous repetitious redundant duplicative phrase is:









    Results (211 votes), past polls