#!/usr/bin/perl
#
# Filter a comma-delimited compound file against a block list.
#
# Usage:
#   script [INPUT [FILTER [OUTPUT [INDEXCOLUMN]]]]
#
#   INPUT        comma-delimited file to be filtered
#   FILTER       whitespace-delimited file; the second column of each line
#                holds a 1-based compound index to block
#   OUTPUT       file that receives every input record that is neither
#                blank nor blocked
#   INDEXCOLUMN  0-based column of INPUT that holds the compound index
#                (a negative value counts from the end of the record)
#
# Any value not supplied on the command line is prompted for on STDIN.
# Arguments beyond the fourth are ignored.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Print $prompt, read one line from STDIN and return it without its newline.
# Dies if STDIN is exhausted, since the script cannot continue without a value.
sub prompt_for {
    my ($prompt) = @_;

    print $prompt;
    my $answer = <STDIN>;
    die "No value received on standard input for prompt '$prompt'\n"
        unless defined $answer;
    chomp $answer;
    return $answer;
}

# Return the command-line argument at $position if one was given (after
# printing the message built by $announce), otherwise prompt for it.
sub argument_or_prompt {
    my ($position, $announce, $prompt) = @_;

    if (defined $ARGV[$position]) {
        my $value = $ARGV[$position];
        print $announce->($value);
        return $value;
    }
    return prompt_for($prompt);
}

# Read the filter file from $fh and return a hash reference whose keys are
# the blocked indices.  The second whitespace-delimited column of each line
# is taken as a 1-based index and stored 0-based.  Lines whose second column
# is missing or not numeric (e.g. a header line) are skipped.
sub load_blocklist {
    my ($fh) = @_;

    my %is_blocked;
    while (my $line = <$fh>) {
        $line =~ s/["]+//g;     # remove quotation marks from SMILES strings
        $line =~ s/[\n]+//g;    # remove new line characters

        my @columns = split ' ', $line;
        next unless defined $columns[1] && looks_like_number($columns[1]);

        # account for smiles index starting from 1 instead of 0.
        $is_blocked{ $columns[1] - 1 } = 1;
    }
    return \%is_blocked;
}

# --- gather settings and open files -----------------------------------------

my $inputfile = argument_or_prompt(
    0,
    sub { "Input filename received from command line. Opening '$_[0]'...\n" },
    "Please enter the path to your input file: ",
);
# Three-argument open with "or": "||" would bind to the filename string and
# the die would never fire.
open my $input_fh, '<', $inputfile
    or die "An error occured whilst opening your input file '$inputfile': $!";

my $filterfile = argument_or_prompt(
    1,
    sub { "Filter filename received from command line. Opening '$_[0]'...\n" },
    "Please enter the path to your filter file: ",
);
open my $filter_fh, '<', $filterfile
    or die "An error occured whilst opening your filter file '$filterfile': $!";

my $outputfile = argument_or_prompt(
    2,
    sub { "Output filename received from command line...\n" },
    "Please enter the path to your output file: ",
);

my $indexcolumn = argument_or_prompt(
    3,
    sub { "Index column ($_[0]) received from command line...\n" },
    "Please enter the index column of the input file: ",
);
$indexcolumn =~ s/\A\s+|\s+\z//g;
die "Index column '$indexcolumn' is not an integer\n"
    unless $indexcolumn =~ /\A-?\d+\z/;

# The output file is only created (and truncated) once every setting has
# been validated, so a bad index column cannot wipe an existing file.
open my $output_fh, '>', $outputfile
    or die "An error occured whilst creating your output file '$outputfile': $!";

# display input and output file names
print "INPUT: $inputfile\n";
print "OUTPUT: $outputfile\n";
print "FILTER: $filterfile\n";

# --- load the block list ----------------------------------------------------

my $is_blocked = load_blocklist($filter_fh);
close $filter_fh;

# --- filter the input file --------------------------------------------------

my $lineindex = 0;    # 0-based number of the record being processed
my $blanks    = 0;    # records whose first four fields are all empty
my $blocked   = 0;    # records dropped because their index is blocked

while (my $line = <$input_fh>) {
    $line =~ s/["]+//g;     # remove quotation marks from SMILES strings
    $line =~ s/[\n]+//g;    # remove new line characters

    # split line into fields according to comma delimiter
    my @fields  = split /,\t*/, $line;
    my $current = $lineindex++;

    # A record counts as blank when its first four fields are all false
    # (missing, empty or "0"), matching the original test.
    if (!$fields[0] && !$fields[1] && !$fields[2] && !$fields[3]) {
        print "Blank at line $current\n";
        $blanks++;
        next;    # blank records are never compared against the block list
    }

    my $compound = $fields[$indexcolumn];
    if (defined $compound) {
        (my $key = $compound) =~ s/\A\s+|\s+\z//g;

        # Only numeric indices can be blocked; a header or short record is
        # passed through instead of being treated as index 0.
        if (looks_like_number($key) && $is_blocked->{ $key + 0 }) {
            print "compound $compound blocked...\n";
            $blocked++;
            next;
        }
    }

    print {$output_fh} "$line\n";
}

print "Found $blanks blank lines.\n";
print "Blocked $blocked compounds.\n";

# Buffered write errors only surface at close, so check it.
close $output_fh
    or die "An error occured whilst writing your output file '$outputfile': $!";
close $input_fh;

#print "molconvert -F '-3:[hydrogenize]' sdf $inputfile -o $outputfile";