Index: Bio/Perl.pm =================================================================== RCS file: /home/repository/bioperl/bioperl-live/Bio/Perl.pm,v retrieving revision 1.21 diff -a -u -r1.21 Perl.pm --- Bio/Perl.pm 17 Feb 2004 17:27:52 -0000 1.21 +++ Bio/Perl.pm 1 Apr 2005 16:35:19 -0000 @@ -1,4 +1,4 @@ -# $Id: Perl.pm,v 1.21 2004/02/17 17:27:52 bosborne Exp $ +# $Id: Perl.pm,v 1.20 2003/09/15 13:38:07 bosborne Exp $ # # BioPerl module for Bio::Perl # @@ -29,38 +29,38 @@ # sequences are Bio::Seq objects, so the following methods work # for more info see Bio::Seq, or do 'perldoc Bio/Seq.pm' - print "Sequence name is ",$seq_object->display_id,"\n"; print "Sequence acc is ",$seq_object->accession_number,"\n"; print "First 5 bases is ",$seq_object->subseq(1,5),"\n"; # get the whole sequence as a single string - $sequence_as_a_string = $seq_object->seq(); # writing sequences - write_sequence(">$filename",'genbank',$seq_object); - write_sequence(">$filename",'genbank',@seq_object_array); # making a new sequence from just a string - $seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA", "myname","AL12232"); # getting a sequence from a database (assumes internet connection) - $seq_object = get_sequence('swissprot',"ROA1_HUMAN"); - $seq_object = get_sequence('embl',"AI129902"); - $seq_object = get_sequence('genbank',"AI129902"); - # BLAST a sequence (assummes an internet connection) - + # BLAST a sequence (assumes an internet connection) $blast_report = blast_sequence($seq_object); + # BLAST one or more sequences (requires a local Blast server running wwwBlast) + # Default URL http://localhost/blast/Blast.cgi + my $seq_string = 'MKVDVGPDPSLVYRPDVDPEVAKDKASFRNYTSGPLLDRVFT'; + my $localServerURL = 'http://127.0.0.1/blast/Blast.cgi'; + my $localDB = 'test_aa_db'; + ($blast_report) = wwwBlast_sequence($seq_object, '-database' => $localDB); + ($blast_report) = wwwBlast_sequence($seq_string, '-server' => $localServerURL, '-database' => $localDB); + + # writing out a blast report 
write_blast(">blast.out",$blast_report); @@ -132,7 +132,8 @@ @EXPORT = qw(read_sequence read_all_sequences write_sequence new_sequence get_sequence translate translate_as_string reverse_complement revcom revcom_as_string - reverse_complement_as_string blast_sequence write_blast); + reverse_complement_as_string blast_sequence + wwwBlast_sequence write_blast); @EXPORT_OK = @EXPORT; @@ -332,39 +333,68 @@ Function: If the computer has Internet accessibility, blasts the sequence using the NCBI BLAST server against nrdb. - It chooses the flavour of BLAST on the basis of the sequence. + Any additional parameters specified for the GET or PUT are passed + as is to Bio::Tools::Run::RemoteBlast. By default, runs blastp + with E-val = 1e-10. This function uses Bio::Tools::Run::RemoteBlast, which itself use Bio::SearchIO - as soon as you want to know more, check out - these modules - Returns : Bio::Search::Result::GenericResult.pm + these modules. + + Returns : Bio::Search::Result::GenericResult - Args : Either a string of protein letters or nucleotides, or a - Bio::Seq object + Args : $seq = A string of protein letters or nucleotides + or a Bio::Seq object. + 1 (verbose) or 0 (quiet) + @args = optional PUT and GET arguments to pass to RemoteBlast + (e.g. '-prog' => 'blastn', '-expect' => '1e-3', + '-descriptions' => '25'). + For a description of the many possible parameters see: + http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html or the BEGIN + block of Bio::Tools::Run::RemoteBlast. 
=cut sub blast_sequence { - my ($seq,$verbose) = shift; + my ($seq, $verbose, @params) = @_; - if( !defined $verbose ) { + if ( !defined $verbose ) { + $verbose = 1; + } elsif ( $verbose =~ /^-/ ) { + # $verbose is actually tag for first argument restore it + # where it belongs + unshift @params, $verbose; $verbose = 1; } if( !ref $seq ) { - $seq = Bio::Seq->new( -seq => $seq, -id => 'blast-sequence-temp-id'); + $seq = Bio::Seq->new('-seq' => $seq, '-id' => 'blast-sequence-temp-id'); } elsif ( !$seq->isa('Bio::PrimarySeqI') ) { croak("[$seq] is an object, but not a Bio::Seq object, cannot be blasted"); } require Bio::Tools::Run::RemoteBlast; - my $prog = 'blastp'; - my $e_val= '1e-10'; - - my @params = ( '-prog' => $prog, - '-expect' => $e_val, - '-readmethod' => 'SearchIO' ); + # set some default values which are different from those in RemoteBlast + # unless their values are passed as parameters + my ($prog_found, $expect_found); + my $count = 0; + for my $arg ( @params ) { + if ( $arg =~ /-prog/ ) { + $prog_found = 1; + if ( ++$count > 1) { + last; + } + } elsif ( $arg =~ /-expect/ ) { + $expect_found = 1; + if ( ++$count > 1) { + last; + } + } + } + push @params, ('-prog', 'blastp') unless $prog_found; + push @params, ('-expect', '1e-10') unless $expect_found; + push @params, ('-readmethod', 'SearchIO'); my $factory = Bio::Tools::Run::RemoteBlast->new(@params); @@ -402,6 +432,98 @@ return $result; } +=head2 wwwBlast_sequence + + Title : wwwBlast_sequence + + Usage : @blast_results = wwwBlast_sequence($seq_obj) + @blast_results = wwwBlast_sequence($seq, '-database' => $localDB, '-server' => $localServerURL) + @blast_results = wwwBlast_sequence($filename) + + Function: Blasts the sequence using an accessible wwwBlast server against + an optionally specified local database. Default server is + http://localhost/blast/Blast.cgi. Default db is test_na_db. Default + program is blastn. + + Any additional parameters specified for the GET or PUT are passed + as is to Bio::Tools::Run::LocalServerBlast. 
Default values are those + of Bio::Tools::Run::LocalServerBlast. + + This function uses Bio::Tools::Run::LocalServerBlast, which itself + uses Bio::SearchIO - as soon as you want to know more, check out + these modules. + + Returns : -1 on error + An array of Bio::Search::Result::GenericResult + + Args : $seq = A string of protein letters or nucleotides, the name of + a file containing one or more Fasta-formatted sequences, + or a Bio::Seq object. + 1 (verbose) or 0 (quiet) + @args = optional arguments, including the URL of the local Blast + server ('-server' => 'http://localhost/blast/Blast.cgi'), + the name of the database to blast against ('-database' => + 'my_db'), or PUT and GET arguments to pass to LocalServerBlast + (e.g. HITLIST_SIZE => '250', EXPECT => '1e-6', + DESCRIPTIONS => '25'). + For a description of the many possible parameters see: + http://athena.bioc.uvic.ca/blast/readme.html#Installation or the BEGIN + block of Bio::Tools::Run::LocalServerBlast. + +=cut + +sub wwwBlast_sequence { + + my ($seq, $verbose, @params) = @_; + + # do a quick check on $seq + if ( !defined $seq ) { + confess("No sequence provided. Can't Blast.\n"); + } elsif ( ref $seq) { + $seq->isa('Bio::PrimarySeqI') or + croak("[$seq] is an object but not a Bio::Seq object so cannot be Blasted\n"); + } elsif ( ! -e $seq ) { + # not a valid filename so probably a sequence-in-a-string + $seq = Bio::Seq->new( '-seq' => $seq, '-id' => 'default ID'); + } + + if ( !defined $verbose ) { + $verbose = 1; + } elsif ( $verbose =~ /^\D+/ ) { + # $verbose is actually tag for first argument so restore it to + # where it belongs + unshift @params, $verbose; + $verbose = 1; + } + + require Bio::Tools::Run::LocalServerBlast; + # set the server address if it's been passed. 
Kludge for compatibility: also + # check whether ALIGNMENT_VIEW set using text rather than number and fix if so + my $index = 0; + for my $arg ( @params ) { + if ( $arg =~ /^-server$/i ) { + $Bio::Tools::Run::LocalServerBlast::URLBASE = $params[$index+1] if defined $params[$index+1]; + # the URL is the value after the tag; keep scanning (no 'last') so a later ALIGNMENT_VIEW is still converted + } elsif ( ($arg =~ /alignment_view/i) && ($params[$index+1] !~ /[0-9]/) ) { + $params[$index+1] = {'Pairwise' => 0, + 'QueryAnchored' => 1, + 'QueryAnchoredNoIdentities' => 2, + 'FlatQueryAnchored' => 3, + 'FlatQueryAnchoredNoIdentities' => 4, + 'BlastXML' => 7, + 'Tabular' => 9}->{$params[$index+1]}; + } + $index++; + } + push @params, ('-readmethod', 'SearchIO'); + + my $factory = Bio::Tools::Run::LocalServerBlast->new(@params); + print STDOUT "Job\(s\) submitted.\n" if $verbose; + my @r = $factory->submit_blast($seq); + + return @r; +} + =head2 write_blast Title : write_blast @@ -515,7 +637,6 @@ return $seq; } - =head2 translate Title : translate