Commit dbe1c5b9 authored by Keith Jolley's avatar Keith Jolley
Browse files

Genome Comparator: Only check paralogous sequences if >1 hit.

parent 26f0586c
......@@ -2322,64 +2322,84 @@ sub _parse_blast_ref {
open( my $blast_fh, '<', $blast_file ) || ( $logger->error("Can't open BLAST output file $blast_file. $!"), return \$; );
push @blast, $_ foreach <$blast_fh>; #slurp file to prevent file handle remaining open during database queries.
close $blast_fh;
my $good_matches = 0;
my @existing_match_seqs;
my $criteria_matches = 0;
foreach my $line (@blast) {
next if !$line || $line =~ /^#/;
my @record = split /\s+/, $line;
my $this_quality = $record[3] * $record[2];
if ( $record[3] >= $alignment * 0.01 * $required_alignment && $record[2] >= $identity ) {
my $this_match;
$this_match->{'seqbin_id'} = $record[1];
$this_match->{'identity'} = $record[2];
$this_match->{'alignment'} = $record[3];
$this_match->{'start'} = $record[8];
$this_match->{'end'} = $record[9];
if ( ( $record[8] > $record[9] && $record[7] > $record[6] ) || ( $record[8] < $record[9] && $record[7] < $record[6] ) ) {
$this_match->{'reverse'} = 1;
} else {
$this_match->{'reverse'} = 0;
}
if ( $required_alignment > $this_match->{'alignment'} ) {
if ( $this_match->{'reverse'} ) {
$this_match->{'predicted_start'} = $this_match->{'start'} - $ref_length + $record[6];
$this_match->{'predicted_end'} = $this_match->{'end'} + $record[7] - 1;
} else {
$this_match->{'predicted_start'} = $this_match->{'start'} - $record[6] + 1;
$this_match->{'predicted_end'} = $this_match->{'end'} + $ref_length - $record[7];
}
} else {
if ( $this_match->{'reverse'} ) {
$this_match->{'predicted_start'} = $this_match->{'end'};
$this_match->{'predicted_end'} = $this_match->{'start'};
} else {
$this_match->{'predicted_start'} = $this_match->{'start'};
$this_match->{'predicted_end'} = $this_match->{'end'};
}
}
my $match_seq = $self->_extract_sequence($this_match);
my $seq_already_found = 0;
foreach my $existing_match_seq (@existing_match_seqs) {
if ( $match_seq eq $existing_match_seq ) { #Only count different alleles
$seq_already_found = 1;
last;
}
}
if ( !$seq_already_found ) {
push @existing_match_seqs, $match_seq;
$good_matches++;
}
my $this_match = $self->_extract_match( \@record, $required_alignment, $ref_length );
$criteria_matches++;
if ( $this_quality > $quality ) {
$quality = $this_quality;
$final_match = $this_match;
}
}
}
$final_match->{'good_matches'} = $good_matches;
if ( $criteria_matches > 1 ) { #only check if match sequences are different if there are more than 1
my $good_matches = 0;
my @existing_match_seqs;
foreach my $line (@blast) {
next if !$line || $line =~ /^#/;
my @record = split /\s+/, $line;
if ( $record[3] >= $alignment * 0.01 * $required_alignment && $record[2] >= $identity ) {
my $this_match = $self->_extract_match( \@record, $required_alignment, $ref_length );
my $match_seq = $self->_extract_sequence($this_match);
my $seq_already_found = 0;
foreach my $existing_match_seq (@existing_match_seqs) {
if ( $match_seq eq $existing_match_seq ) { #Only count different alleles
$seq_already_found = 1;
last;
}
}
if ( !$seq_already_found ) {
push @existing_match_seqs, $match_seq;
$good_matches++;
}
}
}
$final_match->{'good_matches'} = $good_matches;
} else {
$final_match->{'good_matches'} = $criteria_matches;
}
return $final_match;
}
sub _extract_match {
my ( $self, $record, $required_alignment, $ref_length ) = @_;
my $match;
$match->{'seqbin_id'} = $record->[1];
$match->{'identity'} = $record->[2];
$match->{'alignment'} = $record->[3];
$match->{'start'} = $record->[8];
$match->{'end'} = $record->[9];
if ( ( $record->[8] > $record->[9] && $record->[7] > $record->[6] ) || ( $record->[8] < $record->[9] && $record->[7] < $record->[6] ) )
{
$match->{'reverse'} = 1;
} else {
$match->{'reverse'} = 0;
}
if ( $required_alignment > $match->{'alignment'} ) {
if ( $match->{'reverse'} ) {
$match->{'predicted_start'} = $match->{'start'} - $ref_length + $record->[6];
$match->{'predicted_end'} = $match->{'end'} + $record->[7] - 1;
} else {
$match->{'predicted_start'} = $match->{'start'} - $record->[6] + 1;
$match->{'predicted_end'} = $match->{'end'} + $ref_length - $record->[7];
}
} else {
if ( $match->{'reverse'} ) {
$match->{'predicted_start'} = $match->{'end'};
$match->{'predicted_end'} = $match->{'start'};
} else {
$match->{'predicted_start'} = $match->{'start'};
$match->{'predicted_end'} = $match->{'end'};
}
}
return $match;
}
sub _create_locus_FASTA_db {
my ( $self, $locus, $prefix ) = @_;
my $clean_locus = $locus;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment