BLAST MATLAB
Da Bioingegneria Elettronica e Informatica.
Versione del 30 nov 2019 alle 11:45 di Profbevilacqua (Discussione | contributi)
Indice
BLAST Examples with MATLAB
Vitoantonio Bevilacqua vitoantonio.bevilacqua@poliba.it
Nicola Altini nicola.altini@poliba.it
Download .fna and .faa files
% The following examples assume you have a FASTA nucleotide file and a FASTA amino acid file for E. coli:
% NC_004431.fna and NC_004431.faa, saved inside path_fasta_files.
path_fasta_files = "MATLAB/BLAST/fasta_files";
fullURL = 'http://www.scfbio-iitd.res.in/chemgenome/genomedataset/NC_004431.fna';
filename_fna = fullfile(path_fasta_files, 'NC_004431.fna');
urlwrite(fullURL,filename_fna);
fullURL = 'https://www.brinkman.mbb.sfu.ca/~mlangill/public_tmp/NC_004431.faa';
filename_faa = fullfile(path_fasta_files, 'NC_004431.faa');
urlwrite(fullURL,filename_faa);
Prepare database and query
% Create local blastable databases from the NC_004431.fna and NC_004431.faa FASTA files by using the blastformat function.
blastformat('inputdb', filename_fna, 'protein', 'false', 'log', 'formatdb.log');
blastformat('inputdb', filename_faa, 'log', 'formatdb.log');
% Use the getgenbank function to retrieve sequence information for the
% E. coli threonine operon from the GenBank database.
S = getgenbank('M28570');
% Create a query file by using the fastawrite function to create a
% FASTA file named query_nt.fa from this sequence information, using
% only the accession number as the header.
S.Header = S.Accession;
path_query = fullfile(path_fasta_files, 'query_nt.fa');
fastawrite(path_query, S);
Examples
Example 1. Search nucleotide query versus nucleotide data
results = blastlocal('inputquery', path_query,...
'database', filename_fna,...
'program', 'blastn');
print_score_results(results(1));
Example 2. Performing a Nucleotide Translated Search
% Use MATLAB syntax to submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local amino acid database NC_004431.faa.
% Specify the BLAST program blastx. Return the BLAST search results in results, a MATLAB structure.
results = blastlocal('inputquery', path_query,...
'database', filename_faa,...
'program', 'blastx');
Example 3. Performing a Nucleotide Search Using blastall Syntax
% Use blastall syntax to submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local nucleotide database NC_004431.fna.
% Specify the BLAST program blastn and an expectation value of 0.0001.
% Return the BLAST search results in results, a MATLAB structure.
query = sprintf('-i %s -d %s -p blastn -e 0.0001', path_query, filename_fna);
results = blastlocal(query);
Example 4. Performing a Nucleotide Search and Creating a Formatted Report
% Submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local nucleotide database NC_004431.fna.
% Specify the BLAST program blastn and a tabular alignment format.
% Save the contents of the BLAST report to a file named myecoli_nt.txt.
blast_report_file = fullfile(path_fasta_files, 'myecoli_nt.txt');
blastlocal('inputquery', path_query,...
'database', filename_fna, ...
'tofile', blast_report_file, ...
'blastargs', '-p blastn -m 8');
Utility Functions
function pen = score_from_matrix(char1, char2, varargin)
if nargin < 3
pen_matrix = ...
[+1 -5 -5 -1;
-5 +1 -1 -5;
-5 -1 +1 -5;
-1 -5 -5 +1];
else
pen_matrix = varargin{1};
end
% ATCG
idx1 = idx_from_atcg(char1);
idx2 = idx_from_atcg(char2);
if idx1 > 0 && idx2 > 0
pen = pen_matrix(idx1, idx2);
else
pen = 0;
end
end
function idx = idx_from_atcg(char1)
if char1 == 'a'
idx = 1;
elseif char1 == 't'
idx = 2;
elseif char1 == 'c'
idx = 3;
elseif char1 == 'g'
idx = 4;
else
idx = -1;
end
end
function print_score_results(results)
n_hits = numel(results.Hits.HSPs);
for idx_hit = 1:n_hits
fprintf('Index HIT = %d\n', idx_hit);
alignment = results.Hits.HSPs(idx_hit).Alignment;
rew = results.Hits.HSPs(idx_hit).Identities.Match * 2;
empty_pos = [];
ncols = size(alignment,2);
for i = 1:ncols
if alignment(2,i) == ' '
empty_pos = [empty_pos, i];
end
end
pen = 0;
for i = 1:numel(empty_pos)
empty_pos_idx = empty_pos(i);
char1 = alignment(1,empty_pos_idx);
char2 = alignment(3,empty_pos_idx);
fprintf('char1 = %c char2 = %c\n', char1, char2);
pen_iter = score_from_matrix(char1, char2);
pen = pen + pen_iter;
end
fprintf('Penalty is %d\n', pen);
score = rew + pen;
fprintf('Final score is %d\n', score);
fprintf('\n');
end
end