BLAST MATLAB: differenze tra le versioni
Da Bioingegneria Elettronica e Informatica.
(Creata pagina con "== BLAST Examples with MATLAB == === Download .fna and .faa files === <syntaxhighlight lang="matlab" line> % The following examples assume you have a FASTA nucleotide file a...") |
|||
Riga 1: | Riga 1: | ||
== BLAST Examples with MATLAB == | == BLAST Examples with MATLAB == | ||
+ | [http://www.vitoantoniobevilacqua.it/wiki/images/7/7a/BLAST_LD.pdf Slides] | ||
+ | |||
+ | '''Vitoantonio Bevilacqua''' [mailto:vitoantonio.bevilacqua@poliba.it vitoantonio.bevilacqua@poliba.it] | ||
+ | |||
+ | '''Nicola Altini''' [mailto:nicola.altini@poliba.it nicola.altini@poliba.it] | ||
+ | |||
=== Download .fna and .faa files === | === Download .fna and .faa files === | ||
<syntaxhighlight lang="matlab" line> | <syntaxhighlight lang="matlab" line> |
Versione delle 11:45, 30 nov 2019
Indice
BLAST Examples with MATLAB
Vitoantonio Bevilacqua vitoantonio.bevilacqua@poliba.it
Nicola Altini nicola.altini@poliba.it
Download .fna and .faa files
% The following examples assume you have a FASTA nucleotide file and a FASTA amino acid file for E. coli:
% NC_004431.fna and NC_004431.faa, saved inside path_fasta_files.
path_fasta_files = "MATLAB/BLAST/fasta_files";
fullURL = 'http://www.scfbio-iitd.res.in/chemgenome/genomedataset/NC_004431.fna';
filename_fna = fullfile(path_fasta_files, 'NC_004431.fna');
urlwrite(fullURL,filename_fna);
fullURL = 'https://www.brinkman.mbb.sfu.ca/~mlangill/public_tmp/NC_004431.faa';
filename_faa = fullfile(path_fasta_files, 'NC_004431.faa');
urlwrite(fullURL,filename_faa);
Prepare database and query
% Create local blastable databases from the NC_004431.fna and NC_004431.faa FASTA files by using the blastformat function.
blastformat('inputdb', filename_fna, 'protein', 'false', 'log', 'formatdb.log');
blastformat('inputdb', filename_faa, 'log', 'formatdb.log');
% Use the getgenbank function to retrieve sequence information for the
% E. coli threonine operon from the GenBank database.
S = getgenbank('M28570');
% Create a query file by using the fastawrite function to create a
% FASTA file named query_nt.fa from this sequence information, using
% only the accession number as the header.
S.Header = S.Accession;
path_query = fullfile(path_fasta_files, 'query_nt.fa');
fastawrite(path_query, S);
Examples
Example 1. Search nucleotide query versus nucleotide data
results = blastlocal('inputquery', path_query,...
'database', filename_fna,...
'program', 'blastn');
print_score_results(results(1));
Example 2. Performing a Nucleotide Translated Search
% Use MATLAB syntax to submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local amino acid database NC_004431.faa.
% Specify the BLAST program blastx. Return the BLAST search results in results, a MATLAB structure.
results = blastlocal('inputquery', path_query,...
'database', filename_faa,...
'program', 'blastx');
Example 3. Performing a Nucleotide Search Using blastall Syntax
% Use blastall syntax to submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local nucleotide database NC_004431.fna.
% Specify the BLAST program blastn and an expectation value of 0.0001.
% Return the BLAST search results in results, a MATLAB structure.
query = sprintf('-i %s -d %s -p blastn -e 0.0001', path_query, filename_fna);
results = blastlocal(query);
Example 4. Performing a Nucleotide Search and Creating a Formatted Report
% Submit the query sequence in the query_nt.fa FASTA file for a BLAST search
% of the local nucleotide database NC_004431.fna.
% Specify the BLAST program blastn and a tabular alignment format.
% Save the contents of the BLAST report to a file named myecoli_nt.txt.
blast_report_file = fullfile(path_fasta_files, 'myecoli_nt.txt');
blastlocal('inputquery', path_query,...
'database', filename_fna, ...
'tofile', blast_report_file, ...
'blastargs', '-p blastn -m 8');
Utility Functions
function pen = score_from_matrix(char1, char2, varargin)
if nargin < 3
pen_matrix = ...
[+1 -5 -5 -1;
-5 +1 -1 -5;
-5 -1 +1 -5;
-1 -5 -5 +1];
else
pen_matrix = varargin{1};
end
% ATCG
idx1 = idx_from_atcg(char1);
idx2 = idx_from_atcg(char2);
if idx1 > 0 && idx2 > 0
pen = pen_matrix(idx1, idx2);
else
pen = 0;
end
end
function idx = idx_from_atcg(char1)
if char1 == 'a'
idx = 1;
elseif char1 == 't'
idx = 2;
elseif char1 == 'c'
idx = 3;
elseif char1 == 'g'
idx = 4;
else
idx = -1;
end
end
function print_score_results(results)
n_hits = numel(results.Hits.HSPs);
for idx_hit = 1:n_hits
fprintf('Index HIT = %d\n', idx_hit);
alignment = results.Hits.HSPs(idx_hit).Alignment;
rew = results.Hits.HSPs(idx_hit).Identities.Match * 2;
empty_pos = [];
ncols = size(alignment,2);
for i = 1:ncols
if alignment(2,i) == ' '
empty_pos = [empty_pos, i];
end
end
pen = 0;
for i = 1:numel(empty_pos)
empty_pos_idx = empty_pos(i);
char1 = alignment(1,empty_pos_idx);
char2 = alignment(3,empty_pos_idx);
fprintf('char1 = %c char2 = %c\n', char1, char2);
pen_iter = score_from_matrix(char1, char2);
pen = pen + pen_iter;
end
fprintf('Penalty is %d\n', pen);
score = rew + pen;
fprintf('Final score is %d\n', score);
fprintf('\n');
end
end