Levenshtein Distance in C

Da Bioingegneria Elettronica e Informatica.
Versione del 30 nov 2019 alle 14:01 di Profbevilacqua (Discussione | contributi)

(diff) ← Versione meno recente | Versione attuale (diff) | Versione più recente → (diff)

Nicola Altini nicola.altini@poliba.it

Vitoantonio Bevilacqua vitoantonio.bevilacqua@poliba.it

Slides

Levenshtein Distance in C

Including Libraries

  1. #include <stdlib.h>
  2. #include <stdio.h>  
  3. #include <string.h>
  4. <syntaxhighlight/>
  5.  
  6. === Macros ===
  7. <syntaxhighlight lang="c" line>
  8. #define MAXNCHARS 50
  9. #define VERBOSE 1
  10. #define VERSION 1
  11. <syntaxhighlight/>
  12.  
  13. === Declaration of  Functions ===
  14. <syntaxhighlight lang="c" line>
  15. void print_matrix(int **, int, int);
  16. int Levenshtein_distance(char *, char *);
  17. int Levenshtein_distance_v2(char *, char *);
  18. int minimum(int, int, int);
  19. <syntaxhighlight/>
  20.  
  21. === Main ===
  22. <syntaxhighlight lang="c" line>
  23. int main(void) {
  24.     FILE *query, *db;
  25.     char str1[MAXNCHARS], str2[MAXNCHARS];
  26.     int d;
  27.  
  28.     printf("VERSION: %d\n", VERSION);
  29.     printf("VERBOSE: %d\n", VERBOSE);
  30.  
  31.     query = fopen("query.fa","r");
  32.     if (query == NULL) {
  33.         printf("It is not possible to open query.fa!\n");
  34.         scanf ("%[^\n]%*c", str1);
  35.     } else {
  36.         fscanf (query, "%[^\n]%*c", str1);
  37.     }
  38.  
  39.     db = fopen("db.fna", "r");
  40.     if (db == NULL) {
  41.         printf("It is not possible to open db.fna!\n");
  42.         scanf ("%[^\n]%*c", str2);
  43.     } else {
  44.         fscanf (db, "%[^\n]%*c", str2);
  45.     }
  46.  
  47.     printf("Query:\n'%s'\n", str1);
  48.     printf("Database:\n'%s'\n", str2);
  49.  
  50.     if (VERSION == 1) {
  51.         d = Levenshtein_distance(str1,str2);
  52.     } else if (VERSION == 2) {
  53.         d = Levenshtein_distance_v2(str1,str2);
  54.     }
  55.  
  56.     printf("La distanza di Levenshtein e': %d", d);
  57.     return 0;
  58. }
  59. <syntaxhighlight/>
  60.  
  61. === Definition of Functions ===
  62. <syntaxhighlight lang="c" line>
  63. int Levenshtein_distance(char *x, char *y) {
  64.     int m = strlen(x);
  65.     int n = strlen(y);
  66.     printf("m = %d , n = %d\n", m, n);
  67.  
  68.     int i, j;
  69.  
  70.     int distance;
  71.  
  72.     int **d = malloc((m + 1) * sizeof(int*));
  73.  
  74.     for(i = 0; i <= m; i++)
  75.         d[i] = malloc((n + 1) * sizeof(int));
  76.  
  77.     for(i = 0; i <= m; i++)
  78.         d[i][0] = i;
  79.  
  80.     for(j = 1; j <= n; j++)
  81.         d[0][j] = j;
  82.  
  83.     if (VERBOSE) print_matrix(d, m, n);
  84.  
  85.     for(i = 1; i <= m; i++) {
  86.         for(j = 1; j <= n; j++) {
  87.             if(x[i - 1] != y[j - 1]) {
  88.                 int k = minimum(
  89.                     d[i][j - 1],
  90.                     d[i - 1][j],
  91.                     d[i - 1][j - 1]
  92.                 );
  93.                 d[i][j] = k + 1;
  94.             } else {
  95.                 d[i][j] = d[i - 1][j - 1];
  96.             }
  97.             if (VERBOSE) print_matrix(d, m, n);
  98.         }
  99.     }
  100.  
  101.     distance = d[m][n];
  102.  
  103.     for(i = 0; i <= m; i++)
  104.         free(d[i]);
  105.  
  106.     free(d);
  107.     return distance;
  108. }
  109.  
  110. void print_matrix(int **matrix, int m, int n) {
  111.     int i, j;
  112.  
  113.     for (i = 0; i <= m; i++) {
  114.         for (j = 0; j <= n; j++) {
  115.             printf("%d ", matrix[i][j]);
  116.         }
  117.         printf("\n");
  118.     }
  119.     printf("\n\n");
  120. }
  121.  
  122. int Levenshtein_distance_v2(char *x, char *y) { 
  123.     int m = strlen(x);
  124.     int n = strlen(y);
  125.     int i, j;
  126.     int distance;
  127.     int *prev = malloc((n + 1) * sizeof(int));
  128.     int *curr = malloc((n + 1) * sizeof(int));
  129.     int *tmp = 0;
  130.     for(i = 0; i <= n; i++)
  131.         prev[i] = i;
  132.     for(i = 1; i <= m; i++) {
  133.         curr[0] = i;
  134.         for(j = 1; j <= n; j++) {
  135.             if(x[i - 1] != y[j - 1]) {
  136.                 int k = minimum(curr[j - 1], prev[j - 1], prev[j]);
  137.                 curr[j] = k + 1;
  138.                 } else {
  139.                     curr[j] = prev[j - 1];
  140.             }
  141.         }
  142.         tmp = prev;
  143.         prev = curr;
  144.         curr = tmp;
  145.         memset((void*) curr, 0, sizeof(int) * (n + 1)); 
  146.     }
  147.     distance = prev[n];
  148.     free(curr);
  149.     free(prev);
  150.     return distance;
  151. }
  152.  
  153. int minimum(int a, int b, int c) {
  154.  
  155. /* funzione che calcola il minimo di 3 valori */
  156.  
  157.     int min = a;
  158.  
  159.     if (b < min) min = b;
  160.     if (c < min) min = c;
  161.  
  162.     return min;
  163. }
  164. <syntaxhighlight/>