The purpose of this algorithm is to measure the difference between two sequences/strings. It is based around the number of changes required to make one string equal to the other.

It is aimed at short strings, it usage is spell checkers, optical character recognition, etc.

Wikipedia entry can be found here:

public class Levenshtein
{
private String compOne;
private String compTwo;
private int[][] matrix;
private Boolean calculated = false;
public Levenshtein(String one, String two)
{
compOne = one;
compTwo = two;
}
public int getSimilarity()
{
if (!calculated)
{
setupMatrix();
}
return matrix[compOne.length()][compTwo.length()];
}
public int[][] getMatrix()
{
setupMatrix();
return matrix;
}
private void setupMatrix()
{
matrix = new int[compOne.length()+1][compTwo.length()+1];
for (int i = 0; i <= compOne.length(); i++)
{
matrix[i][0] = i;
}
for (int j = 0; j <= compTwo.length(); j++)
{
matrix[0][j] = j;
}
for (int i = 1; i < matrix.length; i++)
{
for (int j = 1; j < matrix[i].length; j++)
{
if (compOne.charAt(i-1) == compTwo.charAt(j-1))
{
matrix[i][j] = matrix[i-1][j-1];
}
else
{
int minimum = Integer.MAX_VALUE;
if ((matrix[i-1][j])+1 < minimum)
{
minimum = (matrix[i-1][j])+1;
}
if ((matrix[i][j-1])+1 < minimum)
{
minimum = (matrix[i][j-1])+1;
}
if ((matrix[i-1][j-1])+1 < minimum)
{
minimum = (matrix[i-1][j-1])+1;
}
matrix[i][j] = minimum;
}
}
}
calculated = true;
displayMatrix();
}
private void displayMatrix()
{
System.out.println(" "+compOne);
for (int y = 0; y <= compTwo.length(); y++)
{
if (y-1 < 0) System.out.print(" "); else System.out.print(compTwo.charAt(y-1));
for (int x = 0; x <= compOne.length(); x++)
{
System.out.print(matrix[x][y]);
}
System.out.println();
}
}
}

### Like this:

Like Loading...

*Related*

Tags: alogrithm, distance, Java, Levenshtein

This entry was posted on June 28, 2010 at 09:20 and is filed under Algorithm, Data, Java Example, Levenshtein, Uncategorized. You can follow any responses to this entry through the RSS 2.0 feed.
You can leave a response, or trackback from your own site.

June 28, 2010 at 09:23 |

[…] Levenshtein Damerau-Levenshtein Hamming Jaro Winkler N-Gram Markov Chain […]