The Markov Chain model calculates the probability of the next letter occurring in a sequence based on the previous n characters.
They are used in a multitude of areas, including data compression, entropy encoding, and pattern recognition.
The wikipedia entry can be found here. I based my algorithm off of an excellent explanation found here, provided by Rutgers as part of their course Discrete and Probabilistic Models in Biology. Warning, I think this document has a couple of mistakes in the matrix on page 10 (the counting of A,C,G,T), or I may be mistaken.
For this code, the final part which is needed is a method which gives the probability of a sequence, that is you pass in a sequence (e.g. AACGT) and it returns the probability of this sequence based on the matrix it has calculated.
The code:
public class Markov
{
private class similarities
{
private char c;
private int count;
public similarities(char a, int b)
{
c = a;
count = b;
}
public char getC()
{
return c;
}
public int getCount()
{
return count;
}
public void setCount(int a)
{
count = a;
}
public void increaseCount()
{
count++;
}
}
private List<similarities> entries;
private List<String> axies;
private double[][] matrix;
public Markov()
{
entries = new ArrayList();
axies = new ArrayList();
}
public void getSimilarity(List<String> s)
{
String temp = new String();
for (int i = 0; i < s.size(); i++)
{
if (temp.length() > 0)
{
temp = temp + " " + s.get(i);
}
else temp = s.get(i);
}
setupAxis(temp);
compareCharsAgainstAxis(temp);
display();
for (int i = 0; i < matrix.length; i++)
{
double line = getLineValue(i);
for (int j = 0; j < matrix[i].length; j++)
{
double t = matrix[i][j];
matrix[i][j] = t / line;
}
}
display();
}
private void compareCharsAgainstAxis(String s)
{
matrix = new double[axies.size()][axies.size()];
String comparison = "";
String test = s;
for (int h = 0; h < test.length(); h++)
{
int end = test.indexOf(" ");
if (end > 0)
{
comparison = test.substring(0,end);
}
else
{
comparison = test.substring(0,test.length());
end = test.length()-1;
}
test = test.substring(end+1,test.length());
for (int i = 1; i < comparison.length(); i++)
{
for (int j = 0; j < axies.size(); j++)
{
for (int k = 0; k < axies.size(); k++)
{
if (String.valueOf(comparison.charAt(i-1)).equalsIgnoreCase(axies.get(j)) && String.valueOf(comparison.charAt(i)).equalsIgnoreCase(axies.get(k)))
{
matrix[j][k]++;
}
}
}
}
}
}
private void setupAxis(String temp)
{
for (int j = 0; j < temp.length(); j++)
{
boolean found = false;
for (int k = 0; k < axies.size() && !found; k++)
{
if (String.valueOf(temp.charAt(j)).equalsIgnoreCase(axies.get(k)))
{
found = true;
}
}
if (!found)
{
if (temp.charAt(j) > 32)
{
axies.add(String.valueOf(temp.charAt(j)));
}
}
}
}
private double getTotalValue()
{
double sum = 0;
for (int i = 0; i < matrix.length; i++)
{
for (int j = 0; j < matrix[i].length; j++)
{
double t = matrix[i][j];
sum = sum + t;
}
}
return sum;
}
private double getLineValue(int line)
{
double sum = 0;
for (int i = 0; i < matrix[line].length; i++)
{
double t = matrix[line][i];
sum = sum + t;
}
return sum;
}
private void display()
{
System.out.println("Sum of matrix is "+getTotalValue());
System.out.println("Sum of line 1 = "+getLineValue(3));
System.out.print(" ");
for (int j = 0; j < axies.size(); j++)
{
System.out.print(axies.get(j)+" ");
}
System.out.println();
for (int i = 0; i < matrix.length; i++)
{
System.out.print(axies.get(i)+" ");
for (int j = 0; j < matrix[i].length; j++)
{
System.out.print(matrix[i][j]+" ");
}
System.out.println();
}
}
}