// Copyright(c) 2006, Shinichi Morishita. All Rights Reserved. public class Chapter5_SpaceEfficientStringSearch { // Rabin-Karp public static void rkSearch( int[] target, int[] query ) { int targetLen = target.length; int queryLen = query.length; int primeNumber = 524287; // 2^{19}-1, a Mersenne prime number. int intQuery = 0; // The hash value of query int tmp = 0; // The hash value of the temporary substring int c = 1; // The power of 4 to queryLen for(int i = 0; i <= (targetLen - queryLen); i++){ if(i == 0){ for(int j = 0; j < queryLen; j++){ intQuery = (intQuery*4 + query[j]) % primeNumber; tmp = (tmp*4 + target[j]) % primeNumber; c = c*4 % primeNumber; } }else tmp = (tmp*4 - target[i-1]*c + target[i+queryLen-1]) % primeNumber; if(intQuery == tmp){ int j; for(j = 0; j < queryLen && target[i+j]==query[j]; j++){} if(j == queryLen) // Print positions of query occurrences. System.out.print(i+" "); } } System.out.println(); } // Knuth-Morris-Pratt public static void kmpSearch( int[] target, int[] query ) { int targetLen = target.length; int queryLen = query.length; // Compute the delta table. int[] delta = new int[queryLen+1]; delta[0] = -1; for(int i = 0, j = -1; i < queryLen; i++, j++, delta[i]=j){ while( (0 <= j) && query[i] != query[j] ) j = delta[j]; } // Search the target for the query using the KMP shift rule. for(int i = 0, j = 0; (i < targetLen) && (j < queryLen); i++, j++){ if(target[i] != query[j]){ // 0 <= j at this moment. j = delta[j]; while( (0 <= j) && (target[i] != query[j]) ) j = delta[j]; }else if(j == queryLen-1){ // Print positions of query occurrences. System.out.print(i-j+" "); j = delta[queryLen]-1; // Revised to fix the bug (2006/08/04). // The following two lines fail to output all positions for target=AAAAAA and query=AAA. // i++; // j = delta[queryLen]; } } System.out.println(); } // bad character heuristics public static void badCharHeuristics( int[] target, int[] query ) { int targetLen = target.length; int queryLen = query.length; int SIZE = 256; // Declare the number of letters to use. int[] shift = new int[SIZE+1]; // Generate the shift table // Initially suppose that no letters appear in the query. for(int k=0; k<=SIZE; k++) shift[k]=queryLen; // Associate the position of the rightmost occurence with each letter. for(int k=0; k < queryLen-1; k++) if(query[k] < SIZE) shift[query[k]] = queryLen-1-k; else return; //@Search the target for the query. for(int i=queryLen-1, j=queryLen-1; i < targetLen; i--, j--){ if(j == -1){ // Print positions of query occurrences. System.out.print(i+1+" "); i = i+1+queryLen; j = queryLen-1; } while(i < targetLen && target[i] != query[j]){ int shiftLen = shift[target[i]]; if(queryLen-j > shiftLen) i = i+queryLen-j; else i = i+shiftLen; j = queryLen-1; // Search the query from the rightmost again. } } System.out.println(); } // Subroutines public static int[] characters2IntArray(String targetString){ int targetLen = targetString.length(); int[] targetIntArray = new int[targetLen]; for(int i = 0; i < targetLen; i++) targetIntArray[i] = (int)targetString.charAt(i); return targetIntArray; } public static int encode_char( char c ){ switch (c){ case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; default: return -1; } } public static int[] generateRandomNumbers(int targetLen){ // Generate an array in which elements are selected from {0,1,2,3} at random. int[] target = new int[targetLen]; for(int i=0; i