package ruc.irm.similarity.sentence.editdistance;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits two {@link SuperString}s into sequences of chunks by repeatedly
 * locating the longest common contiguous run between a block of the first
 * string and the still-undivided blocks of the second string, and dividing
 * both blocks at that run.
 */
public class Split {
    /** Public toggle; it is not read anywhere inside this class. */
    public static boolean MERGE_FLAG = true;

    /**
     * Splits {@code X} and {@code Y} into chunk sequences.
     *
     * @param X the first string
     * @param Y the second string
     * @return a two-element array: index 0 holds a
     *         {@code SuperString<ChunkEditUnit>} built from the blocks of
     *         {@code X}, index 1 holds the one built from the blocks of {@code Y}
     */
    @SuppressWarnings("unchecked")
    public static Object[] split(SuperString<? extends EditUnit> X, SuperString<? extends EditUnit> Y) {
        // Raw construction is kept on purpose: the wildcard-typed arguments are
        // handed to Block without naming a concrete type argument.
        Block<? extends EditUnit> LX = new Block(X);
        Block<? extends EditUnit> LY = new Block(Y);
        split(LX, LY);

        SuperString<ChunkEditUnit> s1 = toChunkString(LX);
        SuperString<ChunkEditUnit> s2 = toChunkString(LY);
        return new Object[]{s1, s2};
    }

    /**
     * Rewinds to the first block of the chain that {@code block} belongs to and
     * wraps the data of every block, in order, into a {@link ChunkEditUnit}.
     */
    private static SuperString<ChunkEditUnit> toChunkString(Block<? extends EditUnit> block) {
        Block<? extends EditUnit> cursor = block;
        while (cursor.getPrev() != null) {
            cursor = cursor.getPrev();
        }
        List<ChunkEditUnit> chunks = new ArrayList<ChunkEditUnit>();
        while (cursor != null) {
            chunks.add(new ChunkEditUnit(cursor.getData()));
            cursor = cursor.getNext();
        }
        return new SuperString<ChunkEditUnit>(chunks);
    }

    /**
     * Matches {@code bx} against every undivided block in the chain containing
     * {@code LY}, divides both sides at the longest common run (if any), then
     * recurses into the undivided neighbours of {@code bx}.
     *
     * @param bx the block of the first string to process
     * @param LY any block of the second string's chain
     */
    private static void split(Block<?> bx, Block<?> LY) {
        // Start scanning from the head of the Y chain.
        Block<?> by = LY;
        while (by.getPrev() != null) {
            by = by.getPrev();
        }

        LCS maxLCS = null;
        Block<?> maxMatchedBy = by;
        for (; by != null; by = by.getNext()) {
            if (by.isDivideFlag()) {
                continue; // blocks already flagged as divided are not matched again
            }
            LCS lcs = LCS.parse(bx.getData(), by.getData());
            // Strict comparison: on ties the earliest candidate in the chain wins.
            if (maxLCS == null || maxLCS.length < lcs.length) {
                maxLCS = lcs;
                maxMatchedBy = by;
            }
        }

        if (maxLCS != null && maxLCS.length > 0) {
            // NOTE(review): Block.divide is not visible here; presumably it flags
            // the matched span as divided and leaves any unmatched remainder as
            // neighbouring blocks - confirm against Block.
            bx.divide(maxLCS.x_pos, maxLCS.length);
            maxMatchedBy.divide(maxLCS.y_pos, maxLCS.length);
        }

        // Recurse only into neighbours that are themselves still undivided.
        // The flag of the previous neighbour is tested (not the flag of bx),
        // mirroring the check applied to the next neighbour.
        if (bx.getPrev() != null && !bx.getPrev().isDivideFlag()) {
            split(bx.getPrev(), LY);
        }
        if (bx.getNext() != null && !bx.getNext().isDivideFlag()) {
            split(bx.getNext(), LY);
        }
    }

    /**
     * Longest common string: records the length and the start positions of the
     * longest contiguous run of {@code X} that {@code Y.indexOf} can locate.
     *
     * @author Gavin
     */
    public static class LCS {
        /** Length of the longest match found. */
        public int length = 0;
        /** Start position of the match in X. */
        public int x_pos = 0;
        /** Start position of the match in Y. */
        public int y_pos = 0;

        /**
         * Tries every substring {@code X.substring(start, end)} and keeps the
         * longest one for which {@code Y.indexOf} returns a non-negative position.
         *
         * @param X the string whose substrings are enumerated
         * @param Y the string searched for each substring
         * @return the best match; {@code length} stays 0 when nothing matches
         */
        public static LCS parse(SuperString<?> X, SuperString<?> Y) {
            LCS lcs = new LCS();
            for (int start = 0; start < X.length(); start++) {
                for (int end = start + 1; end <= X.length(); end++) {
                    SuperString<?> tempX = X.substring(start, end);
                    int pos = Y.indexOf(tempX);
                    // Strictly longer only, so the first longest match is kept.
                    if (pos >= 0 && tempX.length() > lcs.length) {
                        lcs.length = tempX.length();
                        lcs.x_pos = start;
                        lcs.y_pos = pos;
                    }
                }
            }
            return lcs;
        }

        @Override
        public String toString() {
            return "length=" + length + ", x_pos=" + x_pos + ", y_pos=" + y_pos;
        }
    }

    /**
     * Ad-hoc driver: splits one sample sentence pair at word granularity.
     * The result is not printed.
     */
    public static void main(String[] args) {
        // Other sample pairs that were tried:
        //   "abcdefghijkabc" / "cdefghijklabccc"
        //   "abcdefgxyzoxyjasdkfjjjaldsfa" / "fgabcdehijklkdslfkasdflak"
        //   "I like the book" / "the book I like"
        String s1 = "什么是计算机病毒";
        String s2 = "电脑病毒是什么";

        // Character-level alternative:
        // SuperString<CharEditUnit> ss1 = SuperString.createCharSuperString(s1);
        // SuperString<CharEditUnit> ss2 = SuperString.createCharSuperString(s2);
        SuperString<WordEditUnit> ss1 = SuperString.createWordSuperString(s1);
        SuperString<WordEditUnit> ss2 = SuperString.createWordSuperString(s2);
        Split.split(ss1, ss2);
        // LCS lcs = LCS.parse(ss1, ss2);
        // System.out.println(lcs);
    }
}