package fna.parsing;
import java.util.Hashtable;
import fna.parsing.character.Glossary;
//
public class DeHyphenizerCorrected extends DeHyphenizer {
public DeHyphenizerCorrected(String database, String table, String column, String countcolumn, String hyphen, String glossarytable, Glossary glossary) {
super(database, table, column, countcolumn, hyphen, glossarytable, glossary);
}
protected void collectTerms(String[] segs, String[] terms, int[][] matrix) {
//rank rows by the distance between a 1 in upper matrix to the diagonal line
int max = 0;
Hashtable<String, String> rank = new Hashtable<String, String>();
for(int i = 0; i < segs.length; i++){
int distance = getDistance(matrix[i], i);
if(distance > max){
max = distance;
}
String list = (String)rank.get(distance+"");
if(list == null){
rank.put(distance+"", i+"");
}else{
rank.put(distance+"", list+" "+i+"");
}
}
//collect terms
String checked="-";
for(int i = max; i >= 0; i--){
String rows = (String)rank.get(i+"");
if(rows!= null && i == 0){//term not see in learned or glossary, and not connectable to other terms
String[] rowss = rows.split(" ");
for(int j = 0; j < rowss.length; j++){
int arow = Integer.parseInt(rowss[j]);
if(checked.indexOf("-"+arow+"-")<0){
terms[arow] = segs[arow];
}
}
}else if(rows!=null){
String[] rowss = rows.split(" ");
for(int j = 0; j < rowss.length; j++){
int arow = Integer.parseInt(rowss[j]);
if(checked.indexOf("-"+arow+"-")<0){
terms[arow] = formTerm(segs, arow, arow+i);
checked += formString(arow, arow+i, "-");
}
}
}
}
}
/**
* the distance between a 1 in upper matrix to the diagonal line
* @param is
* @return
*/
private int getDistance(int[] arow, int rownumber) {
for(int i = arow.length-1; i>=0; i--){
if(arow[i] == 1){
return i - rownumber;
}
}
return 0;
}
}