// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.tokenize.preprocess;
import marmot.util.LevenshteinLattice;
/**
 * A tokenized string together with its untokenized counterpart and a
 * length-normalized distance score between the two.
 *
 * <p>Equality and hashing consider only {@link #tokenized} and
 * {@link #untokenized}; {@link #score} is derived from them at construction
 * time and is not part of the identity of a pair.
 */
public class Pair {

    /** The tokenized form of the text. */
    public String tokenized;

    /** The untokenized (raw) form of the text. */
    public String untokenized;

    /**
     * Distance reported by {@link LevenshteinLattice} for the two strings,
     * divided by the sum of their lengths. It is {@code 0.0} when both
     * strings are empty.
     */
    public double score;

    /**
     * Creates a pair and computes its {@link #score}.
     *
     * @param tokenized the tokenized text; must not be {@code null}
     * @param untokenized the untokenized text; must not be {@code null}
     */
    public Pair(String tokenized, String untokenized) {
        this.tokenized = tokenized;
        this.untokenized = untokenized;

        // Argument order (untokenized first) is kept exactly as before.
        LevenshteinLattice lattice = new LevenshteinLattice(untokenized,
                tokenized);
        double distance = lattice.getDistance();

        int totalLength = untokenized.length() + tokenized.length();
        // Two empty strings would otherwise yield 0 / 0.0 == NaN, which
        // compares false against everything; score them like any other
        // pair whose distance is zero.
        score = (totalLength == 0) ? 0.0 : distance / totalLength;
    }

    /** Returns a debug representation listing both strings. */
    @Override
    public String toString() {
        return "Pair [tokenized=" + tokenized + ", untokenized=" + untokenized
                + "]";
    }

    /**
     * Hashes over {@link #tokenized} and {@link #untokenized}, treating
     * {@code null} as 0 because the public fields may be reassigned.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int tokenizedHash = (tokenized == null) ? 0 : tokenized.hashCode();
        int untokenizedHash = (untokenized == null) ? 0 : untokenized.hashCode();
        int result = prime + tokenizedHash;
        return prime * result + untokenizedHash;
    }

    /**
     * Two pairs are equal when they are of the same runtime class and both
     * their {@link #tokenized} and {@link #untokenized} strings are equal
     * (or both {@code null}).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        Pair other = (Pair) obj;
        return sameString(tokenized, other.tokenized)
                && sameString(untokenized, other.untokenized);
    }

    /** Null-safe string equality: two nulls are equal, null and non-null are not. */
    private static boolean sameString(String left, String right) {
        return (left == null) ? right == null : left.equals(right);
    }
}