package edu.umd.hooka.alignment;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import edu.umd.hooka.corpora.Language;
public class HadoopAlignConfig extends Configuration {
static final String KEY_MODEL1ITERATIONS = "ha.model1.iterations";
static final String KEY_HMMITERATIONS = "ha.hmm.iterations";
static final String KEY_HMMP0 = "ha.hmm.p0";
static final String KEY_USEVB = "ha.vb.use";
static final String KEY_USETRUNC = "ha.trunc.use";
static final String KEY_USENULLWORD = "ha.use.nullword";
static final String KEY_ALPHA = "ha.vb.alpha";
static final String KEY_BITEXTS = "ha.inbitext";
static final String KEY_F = "ha.sourcelang";
static final String KEY_E = "ha.targetlang";
static final String KEY_TTABLE = "ha.ttable.path";
static final String KEY_ATABLE = "ha.atable.path";
static final String KEY_EVOC = "ha.evoc";
static final String KEY_FVOC = "ha.fvoc";
static final String KEY_MAX_SENTLEN = "ha.max.sentlen";
static final String KEY_HOMOGENEOUS_HMM = "ha.hmm.homogeneous";
public HadoopAlignConfig() {}
public HadoopAlignConfig(Configuration conf) {
super(conf);
}
public HadoopAlignConfig(String root, String e, String f, String bitexts, int model1Iters, int hmmIters, boolean useNull, boolean useVB, boolean useTruncate, float alpha) {
this.setRoot(root);
this.setE(Language.languageForISO639_1(e));
this.setF(Language.languageForISO639_1(f));
this.setBitexts(bitexts);
this.setModel1Iterations(model1Iters);
this.setHMMIterations(hmmIters);
this.setIncludeNullWord(useNull);
this.setUseVariationalBayes(useVB);
this.setUseTruncate(useTruncate);
this.setAlpha(alpha);
this.setMaxSentLen(200);
}
private void setRoot(String root) {
this.set("root", root);
}
String getRoot() {
return this.get("root", null);
}
public int getMaxSentLen() {
return Integer.parseInt(get(KEY_MAX_SENTLEN));
}
public Language getE() { return Language.languageForISO639_1(get(KEY_E)); }
public Language getF() { return Language.languageForISO639_1(get(KEY_F)); }
public int getModel1Iterations() { return this.getInt(KEY_MODEL1ITERATIONS, 0); }
public int getHMMIterations() { return this.getInt(KEY_HMMITERATIONS, 0); }
public double getHMMp0() {
String v = this.get(KEY_HMMP0);
if (v == null || v.equals("")) return -1.0;
return Double.parseDouble(v);
}
public boolean isHMMHomogeneous() { return this.getBoolean(KEY_HOMOGENEOUS_HMM, true); }
public boolean useVariationalBayes() { return this.getBoolean(KEY_USEVB, false); }
public boolean includeNullWord() { return this.getBoolean(KEY_USENULLWORD, false); }
public float getAlpha() { return this.getFloat(KEY_ALPHA, 0.0f); }
public String getBitexts() { return this.get(KEY_BITEXTS); }
public Path getTestBitextPath() { return null; }
public boolean hasTestBitext() {
return false;
}
public Path getTestRefPath() { return null; }
public boolean hasTestRef() {
return false;
}
public Path getTestAlignmentsPath() { return null; }
public Path getTTablePath() {
String tp = this.get(KEY_TTABLE);
if (tp == null || tp.equals("")) tp = "tmp.ttable";
return new Path(getRoot()+"/"+tp);
}
public Path getATablePath() {
String tp = this.get(KEY_ATABLE);
if (tp == null || tp.equals("")) tp = "tmp.atable";
return new Path(getRoot()+"/"+tp);
}
public Path getFVocPath() {
String tp = this.get(KEY_FVOC);
if (tp == null || tp.equals("")) return null;
return new Path(getRoot()+"/"+tp);
}
public Path getEVocPath() {
String tp = this.get(KEY_EVOC);
if (tp == null || tp.equals("")) return null;
return new Path(getRoot()+"/"+tp);
}
public void setMaxSentLen(int n) { this.setInt(KEY_MAX_SENTLEN, n); }
public void setModel1Iterations(int n) { this.setInt(KEY_MODEL1ITERATIONS, n); }
public void setHMMIterations(int n) { this.setInt(KEY_HMMITERATIONS, n); }
public void setUseVariationalBayes(boolean vb) { this.setBoolean(KEY_USEVB, vb); }
public void setUseTruncate(boolean trunc) { this.setBoolean(KEY_USETRUNC, trunc); }
public void setIncludeNullWord(boolean nw) { this.setBoolean(KEY_USENULLWORD, nw); }
public void setAlpha(float alpha) { this.set(KEY_ALPHA, Float.toString(alpha)); }
public void setBitexts(String value) { this.set(KEY_BITEXTS, value); }
public void setE(Language e) { this.set(KEY_E, e.code()); }
public void setF(Language f) { this.set(KEY_F, f.code()); }
public void setTestBitextPath(Path p) { }
public void setTestAlignmentsPath(Path p) { }
public void setTestReferencePath(Path p) { }
public void setTTablePath(Path p) { this.set(KEY_TTABLE, p.toString()); }
public void setATablePath(Path p) { this.set(KEY_ATABLE, p.toString()); }
public void setEVocFile(Path p) { this.set(KEY_EVOC, p.toString()); }
public void setFVocFile(Path p) { this.set(KEY_FVOC, p.toString()); }
public void setHMMp0(double p0) { this.set(KEY_HMMP0, Double.toString(p0)); }
public void setHMMHomogeneous(boolean x) { this.setBoolean(KEY_HOMOGENEOUS_HMM, x); }
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append("Alignment Configuration Parameters\n")
.append(" E-language: ").append(getE().toString()).append('\n')
.append(" F-language: ").append(getF().toString()).append('\n')
.append(" Corpora: ").append(getBitexts()).append('\n')
.append(" Model1 iterations: ").append(getModel1Iterations()).append('\n')
.append(" HMM iterations: ").append(getHMMIterations()).append('\n')
.append(" Include NULL?: ").append(includeNullWord()).append('\n')
.append(" Training: ").append(useVariationalBayes() ? "VB" : "EM").append('\n')
.append(" alpha: ").append(getAlpha()).append('\n');
return sb.toString();
}
}