/**
* Copyright (c) 2009, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.parse;
import clear.ftr.FtrLib;
import clear.ftr.map.AbstractFtrMap;
import clear.ftr.xml.AbstractFtrXml;
import clear.ftr.xml.FtrTemplate;
import clear.ftr.xml.FtrToken;
import clear.reader.AbstractReader;
import clear.util.tuple.JObjectObjectTuple;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.ObjectIntOpenHashMap;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Abstract parser.
 *
 * <p>Holds the state shared by concrete parsers: the active flag (one of the
 * {@code FLAG_*} constants), the language, and the training instances collected
 * so far. Subclasses provide {@link #getField(FtrToken)}, which resolves a single
 * feature token to its string value.
 *
 * @author Jinho D. Choi <b>Last update:</b> 4/12/2011
 */
public abstract class AbstractParser {
    /**
     * Print transitions
     */
    public static final byte FLAG_PRINT_TRANSITION = 0;
    /**
     * Train lexica
     */
    public static final byte FLAG_TRAIN_LEXICON = 1;
    /**
     * Train instances
     */
    public static final byte FLAG_TRAIN_INSTANCE = 2;
    /**
     * Train using boosting
     */
    public static final byte FLAG_TRAIN_BOOST = 3;
    /**
     * Predict
     */
    public static final byte FLAG_PREDICT = 4;
    /**
     * Predict using k-best ranking
     */
    public static final byte FLAG_PREDICT_BEST = 5;
    /**
     * One of the {@code FLAG_*} constants declared in this class.
     */
    protected byte i_flag;
    /**
     * Language; defaults to {@link AbstractReader#LANG_EN}.
     */
    protected String s_language = AbstractReader.LANG_EN;
    /**
     * Training instances. Each element pairs a list of label indices
     * ({@code o1}) with the list of feature-index arrays ({@code o2});
     * entry {@code i} of both lists belongs to the same instance.
     */
    public ArrayList<JObjectObjectTuple<IntArrayList, ArrayList<int[]>>> a_trans;

    /**
     * Sets the language.
     *
     * @param language one of the {@code LANG_*} constants of {@link AbstractReader}
     */
    public void setLanguage(String language) {
        s_language = language;
    }

    /**
     * Initializes arrays to save training instances.
     *
     * <p>Replaces {@link #a_trans} with a fresh list and fills it with
     * {@code size} empty label/feature tuples via {@link #addTrainArrays()}.
     *
     * @param size number of empty tuples to create
     */
    protected void initTrainArrays(int size) {
        a_trans = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            addTrainArrays();
        }
    }

    /**
     * Appends one empty label/feature tuple to {@link #a_trans}.
     *
     * <p>The list is created on demand, so this method is safe to call even
     * when {@link #initTrainArrays(int)} has not been invoked yet.
     */
    public void addTrainArrays() {
        if (a_trans == null) {
            a_trans = new ArrayList<>();
        }
        a_trans.add(new JObjectObjectTuple<>(new IntArrayList(), new ArrayList<int[]>()));
    }

    /**
     * Saves a training instance.
     *
     * <p>The label is converted to its index through {@code tmap}; when that
     * index is negative the instance is silently skipped.
     *
     * @param label      label of the instance
     * @param ftr        feature indices of the instance
     * @param tmap       map used to translate {@code label} into an index
     * @param trainIndex position in {@link #a_trans} of the tuple to append to
     */
    protected void saveInstance(String label, IntArrayList ftr, AbstractFtrMap tmap, int trainIndex) {
        int index = tmap.labelToIndex(label);
        if (index < 0) {
            return;
        }
        JObjectObjectTuple<IntArrayList, ArrayList<int[]>> yx = a_trans.get(trainIndex);
        // Materialize the features before touching the tuple so that a failure
        // here cannot leave the label list and feature list out of step.
        int[] ftrArr = ftr.toArray();
        yx.o1.add(index);
        yx.o2.add(ftrArr);
    }

    /**
     * Checks whether an instance with the given label index and feature array
     * is already stored in {@code yx}. This is a linear scan over all stored
     * instances.
     *
     * @param yx     tuple of label indices ({@code o1}) and feature arrays ({@code o2})
     * @param index  label index to look for
     * @param ftrArr feature array to look for
     * @return {@code true} if some stored instance has the same label index and
     *         an element-wise equal feature array
     */
    protected boolean existInstance(JObjectObjectTuple<IntArrayList, ArrayList<int[]>> yx, int index, int[] ftrArr) {
        int size = yx.o1.size();
        for (int i = 0; i < size; i++) {
            if (yx.o1.get(i) == index && equals(yx.o2.get(i), ftrArr)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Compares two feature arrays element by element.
     *
     * @param x1 first array
     * @param x2 second array
     * @return {@code true} if both arrays have the same length and the same
     *         values at every position (or both are {@code null})
     */
    protected boolean equals(int[] x1, int[] x2) {
        return Arrays.equals(x1, x2);
    }

    /**
     * Add n-gram lexica.
     *
     * <p>For every template of every n-gram group in {@code txml}, the feature
     * string is built with {@link #getFeature(FtrTemplate)} and, if it is not
     * {@code null}, registered in {@code tmap} under the group's index.
     *
     * @param txml source of the n-gram feature templates
     * @param tmap map receiving the n-gram feature strings
     */
    protected void addNgramLexica(AbstractFtrXml txml, AbstractFtrMap tmap) {
        FtrTemplate[][] templates = txml.a_ngram_templates;
        for (int j = 0; j < templates.length; j++) {
            for (FtrTemplate template : templates[j]) {
                String ftr = getFeature(template);
                if (ftr != null) {
                    tmap.addNgram(j, ftr);
                }
            }
        }
    }

    /**
     * Adds n-gram features.
     *
     * <p>Every template reserves a contiguous block of {@code tmap.n_ngram[j]}
     * indices starting at the running offset {@code idx[0]}. A feature whose
     * value in the group's map is positive is appended to {@code arr} as
     * {@code idx[0] + value - 1}.
     * NOTE(review): this presumably relies on the map storing 1-based indices
     * and yielding a non-positive value for unknown features; confirm against
     * {@code AbstractFtrMap}.
     *
     * @param arr  list receiving the feature indices
     * @param idx  single-element holder of the running index offset; updated in place
     * @param txml source of the n-gram feature templates
     * @param tmap map providing the n-gram lexica and their sizes
     */
    protected void addNgramFeatures(IntArrayList arr, int[] idx, AbstractFtrXml txml, AbstractFtrMap tmap) {
        FtrTemplate[][] templates = txml.a_ngram_templates;
        for (int j = 0; j < templates.length; j++) {
            ObjectIntOpenHashMap<String> map = tmap.getNgramHashMap(j);
            int size = tmap.n_ngram[j];
            for (FtrTemplate template : templates[j]) {
                String ftr = getFeature(template);
                if (ftr != null) {
                    int value = map.get(ftr);
                    if (value > 0) {
                        arr.add(idx[0] + value - 1);
                    }
                }
                // The offset advances for every template, whether or not a
                // feature was emitted, so each template keeps its own range.
                idx[0] += size;
            }
        }
    }

    /**
     * Builds the feature string of a template by joining the fields of its
     * tokens with {@link FtrLib#TAG_DELIM}.
     *
     * @param ftr feature template
     * @return feature value, or {@code null} as soon as
     *         {@link #getField(FtrToken)} returns {@code null} for any token
     */
    protected String getFeature(FtrTemplate ftr) {
        StringBuilder build = new StringBuilder();
        int n = ftr.tokens.length;
        for (int i = 0; i < n; i++) {
            String field = getField(ftr.tokens[i]);
            if (field == null) {
                return null;
            }
            if (i > 0) {
                build.append(FtrLib.TAG_DELIM);
            }
            build.append(field);
        }
        return build.toString();
    }

    /**
     * Resolves a single feature token to its string value.
     *
     * @param token feature token
     * @return field value; a {@code null} result makes
     *         {@link #getFeature(FtrTemplate)} return {@code null} for the
     *         whole template
     */
    protected abstract String getField(FtrToken token);
}