/**
* Copyright (c) 2010, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.ftr.map;
import clear.ftr.xml.AbstractFtrXml;
import clear.util.IOUtil;
import com.carrotsearch.hppc.ObjectIntOpenHashMap;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
/**
 * Abstract feature map.
 *
 * <p>An instance is used in one of two modes, selected by the constructor:
 * <ul>
 * <li><b>Collect mode</b> ({@link #AbstractFtrMap(AbstractFtrXml, int)}): the
 * maps hold occurrence <em>counts</em>, updated through {@link #addLabel},
 * {@link #addNgram} and {@link #addExtra}, and written out with
 * {@code save(...)}. {@link #a_label}, {@link #n_ngram} and {@link #n_extra}
 * are not populated in this mode.</li>
 * <li><b>Lookup mode</b> ({@link #AbstractFtrMap(String)} /
 * {@link #AbstractFtrMap(BufferedReader)}): the maps hold 1-based
 * <em>indices</em> assigned in file order (0 is reserved for unseen keys), and
 * the {@code ...ToIndex} methods translate them to 0-based indices.</li>
 * </ul>
 *
 * <p>Note that the constructors call the overridable methods {@link #init} and
 * {@code load(...)}; an override runs before the subclass's own field
 * initializers have executed.
 *
 * @author Jinho D. Choi <b>Last update:</b> 4/12/2010
 */
public abstract class AbstractFtrMap {
    /**
     * List of labels, in file order. Populated only by {@link #loadDefault}.
     */
    protected ArrayList<String> a_label;
    /**
     * Takes "label" as a key; the value is its count (collect mode) or its
     * 1-based index (lookup mode).
     */
    protected ObjectIntOpenHashMap<String> m_label;
    /**
     * Contains n-gram features, one map per n-gram template.
     */
    protected ArrayList<ObjectIntOpenHashMap<String>> m_ngram;
    /**
     * Size of each n-gram feature map. Populated only by {@link #loadDefault}.
     */
    public int[] n_ngram;
    /**
     * Takes "feature" as a key; the value is its count (collect mode) or its
     * 1-based index (lookup mode). One map per extra feature type.
     */
    protected ArrayList<ObjectIntOpenHashMap<String>> m_extra;
    /**
     * Size of each extra feature map. Populated only by {@link #loadDefault}.
     */
    public int[] n_extra;

    // =========================== Init ===========================

    /**
     * Creates an empty map in collect mode.
     *
     * @param xml    feature template; its number of n-gram templates decides
     *               how many n-gram maps are created
     * @param nExtra number of extra feature maps to create
     */
    public AbstractFtrMap(AbstractFtrXml xml, int nExtra) {
        init(xml, nExtra);
    }

    /**
     * Allocates the label map, one empty map per n-gram template of
     * {@code xml}, and {@code nExtra} empty extra-feature maps.
     *
     * @param xml    feature template supplying {@code a_ngram_templates}
     * @param nExtra number of extra feature maps to create
     */
    protected void init(AbstractFtrXml xml, int nExtra) {
        int nNgram = xml.a_ngram_templates.length;

        m_label = new ObjectIntOpenHashMap<>();
        m_ngram = new ArrayList<>(nNgram);
        m_extra = new ArrayList<>(nExtra);

        for (int i = 0; i < nNgram; i++) {
            m_ngram.add(new ObjectIntOpenHashMap<String>());
        }
        for (int i = 0; i < nExtra; i++) {
            m_extra.add(new ObjectIntOpenHashMap<String>());
        }
    }

    /**
     * Increments the count of {@code label}.
     *
     * @param label label to count
     */
    public void addLabel(String label) {
        incrementKey(m_label, label);
    }

    /**
     * Increments the count of {@code ftr} in the {@code index}'th n-gram map.
     *
     * @param index position of the n-gram map
     * @param ftr   feature string to count
     */
    public void addNgram(int index, String ftr) {
        incrementKey(m_ngram.get(index), ftr);
    }

    /**
     * Increments the count of {@code ftr} in the {@code index}'th extra map.
     *
     * @param index position of the extra-feature map
     * @param ftr   feature string to count
     */
    public void addExtra(int index, String ftr) {
        incrementKey(m_extra.get(index), ftr);
    }

    /**
     * Adds one to the value stored under {@code key}. This relies on the HPPC
     * map returning 0 from {@code get} for a key that is not present, so a new
     * key starts at 1.
     *
     * @param map map to update
     * @param key key whose value is incremented
     */
    protected void incrementKey(ObjectIntOpenHashMap<String> map, String key) {
        map.put(key, map.get(key) + 1);
    }

    // =========================== Save ===========================

    /**
     * Writes the lexicon to {@code lexiconFile}. Failures are reported on
     * standard error via {@code printStackTrace()} and are not propagated.
     *
     * @param xml         feature template supplying the cutoffs
     * @param lexiconFile path of the file to write
     */
    public void save(AbstractFtrXml xml, String lexiconFile) {
        // try-with-resources: the stream is flushed and closed even when
        // saveDefault() throws part-way through.
        try (PrintStream fout = IOUtil.createPrintFileStream(lexiconFile)) {
            saveDefault(xml, fout);

            // PrintStream swallows I/O errors; surface them here so a failed
            // write does not pass silently.
            if (fout.checkError()) {
                throw new IOException("Failed to write lexicon file: " + lexiconFile);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the lexicon to {@code fout}. The stream is left open for the
     * caller. Failures are reported via {@code printStackTrace()} and are not
     * propagated.
     *
     * @param xml  feature template supplying the cutoffs
     * @param fout destination stream
     */
    public void save(AbstractFtrXml xml, PrintStream fout) {
        try {
            saveDefault(xml, fout);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the label section, then the number of n-gram maps followed by
     * each n-gram map, then the number of extra maps followed by each extra
     * map. Each section is filtered by its own cutoff from {@code xml}.
     *
     * @param xml  feature template supplying {@code n_cutoff_label},
     *             {@code n_cutoff_ngram} and {@code n_cutoff_extra}
     * @param fout destination stream
     */
    protected void saveDefault(AbstractFtrXml xml, PrintStream fout) {
        // labels
        saveHashMap(fout, m_label, xml.n_cutoff_label);

        // n-gram features
        int nNgram = m_ngram.size();
        fout.println(nNgram);
        for (int i = 0; i < nNgram; i++) {
            saveHashMap(fout, m_ngram.get(i), xml.n_cutoff_ngram);
        }

        // extra features
        int nExtra = m_extra.size();
        fout.println(nExtra);
        for (int i = 0; i < nExtra; i++) {
            saveHashMap(fout, m_extra.get(i), xml.n_cutoff_extra);
        }
    }

    /**
     * Writes the number of keys whose value is greater than {@code cutoff},
     * followed by those keys, one per line.
     *
     * @param fout   destination stream
     * @param map    map to write
     * @param cutoff only keys with a value strictly greater than this are written
     */
    protected void saveHashMap(PrintStream fout, ObjectIntOpenHashMap<String> map, int cutoff) {
        writeEntries(fout, map, cutoff, false);
    }

    /**
     * Same as {@link #saveHashMap}, but each line is {@code key + " " + value}.
     *
     * @param fout   destination stream
     * @param map    map to write
     * @param cutoff only keys with a value strictly greater than this are written
     */
    protected void saveFreqMap(PrintStream fout, ObjectIntOpenHashMap<String> map, int cutoff) {
        writeEntries(fout, map, cutoff, true);
    }

    /**
     * Shared writer for {@link #saveHashMap} and {@link #saveFreqMap}: a count
     * line followed by one line per entry above the cutoff.
     */
    private void writeEntries(PrintStream fout, ObjectIntOpenHashMap<String> map, int cutoff, boolean withValue) {
        fout.println(countKeys(map, cutoff));

        for (ObjectCursor<String> cursor : map.keys()) {
            String key = cursor.value;
            int value = map.get(key);

            if (value > cutoff) {
                fout.println(withValue ? key + " " + value : key);
            }
        }
    }

    /**
     * Counts the keys whose value is strictly greater than {@code cutoff}.
     * This uses exactly the predicate the writers use, so the count line always
     * matches the number of entry lines that follow it.
     *
     * @param map    map to inspect
     * @param cutoff exclusive lower bound on the value
     * @return number of keys with {@code value > cutoff}
     */
    protected int countKeys(ObjectIntOpenHashMap<String> map, int cutoff) {
        int count = 0;

        for (ObjectCursor<String> cursor : map.keys()) {
            if (map.get(cursor.value) > cutoff) {
                count++;
            }
        }
        return count;
    }

    // =========================== Load ===========================

    /**
     * Creates a map in lookup mode from a lexicon file.
     *
     * @param lexiconFile path of the lexicon file to read
     */
    public AbstractFtrMap(String lexiconFile) {
        load(lexiconFile);
    }

    /**
     * Creates a map in lookup mode from an open reader.
     *
     * @param fin reader positioned at the start of the lexicon
     */
    public AbstractFtrMap(BufferedReader fin) {
        load(fin);
    }

    /**
     * Loads the lexicon from {@code lexiconFile}. On any failure the stack
     * trace is printed and the JVM is terminated with exit status 1.
     *
     * @param lexiconFile path of the lexicon file to read
     */
    public void load(String lexiconFile) {
        try (BufferedReader fin = IOUtil.createBufferedFileReader(lexiconFile)) {
            loadDefault(fin);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Loads the lexicon from {@code fin}; the reader is left open for the
     * caller. On any failure the stack trace is printed and the JVM is
     * terminated with exit status 1.
     *
     * @param fin reader positioned at the start of the lexicon
     */
    public void load(BufferedReader fin) {
        try {
            loadDefault(fin);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Reads the sections in the order {@link #saveDefault} writes them:
     * labels, n-gram maps, extra maps. Labels and features receive 1-based
     * indices in file order.
     *
     * @param fin reader positioned at the start of the lexicon
     * @throws Exception if the input ends prematurely, a count line is not an
     *                   integer, or reading fails
     */
    protected void loadDefault(BufferedReader fin) throws Exception {
        // labels
        int nLabel = readCount(fin);
        a_label = new ArrayList<>(nLabel);
        m_label = new ObjectIntOpenHashMap<>(nLabel);

        for (int i = 1; i <= nLabel; i++) {
            String label = readRequiredLine(fin);
            a_label.add(label);
            m_label.put(label, i);
        }

        // n-grams
        int nNgram = readCount(fin);
        m_ngram = new ArrayList<>(nNgram);
        n_ngram = new int[nNgram];

        for (int i = 0; i < nNgram; i++) {
            ObjectIntOpenHashMap<String> map = loadHashMap(fin);
            m_ngram.add(map);
            n_ngram[i] = map.size();
        }

        // extra features
        int nExtra = readCount(fin);
        m_extra = new ArrayList<>(nExtra);
        n_extra = new int[nExtra];

        for (int i = 0; i < nExtra; i++) {
            ObjectIntOpenHashMap<String> map = loadHashMap(fin);
            m_extra.add(map);
            n_extra[i] = map.size();
        }
    }

    /**
     * Reads a count line followed by that many keys, assigning each key a
     * 1-based index in file order.
     *
     * @param fin reader positioned at the count line
     * @return map from key to 1-based index
     * @throws Exception if the input ends prematurely, the count line is not
     *                   an integer, or reading fails
     */
    protected ObjectIntOpenHashMap<String> loadHashMap(BufferedReader fin) throws Exception {
        int n = readCount(fin);
        ObjectIntOpenHashMap<String> map = new ObjectIntOpenHashMap<>(n);

        // 0 is reserved for unseen feature
        for (int i = 1; i <= n; i++) {
            map.put(readRequiredLine(fin), i);
        }
        return map;
    }

    /**
     * Reads a count line followed by that many {@code "key value"} lines, the
     * format written by {@link #saveFreqMap}. Each line is split at its
     * <em>last</em> space, so keys that themselves contain spaces are read
     * back intact.
     *
     * @param fin reader positioned at the count line
     * @return map from key to the stored value
     * @throws Exception if the input ends prematurely, a line has no space
     *                   separator, a number cannot be parsed, or reading fails
     */
    protected ObjectIntOpenHashMap<String> loadFreqMap(BufferedReader fin) throws Exception {
        int n = readCount(fin);
        ObjectIntOpenHashMap<String> map = new ObjectIntOpenHashMap<>(n);

        for (int i = 1; i <= n; i++) {
            String line = readRequiredLine(fin);
            int sep = line.lastIndexOf(' ');

            if (sep < 0) {
                throw new IOException("Malformed frequency entry (expected \"key value\"): " + line);
            }
            map.put(line.substring(0, sep), Integer.parseInt(line.substring(sep + 1)));
        }
        return map;
    }

    /**
     * Reads one line, failing instead of returning {@code null} at end of input.
     */
    private static String readRequiredLine(BufferedReader fin) throws IOException {
        String line = fin.readLine();

        if (line == null) {
            throw new EOFException("Unexpected end of lexicon input");
        }
        return line;
    }

    /**
     * Reads one line and parses it as an integer count.
     */
    private static int readCount(BufferedReader fin) throws IOException {
        return Integer.parseInt(readRequiredLine(fin));
    }

    /**
     * Returns the label stored at the 0-based position {@code index} of
     * {@link #a_label}. Only usable after a lexicon has been loaded.
     *
     * @param index 0-based label index
     * @return the label at that index
     */
    public String indexToLabel(int index) {
        return a_label.get(index);
    }

    /**
     * Returns the value stored for {@code label} minus one. After loading this
     * is the 0-based label index, and -1 for a label that is not in the map
     * (the map yields 0 for absent keys).
     *
     * @param label label to look up
     * @return 0-based index, or -1 if unseen
     */
    public int labelToIndex(String label) {
        return m_label.get(label) - 1;
    }

    /**
     * Returns the value stored for {@code ftr} in the {@code index}'th n-gram
     * map minus one. After loading this is the 0-based feature index, and -1
     * for an unseen feature.
     *
     * @param index position of the n-gram map
     * @param ftr   feature string to look up
     * @return 0-based index, or -1 if unseen
     */
    public int ngramToIndex(int index, String ftr) {
        return m_ngram.get(index).get(ftr) - 1;
    }

    /**
     * Returns the value stored for {@code ftr} in the {@code index}'th extra
     * map minus one. After loading this is the 0-based feature index, and -1
     * for an unseen feature.
     *
     * @param index position of the extra-feature map
     * @param ftr   feature string to look up
     * @return 0-based index, or -1 if unseen
     */
    public int extraToIndex(int index, String ftr) {
        return m_extra.get(index).get(ftr) - 1;
    }

    /**
     * Returns the {@code index}'th n-gram map itself (not a copy).
     *
     * @param index position of the n-gram map
     * @return the live n-gram map
     */
    public ObjectIntOpenHashMap<String> getNgramHashMap(int index) {
        return m_ngram.get(index);
    }
}