/** * Title: StanfordMaxEnt<p> * Description: A Maximum Entropy Toolkit<p> * Copyright: Copyright (c) Kristina Toutanova<p> * Company: Stanford University<p> */ package edu.stanford.nlp.tagger.maxent; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.Pair; import java.util.ArrayList; import java.util.Map; /** * Hash the instances on the things that the features look at. * * @author Kristina Toutanova * @version 1.0 */ class ListInstances { private final ArrayList<Integer> v = new ArrayList<>(); private int[] positions = null; private int num = 0; ListInstances() { } protected void add(int x) { v.add(x); } protected void addPositions(int s, int e) { positions = new int[2]; positions[0] = s; positions[1] = e; } public int[] getPositions() { return positions; } protected void inc() { num++; } public int getNum() { return num; } public int[] getInstances() { int[] arr = new int[v.size()]; Integer[] arr1 = new Integer[v.size()]; v.toArray(arr1); for (int i = 0; i < v.size(); i++) { arr[i] = arr1[i]; } return arr; } /* Methods unused: commented for now. public void save(DataOutputStream rf) { try { rf.writeInt(v.size()); int[] arr = getInstances(); for (int i = 0; i < v.size(); i++) { rf.writeInt(arr[i]); } } catch (Exception e) { e.printStackTrace(); } } public void read(DataInputStream rf) { try { int len = rf.readInt(); for (int i = 0; i < len; i++) { int x = rf.readInt(); add(x); } } catch (Exception e) { e.printStackTrace(); } }// end read */ } public class TemplateHash { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(TemplateHash.class); // the positions of the feature extractors private final Map<Pair<Integer,String>,ListInstances> tempHash = Generics.newHashMap(); private final MaxentTagger maxentTagger; public TemplateHash(MaxentTagger maxentTagger) { this.maxentTagger = maxentTagger; } protected void addPositions(int start, int end, FeatureKey fK) { Pair<Integer, String> key = new Pair<>(fK.num, fK.val); tempHash.get(key).addPositions(start, end); } protected int[] getPositions(FeatureKey s) { Pair<Integer, String> p = new Pair<>(s.num, s.val); return tempHash.get(p).getPositions(); } //public void init() { // cdm 2008: stringNums isn't used anywhere, so we now don't do any init. // int num = maxentTagger.extractors.getSize() + maxentTagger.extractorsRare.getSize(); // //log.info("A total of "+num+" features in TemplateHash"); // stringNums = new String[num]; // for (int i = 0; i < num; i++) { // stringNums[i] = String.valueOf(i); // } //} protected void release() { tempHash.clear(); } protected void add(int nFeatFrame, History history, int number) { Pair<Integer,String> wT; int general = maxentTagger.extractors.size(); if (nFeatFrame < general) { wT = new Pair<>(nFeatFrame, maxentTagger.extractors.extract(nFeatFrame, history)); } else { wT = new Pair<>(nFeatFrame, maxentTagger.extractorsRare.extract(nFeatFrame - general, history)); } if (tempHash.containsKey(wT)) { ListInstances li = tempHash.get(wT); // TODO: can we clean this call up somehow? perhaps make the // TemplateHash aware of the TaggerExperiments if we need to, or // vice-versa? if (TaggerExperiments.isPopulated(nFeatFrame, li.getNum(), maxentTagger)) { li.add(number); } } else { ListInstances li = new ListInstances(); li.add(number); tempHash.put(wT, li); } } protected void addPrev(int nFeatFrame, History history) { Pair<Integer,String> wT; int general = maxentTagger.extractors.size(); if (nFeatFrame < general) { wT = new Pair<>(nFeatFrame, maxentTagger.extractors.extract(nFeatFrame, history)); } else { wT = new Pair<>(nFeatFrame, maxentTagger.extractorsRare.extract(nFeatFrame - general, history)); } if (tempHash.containsKey(wT)) { (tempHash.get(wT)).inc(); } else { ListInstances li = new ListInstances(); li.inc(); tempHash.put(wT, li); } } protected int[] getXValues(Pair<Integer, String> key) { if (tempHash.containsKey(key)) { return tempHash.get(key).getInstances(); } return null; } /* Methods unused. Commented for now. public void save(DataOutputStream rf) { try { Pair[] keys = new Pair[tempHash.keySet().size()]; tempHash.keySet().toArray(keys); rf.writeInt(keys.length); for (Pair key : keys) { //rf.writeInt(s.length()); //rf.write(s.getBytes()); key.save(rf); tempHash.get(key).save(rf); } // for } catch (Exception e) { e.printStackTrace(); } } private void read(InDataStreamFile rf) { try { int numElem = rf.readInt(); for (int i = 0; i < numElem; i++) { //int strLen=rf.readInt(); //byte[] buff=new byte[strLen]; //rf.read(buff); //String s=new String(buff); Pair<String,String> sWT = Pair.readStringPair(rf); Pair<Integer,String> wT = new Pair<Integer,String>(Integer.parseInt(sWT.first()), sWT.second()); ListInstances li = new ListInstances(); li.read(rf); tempHash.put(wT, li); }// for } catch (Exception e) { e.printStackTrace(); } } public void print() { Object[] arr = tempHash.keySet().toArray(); for (int i = 0; i < arr.length; i++) { System.out.println(arr[i]); } } public static void main(String[] args) { TemplateHash hT = new TemplateHash(); Pair<Integer,String> p = new Pair<Integer,String>(0, "0"); ListInstances li = new ListInstances(); li.add(14); hT.tempHash.put(p, new ListInstances()); if (hT.tempHash.containsKey(p)) { System.out.println(hT.tempHash.get(p)); } } */ }