package edu.stanford.nlp.loglinear.storage;
import edu.stanford.nlp.loglinear.model.GraphicalModel;
import java.io.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Consumer;
/**
* Created on 10/17/15.
* @author keenon
* <p>
* The idea here is pretty straightforward, but requires some explanation.
* <p>
* GraphicalModels are great for storing lots of metadata about the model, though storing full featurizations can be a
* bit slow.
* <p>
* With a ModelBatch, you can get your models from anywhere, and after running LENSE on them (which will add lots of
* annotations, potentially) you can write those models to disk in a big fat batch. Those models you've stored can be
* stored without featurizing them, as long as you keep enough metadata to be able to featurize later. Then when you
* load a batch from disk to run simulations, you can try out different feature sets and gameplayers, all while keeping
* the beautifully precomputed metadata for the model (including instructions for querying, and the query logs).
*/
public class ModelBatch extends ArrayList<GraphicalModel> {
/**
* Creates an empty ModelBatch
*/
public ModelBatch() {
}
/**
* This loads a model batch from a file, then closes the file handler. Just a convenience.
*
* @param filename the file to load from
* @throws IOException
*/
public ModelBatch(String filename) throws IOException {
this(filename, (model) -> {
});
}
/**
* This loads a model batch from a file, then closes the file handler. Just a convenience.
*
* @param filename the file to load from
* @param featurizer a function that gets run on every GraphicalModel, and has a chance to edit them (eg by adding
* or changing features)
* @throws IOException
*/
public ModelBatch(String filename, Consumer<GraphicalModel> featurizer) throws IOException {
InputStream is = new FileInputStream(filename);
readFrom(is, featurizer);
is.close();
}
/**
* Load a batch of models from disk, without specifying a function to re-featurize those models.
*
* @param inputStream the inputstream to load from
*/
public ModelBatch(InputStream inputStream) throws IOException {
this(inputStream, (model) -> {
});
}
/**
* Load a batch of models from disk, while running the function "featurizer" on each of the models before adding it
* to the batch. This gives the loader a chance to experiment with new featurization techniques.
*
* @param inputStream the input stream to load from
* @param featurizer a function that gets run on every GraphicalModel, and has a chance to edit them (eg by adding
* or changing features)
*/
public ModelBatch(InputStream inputStream, Consumer<GraphicalModel> featurizer) throws IOException {
readFrom(inputStream, featurizer);
}
/**
* Load a batch of models from disk, while running the function "featurizer" on each of the models before adding it
* to the batch. This gives the loader a chance to experiment with new featurization techniques.
*
* @param inputStream the input stream to load from
* @param featurizer a function that gets run on every GraphicalModel, and has a chance to edit them (eg by adding
* or changing features)
*/
private void readFrom(InputStream inputStream, Consumer<GraphicalModel> featurizer) throws IOException {
GraphicalModel read;
while ((read = GraphicalModel.readFromStream(inputStream)) != null) {
featurizer.accept(read);
add(read);
}
}
/**
* Convenience function to write the current state of the modelBatch out to a file, including all factors.
* <p>
* WARNING: These files can get quite large, if you're using large embeddings as features.
*
* @param filename the file to write the batch to
* @throws IOException
*/
public void writeToFile(String filename) throws IOException {
FileOutputStream fos = new FileOutputStream(filename);
writeToStream(fos);
fos.close();
}
/**
* Convenience function to write the current state of the modelBatch out to a file, without factors.
*
* @param filename the file to write the batch to
* @throws IOException
*/
public void writeToFileWithoutFactors(String filename) throws IOException {
FileOutputStream fos = new FileOutputStream(filename);
writeToStreamWithoutFactors(fos);
fos.close();
}
/**
* This writes the entire batch, including all factors, to the given output stream.
* <p>
* WARNING: These files can get quite large, if you're using large embeddings as features.
*
* @param outputStream the outputstream to write our files to
* @throws IOException
*/
public void writeToStream(OutputStream outputStream) throws IOException {
for (GraphicalModel model : this) {
model.writeToStream(outputStream);
}
}
/**
* This writes the whole batch, WITHOUT FACTORS, which means that anyone loading this batch will need to include
* their own featurizer. Make sure that you have sufficient metadata to be able to do full featurizations.
*
* @param outputStream the outputstream to write our files to
* @throws IOException
*/
public void writeToStreamWithoutFactors(OutputStream outputStream) throws IOException {
Set<GraphicalModel.Factor> emptySet = new HashSet<>();
for (GraphicalModel model : this) {
Set<GraphicalModel.Factor> cachedFactors = model.factors;
model.factors = emptySet;
model.writeToStream(outputStream);
model.factors = cachedFactors;
}
}
}