package eu.europeana.creative.dataset.culturecam.bl.analysis;
import it.cnr.isti.feature.extraction.FeatureExtractionException;
import it.cnr.isti.feature.extraction.Image2Features;
import it.cnr.isti.vir.features.FeaturesCollectorArr;
import it.cnr.isti.vir.features.IFeaturesCollector;
import it.cnr.isti.vir.file.FeaturesCollectorsArchive;
import it.cnr.isti.vir.id.IDString;
import it.cnr.isti.vir.id.IHasID;
import it.cnr.isti.vir.readers.CoPhIRv2Reader;
import it.cnr.isti.vir.similarity.knn.IntDoubleString;
import it.cnr.isti.vir.similarity.metric.LireMetric;
import it.cnr.isti.vir.similarity.metric.Metric;
import it.cnr.isti.vir.util.Pivots;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.log4j.Logger;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.IRConfigurationImpl;
import eu.europeana.service.ir.image.exceptions.TechnicalRuntimeException;
public class SubsetAnalyserImpl<F> {
private Logger log = Logger.getLogger(getClass());
private IRConfiguration configuration;
private String dataset = null;
private String subset = null;
private Image2Features img2ftx;
private File subsetFCArchiveFile;
boolean resetFeaturesArchive = false;
FeaturesCollectorsArchive subsetFCArchive;
List<IFeaturesCollector> subsetFeatures;
// private IFeaturesCollector[] qObj;
private final Metric<F> comp;
// private File lireObjectPivotsFile;
// FeaturesCollectorsArchive lireObjectPivotsArchive;
// SubsetAnalyserImpl() {
// this(null, null);
// }
// SubsetAnalyserImpl(IRConfiguration configuration, String subset) {
// this(configuration, null, );
// }
public SubsetAnalyserImpl(String dataset, String subset, Metric<F> comp,
boolean resetFeaturesArchive) {
this(null, dataset, subset, comp, resetFeaturesArchive);
}
public SubsetAnalyserImpl(IRConfiguration configuration, String dataset,
String subset, Metric<F> comp, boolean resetFeaturesArchive) {
this.configuration = configuration;
this.dataset = dataset;
this.subset = subset;
this.comp = comp;
this.resetFeaturesArchive = resetFeaturesArchive;
}
// @Override
public IRConfiguration getConfiguration() {
if (configuration == null)
configuration = new IRConfigurationImpl();
return configuration;
}
// @Override
public void init() {
// ensure initialization of configuration attribute
getConfiguration();
// init index searcher bean
// File indexFolder = getConfiguration().getIndexFolder(
// getDataset());
// File indexConfFolder = getConfiguration().getIndexConfFolder(
// getDataset());
// init feature extraction bean
try {
if (img2ftx == null)
img2ftx = new Image2Features(getConfiguration()
.getIndexConfFolder(getDataset()));
} catch (Exception e) {
throw new TechnicalRuntimeException(
"Cannot instantiate feature extractor!", e);
// log.warn("Cannot instantiate feature extractor!", e);
}
}
protected void initSubsetFCArchive() {
initSubsetFCArchive(resetFeaturesArchive);
}
protected void initSubsetFCArchive(boolean resetFile) {
// create file path if needed
if (!getSubsetFCArchiveFile().exists())
getSubsetFCArchiveFile().getParentFile().mkdirs();
else if (resetFile)
getSubsetFCArchiveFile().delete();
try {
subsetFCArchive = new FeaturesCollectorsArchive(
getSubsetFCArchiveFile(),
new LireMetric().getRequestedFeaturesClasses(),
IDString.class, FeaturesCollectorArr.class);
} catch (Exception e) {
throw new TechnicalRuntimeException(
"Cannot instantiate (pivots) feature collection archive!",
e);
}
}
public String getDataset() {
return dataset;
}
public void setDataset(String dataset) {
this.dataset = dataset;
}
public File getSubsetFCArchiveFile() {
if (subsetFCArchiveFile == null)
subsetFCArchiveFile = new File(getConfiguration()
.getSubsetFCArchive(getDataset(), getSubset()));
return subsetFCArchiveFile;
}
public FeaturesCollectorsArchive getSubsetFCArchive() {
return subsetFCArchive;
}
// @Override
public void extractSubsetFeatures(Set<String> subsetThumbnailIds)
throws FeatureExtractionException {
// init file
initSubsetFCArchive();
// TODO: move resetFeaturesArchive from constructor to this method
// skip feature extraction
if (!resetFeaturesArchive && getSubsetFCArchiveFile().exists())
return;
File thumbnailFile = null;
int cnt = 0;
try {
for (String thumbnailId : subsetThumbnailIds) {
log.debug("extracting features for pivot with ID: "
+ thumbnailId);
thumbnailFile = getConfiguration().getImageFile(getDataset(),
thumbnailId);
final int PLACEHOLDER_SIZE = 3583;
if(thumbnailFile.length() == PLACEHOLDER_SIZE){
log.debug("Skip placeholder thumbnail: " + thumbnailFile.getAbsolutePath());
continue;
}
storeImageFeatures(thumbnailId, new FileInputStream(
thumbnailFile));
cnt++;
if (cnt % 1000 == 0)
log.debug("Features extracted for #pivots: " + cnt);
}
// write index files an close
getSubsetFCArchive().close();
} catch (Exception e) {
throw new FeatureExtractionException(
"Cannot write pivot Features Archives!", e);
}
}
public SortedSet<IntDoubleString> generateOrder() {
double[][] interDist = evalInterDistances();
TreeSet<IntDoubleString> res = new TreeSet<IntDoubleString>(
new Comparator<IntDoubleString>() {
@Override
public int compare(IntDoubleString o1, IntDoubleString o2) {
return o2.compareTo(o1);
}
});
log.debug("Avg inter-dist before ordering: "
+ Pivots.getTrMatrixAvg(interDist));
log.debug("Avg inter-dist before ordering(50): "
+ Pivots.getTrMatrixAvg(interDist,
Math.min(interDist.length, 50)));
double sum;
String stringId;
for (int i = 0; i < interDist.length; i++) {
sum = 0;
for (int j = 0; j < interDist[i].length; j++) {
sum += Math.abs(interDist[i][j]);
}
stringId = ((IHasID)getSubsetFeatures().get(i)).getID().toString();
res.add(new IntDoubleString(i, sum, stringId));
}
return res;
}
protected final double[][] evalInterDistances() {
getSubsetFeatures();
double temp[][] = new double[subsetFeatures.size()][subsetFeatures
.size()];
// for ( int i=0; i<temp.length; i++ ) {
// temp[i] = new double[i];
// }
for (int i = 0; i < temp.length; i++) {
for (int j = 0; j < temp[i].length; j++) {
temp[i][j] = comp.distance(subsetFeatures.get(i),
subsetFeatures.get(j));
}
}
return temp;
}
protected List<IFeaturesCollector> getSubsetFeatures() {
if (subsetFeatures == null) {
try {
subsetFeatures = getSubsetFCArchive().getAll();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
throw new TechnicalRuntimeException(
"Cannot get features from subset archive", e);
}
}
return subsetFeatures;
}
protected void storeImageFeatures(String thumbnailId, InputStream imageObj)
throws FeatureExtractionException {
String imgFeatures;
imgFeatures = img2ftx.extractFeatures(imageObj);
storeImageFeatures(thumbnailId, imgFeatures);
}
protected void storeImageFeatures(String docID, String imgFeatures)
throws FeatureExtractionException {
BufferedReader br = null;
try {
InputStream is = new ByteArrayInputStream(imgFeatures.getBytes());
// read it with BufferedReader
br = new BufferedReader(new InputStreamReader(is));
FeaturesCollectorArr features = CoPhIRv2Reader.getObj(br);
// System.out.println("writting");
// LireObject object = new LireObject(features);
features.setID(new IDString(docID));
getSubsetFCArchive().add(features);
} catch (Exception e) {
throw new FeatureExtractionException(
"Cannot store pivot features: " + docID, e);
} finally {
if (br != null)
try {
br.close();
} catch (IOException e) {
// this exception should not occur
// if it occurs nothing harmful should occur
log.warn("warning: exception occured when closing buffered reader of image features for image "
+ docID
+ "\nError message"
+ e.getLocalizedMessage());
}
}
}
// public void generateSubsetOrder() throws SecurityException,
// IllegalArgumentException, IOException, NoSuchMethodException,
// InstantiationException, IllegalAccessException,
// InvocationTargetException {
// public MultipleKNNPQueueID( Collection queryColl,
// Integer k,
// Metric comp,
// Class pQueueClass
// ) {
//
// this (queryColl, k, comp, false, null, null, true, false,
// pQueueClass, true );
// }
// TODO: test also with lire pivots archive
// File inFile = new File(pivotManager.getConfiguration()
// .getPivotsFCArchive(getDataset()));
// List<IFeaturesCollector> featuresCollection = FeaturesCollectorsArchive
// .getAll(inFile);
// List<IFeaturesCollector> featuresCollection =
// getSubsetFCArchive().getAll();
// // FeaturesCollectorsArchive.
//
// final int k = 100;
// final int tries = 30;
// MultipleKNNPQueueID multipleKnnQueue = new MultipleKNNPQueueID(
// featuresCollection, // Collection queryColl,
// k, // Integer k,
// new LireMetric(), // Metric comp,
// true, // boolean useInterDistances,
// new QueriesOrder3(tries, k), // (available Queriesorder1-3)
// // IQueriesOrdering ordering,
// -1, // (not used) - Integer nRecents,
// false, // (distance overflow) boolean distET,
// false, // (search by ID or features?) boolean storeID,
// SimPQueue_kNN.class, // (same as SimPQueueDMax) Class
// // pQueueClass,
// false /* boolean silent */);
//
// System.out.println(multipleKnnQueue.getAvgIntDist());
//
// // System.out.println(multipleKnnQueue.getAvgLastDist());
// ISimilarityResults[] results = multipleKnnQueue.getResults();
// // multipleKnnQueue.writeResultsIDs();
//
// for (int i = 0; i < results.length; i++) {
// System.out.println("ISimilarityResult [" + i + "]: " + results[i]);
// }
//
// // System.out.println("Top k results : " + multipleKnnQueue.get(k));
// // for (int i = 0; i < k; i++) {
// // System.out.println("Top k results : " + multipleKnnQueue.get(k));
// // }
//
// // multipleKnnQueue.get(index);
//
// }
// @Override
// public void extractPivotFeatures(Map<String, String> pivotThumbnails)
// throws FileNotFoundException, FeatureExtractionException {
// // TODO update implementation
// extractPivotFeatures(pivotThumbnails.keySet());
//
// }
// @Override
// public void generateLireObjectPivots(Integer[] order)
// throws FileNotFoundException, FeatureExtractionException {
//
// File pivotsFile = initLireObjectPivotFile(true);
// // initPivotsFCArchive(false);
//
// DataOutputStream out = null;
//
// try {
// List<IFeaturesCollector> pivotFeatures = FeaturesCollectorsArchive
// .getAll(getSubsetFCArchiveFile());
// // getPivotsFCArchive().getAll();
// if (order != null && pivotFeatures.size() < order.length)
// throw new ArrayIndexOutOfBoundsException(
// "The feature collector list was expected to have more than "
// + order.length + " elements, but only found: "
// + pivotFeatures.size());
//
// LireObject pivot;
// // IFeaturesCollector pivot;
// int positionAsId = 0;
//
// out = new DataOutputStream(new BufferedOutputStream(
// new FileOutputStream(pivotsFile)));
//
// Object[] features = pivotFeatures.toArray().clone();
// //int indexingPivots =
// int pivotsCount = features.length;
//
// if (order != null){
// Reordering.reorder(Arrays.asList(order), features);
// pivotsCount = order.length;
// }
//
// for (int i = 0; i < pivotsCount; i++) {
// positionAsId = i + 1;
// pivot = new LireObject(positionAsId, (IFeaturesCollector) features[i]);
// pivot.writeData(out);
// }
//
// out.flush();
// // lireObjectPivotsArchive.close();
//
// } catch (Exception e) {
// throw new FeatureExtractionException(
// "cannot generate lire pivots file from feature collection archive!",
// e);
// } finally {
// try {
// if (out != null)
// out.close();
// } catch (Exception e) {
// log.warn("Cannot close out Stream for file: " + pivotsFile, e);
// }
// }
// }
// public File getLireObjectPivotsFile() {
// if (lireObjectPivotsFile == null)
// lireObjectPivotsFile = new File(getConfiguration().getPivotsFolder(
// getDataset()), "LireObjectPivots.dat");
//
// return lireObjectPivotsFile;
// }
// @Override
// public void generateLireObjectPivots() throws FileNotFoundException,
// FeatureExtractionException {
// generateLireObjectPivots(null);
// }
protected String getSubset() {
return subset;
}
protected void setSubset(String subset) {
this.subset = subset;
}
// protected FeaturesCollectorsArchive getLireobjectPivotsArchive() {
// return lireObjectPivotsArchive;
// }
}