package eu.europeana.service.ir.image.api;
import it.cnr.isti.feature.extraction.FeatureExtractionException;
import it.cnr.isti.feature.extraction.Image2Features;
import it.cnr.isti.melampo.index.indexing.LireIndexer;
import it.cnr.isti.melampo.index.settings.LireSettings;
import it.cnr.isti.melampo.vir.exceptions.VIRException;
import it.cnr.isti.vir.features.FeatureClassCollector;
import it.cnr.isti.vir.features.FeaturesCollectorArr;
import it.cnr.isti.vir.features.FeaturesCollectorException;
import it.cnr.isti.vir.features.mpeg7.LireObject;
import it.cnr.isti.vir.features.mpeg7.vd.MPEG7VDFormatException;
import it.cnr.isti.vir.file.ArchiveException;
import it.cnr.isti.vir.file.FeaturesCollectorsArchive;
import it.cnr.isti.vir.id.IDString;
import it.cnr.isti.vir.readers.CoPhIRv2Reader;
import it.cnr.isti.vir.similarity.metric.LireMetric;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.InvocationTargetException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLStreamException;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import eu.europeana.api.client.thumbnails.ThumbnailsAccessor;
import eu.europeana.api.client.thumbnails.ThumbnailsForCollectionAccessor;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.exceptions.ImageIndexingException;
import eu.europeana.service.ir.image.index.indexing.ExtendedLireIndexer;
import eu.europeana.service.ir.image.model.IndexingStatus;
/**
 * {@link ImageIndexingService} implementation that extracts visual features
 * from images (via {@link Image2Features}), optionally stores them in a
 * {@link FeaturesCollectorsArchive} and adds them to / removes them from an
 * image index accessed through a {@link LireIndexer}.
 *
 * <p>The batch operations ({@link #insertCollectionByUrls(String, Map)},
 * {@link #insertDatasetByIds(Set)}, {@link #deleteDatasetByIds(Set)}) open the
 * index, process every item while skipping the ones that fail, and always close
 * the index afterwards.
 *
 * <p>The class keeps mutable state (index, archive, extractor) in plain fields
 * and performs no synchronization.
 *
 * @author paolo
 * @author Sergiu Gordea <sergiu.gordea_at_ait.ac.at>
 */
public class ImageIndexingServiceImpl implements ImageIndexingService {

    /** Charset used for the in-memory round trip of the feature descriptor text. */
    private static final String FEATURES_CHARSET = "UTF-8";

    /** A progress line is logged each time this many items were processed successfully. */
    private static final int PROGRESS_LOG_INTERVAL = 1000;

    @Autowired
    private IRConfiguration configuration;

    /** Dataset this service instance works on; fixed at construction time. */
    private final String dataset;

    private FeaturesCollectorsArchive featuresArchive;
    private LireIndexer mp7cIndex;
    private LireSettings settings;
    private Image2Features img2Features;
    private final Logger log = Logger.getLogger(getClass());

    /**
     * Creates a service bound to the given dataset.
     *
     * @param dataset       dataset name; when {@code null} the default dataset
     *                      of the configuration is used
     * @param configuration configuration used to resolve files and settings
     */
    public ImageIndexingServiceImpl(String dataset,
            IRConfiguration configuration) {
        this.configuration = configuration;
        this.dataset = (dataset != null) ? dataset : configuration
                .getDefaultDataset();
    }

    /**
     * Creates a service bound to the default dataset of the configuration.
     *
     * @param configuration configuration used to resolve files and settings
     */
    public ImageIndexingServiceImpl(IRConfiguration configuration) {
        this(null, configuration);
    }

    /**
     * Initialization hook. It currently only reads the configuration through
     * {@link #getConfiguration()} and has no other effect.
     */
    public void init() {
        getConfiguration();
    }

    /**
     * @return the features archive currently in use, or {@code null} when none
     *         has been opened yet
     */
    public FeaturesCollectorsArchive getFeatureCollectorArchive() {
        return featuresArchive;
    }

    /**
     * @return the configuration used by this service
     */
    public IRConfiguration getConfiguration() {
        return configuration;
    }

    /**
     * @param configuration the configuration to be used by this service
     */
    public void setConfiguration(IRConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * @return the dataset this service was created for
     */
    public String getDataset() {
        return dataset;
    }

    /**
     * Placeholder for index initialization; the current implementation does
     * nothing.
     *
     * @throws ImageIndexingException never thrown by this implementation
     */
    public void initIndex() throws ImageIndexingException {
        // intentionally empty
    }

    /**
     * Prepares the service for indexing: registers the feature classes, makes
     * sure the parent folder of the features archive file exists, creates the
     * feature extractor and the features archive and opens the index.
     *
     * <p>NOTE(review): the extractor and the archive are created for the
     * {@code dataset} argument, whereas {@link #setVariables()} loads the
     * settings for {@link #getDataset()}. Confirm that callers always pass the
     * same dataset.
     *
     * @param dataset dataset whose archive file and index configuration folder
     *                are used
     * @throws ImageIndexingException if any step of the initialization fails
     */
    public void openIndex(String dataset) throws ImageIndexingException {
        registerFeaturesCollector();
        File featuresArchiveFile = getConfiguration().getFeaturesArchiveFile(
                dataset);

        // create file path if needed; a file without parent needs no folder
        if (!featuresArchiveFile.exists()) {
            File parentFolder = featuresArchiveFile.getParentFile();
            if (parentFolder != null && !parentFolder.exists()
                    && !parentFolder.mkdirs()) {
                log.warn("Cannot create folder for features archive: "
                        + parentFolder);
            }
        }

        try {
            img2Features = initFeatureExtractor(dataset);
            // features archive, Feature classes, VirId, FeaturesCollection array
            featuresArchive = initFeaturesArchive(featuresArchiveFile);
            setVariables();
        } catch (Exception e) {
            throw new ImageIndexingException(
                    "Exception when opening image index for dataset: "
                            + dataset, e);
        }
    }

    /**
     * Registers the feature set required by {@link LireMetric} with the
     * {@link CoPhIRv2Reader}.
     */
    protected void registerFeaturesCollector() {
        CoPhIRv2Reader.setFeatures(LireMetric.reqFeatures);
    }

    /**
     * Creates the features archive backed by the given file.
     *
     * @param featuresArchiveFile file backing the archive
     * @return the newly created archive
     * @throws Exception if the archive cannot be created
     */
    protected FeaturesCollectorsArchive initFeaturesArchive(
            File featuresArchiveFile) throws Exception {
        return new FeaturesCollectorsArchive(featuresArchiveFile,
                getVirFeatureClasses(), IDString.class,
                FeaturesCollectorArr.class);
    }

    /**
     * @return the feature classes requested by a {@link LireMetric}
     */
    protected FeatureClassCollector getVirFeatureClasses() {
        return new LireMetric().getRequestedFeaturesClasses();
    }

    /**
     * Creates the feature extractor configured by the index configuration
     * folder of the given dataset.
     *
     * @param dataset dataset whose index configuration folder is used
     * @return a new feature extractor
     */
    protected Image2Features initFeatureExtractor(String dataset)
            throws IOException, InstantiationException, IllegalAccessException,
            ClassNotFoundException {
        return new Image2Features(configuration.getIndexConfFolder(dataset));
    }

    /**
     * Creates the indexer used to access the image index.
     *
     * @return a new {@link ExtendedLireIndexer}
     */
    protected ExtendedLireIndexer initFeatureIndexer() {
        return new ExtendedLireIndexer();
    }

    /**
     * Closes the index on a best-effort basis: failures are logged and not
     * propagated. Does nothing when no index has been opened.
     *
     * @throws ImageIndexingException declared for API compatibility; not
     *                                thrown by this implementation
     */
    public void closeIndex() throws ImageIndexingException {
        if (mp7cIndex == null) {
            // nothing was opened, so there is nothing to close
            return;
        }
        try {
            mp7cIndex.closeIndex();
        } catch (Exception e) {
            log.warn("Unexpected exception thrown when closing image index: ",
                    e);
        }
    }

    /**
     * Extracts the features of the image found at the given URL and adds them
     * to the index; the URL itself is stored as thumbnail URL.
     *
     * @see eu.europeana.service.ir.image.api.ImageIndexingService#insertImage(java.lang.String,
     *      java.net.URL)
     */
    @Override
    public void insertImage(String docID, URL imageURL)
            throws ImageIndexingException {
        // same lazy initialization as used for the stream based variant
        ensureFeatureExtractor();

        String imgFeatures;
        try {
            imgFeatures = img2Features.extractFeatures(imageURL);
        } catch (FeatureExtractionException e) {
            throw new ImageIndexingException(
                    "Cannot extract features for image:" + imageURL, e);
        }

        String thumbnailUrl = imageURL.toString();
        insertFeatures(docID, thumbnailUrl, imgFeatures);
    }

    /**
     * Extracts the features of the image read from the given stream and adds
     * them to the index; {@code "image/" + docID} is stored as thumbnail URL.
     * The stream is not closed by this method.
     *
     * @see eu.europeana.service.ir.image.api.ImageIndexingService#insertImage(java.lang.String,
     *      java.io.InputStream)
     */
    @Override
    public void insertImage(String docID, InputStream imageObj)
            throws ImageIndexingException {
        String imgFeatures = extractFeatures(docID, imageObj);
        String thumbnailUrl = "image/" + docID;
        insertFeatures(docID, thumbnailUrl, imgFeatures);
    }

    /**
     * Indexes the image stored in the given file. The file stream opened by
     * this method is always closed before returning.
     *
     * @param docID     identifier under which the image is indexed
     * @param imageFile file containing the image
     * @throws ImageIndexingException if the file is missing or cannot be
     *                                indexed
     */
    public void insertImage(String docID, File imageFile)
            throws ImageIndexingException {
        if (imageFile == null) {
            throw new ImageIndexingException(
                    "No image file available for id: " + docID);
        }

        InputStream imageStream;
        try {
            imageStream = new FileInputStream(imageFile);
        } catch (FileNotFoundException e) {
            throw new ImageIndexingException(
                    "Cannot access file:" + imageFile, e);
        }

        try {
            insertImage(docID, imageStream);
        } finally {
            try {
                imageStream.close();
            } catch (IOException e) {
                // indexing result is not affected by a failing close
                log.warn("Cannot close image file: " + imageFile, e);
            }
        }
    }

    /**
     * Extracts the feature descriptor text from an image stream, creating the
     * feature extractor first if it does not exist yet.
     *
     * @param docID    identifier of the image, used for error reporting
     * @param imageObj stream providing the image content
     * @return the extracted features in textual form
     * @throws ImageIndexingException if the extractor cannot be created or the
     *                                extraction fails
     */
    protected String extractFeatures(String docID, InputStream imageObj)
            throws ImageIndexingException {
        ensureFeatureExtractor();

        try {
            return img2Features.extractFeatures(imageObj);
        } catch (Exception e) {
            throw new ImageIndexingException(
                    "Cannot extract features from input stream. docId" + docID,
                    e);
        }
    }

    /**
     * Parses the textual features, stores them in the features archive (when
     * one is available) and adds them to the index under the given id.
     *
     * @param docID        identifier under which the features are indexed
     * @param thumbnailUrl thumbnail URL stored together with the features
     * @param imgFeatures  features in the textual form produced by the
     *                     feature extractor
     * @throws ImageIndexingException if parsing, archiving or indexing fails
     */
    protected void insertFeatures(String docID, String thumbnailUrl,
            String imgFeatures) throws FactoryConfigurationError,
            ImageIndexingException {
        BufferedReader br = null;
        try {
            // encode and decode with the same explicit charset so that the
            // round trip does not depend on the platform default
            InputStream is = new ByteArrayInputStream(
                    imgFeatures.getBytes(FEATURES_CHARSET));
            br = new BufferedReader(new InputStreamReader(is,
                    FEATURES_CHARSET));

            FeaturesCollectorArr features = readFeatures(br);
            features.setID(new IDString(docID));
            if (featuresArchive != null) {
                featuresArchive.add(features);
            }

            ensureIndexOpened();

            LireObject obj = new LireObject(features);
            obj.setThmbURL(thumbnailUrl);
            mp7cIndex.addDocument(obj, docID);
        } catch (ArchiveException e) {
            throw new ImageIndexingException(
                    "Feature archive access exception:", e);
        } catch (Exception e) {
            throw new ImageIndexingException(
                    "Indexing image by URL thows exception:", e);
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    // the reader wraps an in-memory buffer, a failing close
                    // is not expected and is harmless
                    log.warn("warning: exception occured when closing buffered reader of image features for image "
                            + docID, e);
                }
            }
        }
    }

    /**
     * Reads one features collection from the given reader after registering
     * the required feature classes.
     *
     * @param br reader positioned at the textual features
     * @return the parsed features collection
     */
    protected FeaturesCollectorArr readFeatures(BufferedReader br)
            throws IOException, FactoryConfigurationError,
            MPEG7VDFormatException, XMLStreamException, InstantiationException,
            IllegalAccessException, InvocationTargetException,
            NoSuchMethodException, FeaturesCollectorException {
        registerFeaturesCollector();
        return CoPhIRv2Reader.getObj(br);
    }

    /**
     * Removes the document with the given id from the index, opening the
     * index first when needed.
     *
     * @param docID identifier of the document to remove
     * @throws ImageIndexingException if the index cannot be opened or the
     *                                removal fails
     */
    protected void deleteImage(String docID) throws ImageIndexingException {
        try {
            ensureIndexOpened();
            mp7cIndex.deleteDocument(docID);
        } catch (Exception e) {
            throw new ImageIndexingException(
                    "Removing image by ID thows exception:", e);
        }
    }

    /**
     * Indexes the thumbnails of a collection. Unless the id already ends with
     * {@code "*"}, the suffix {@code "_*"} is appended to build the wild-card
     * collection name used for the thumbnail lookup.
     */
    @Override
    public int insertCollection(String collectionId)
            throws ImageIndexingException {
        if (collectionId == null) {
            throw new ImageIndexingException(
                    "Collection id must not be null");
        }

        // collectionName starts with "collectionId_". Use wild-card search
        String collectionName = collectionId.endsWith("*") ? collectionId
                : collectionId + "_*";

        ThumbnailsForCollectionAccessor tfca = new ThumbnailsForCollectionAccessor(
                collectionName);

        Map<String, String> thumbnails;
        try {
            thumbnails = tfca.getThumbnailsForCollection(0, -1,
                    ThumbnailsAccessor.ERROR_POLICY_RETHROW);
        } catch (Throwable th) {
            // kept as Throwable: the failure modes of the accessor are not
            // visible from this class
            throw new ImageIndexingException("Cannot access thumbnails map!",
                    th);
        }

        return insertCollectionByUrls(getDataset(), thumbnails);
    }

    /**
     * Indexes all thumbnails of the given map (image id to thumbnail URL).
     * Entries with a malformed URL or failing indexing are logged and skipped.
     * The index is closed afterwards, even when processing is aborted.
     *
     * @param dataset    dataset whose index is opened
     * @param thumbnails map of image ids to thumbnail URLs
     * @return number of successfully indexed images
     * @throws ImageIndexingException if the index cannot be opened
     */
    public int insertCollectionByUrls(String dataset,
            Map<String, String> thumbnails) throws ImageIndexingException {
        int indexedImageCount = 0;
        int skipedFileCount = 0;

        openIndex(dataset);
        try {
            for (Entry<String, String> thumbnail : thumbnails.entrySet()) {
                String thumbnailLocation = thumbnail.getValue();
                try {
                    URL imageUrl = new URL(thumbnailLocation);
                    insertImage(thumbnail.getKey(), imageUrl);
                    indexedImageCount++;
                    logProgress(indexedImageCount);
                } catch (MalformedURLException e) {
                    // report the value that failed to parse, not a URL
                    // left over from a previous iteration
                    log.warn("Wrong thumbnail URL format:" + thumbnailLocation,
                            e);
                    skipedFileCount++;
                } catch (ImageIndexingException e) {
                    log.warn("Cannot index thumbnail:" + thumbnailLocation, e);
                    skipedFileCount++;
                }
            }
            log.info("Skiped wrong thumbnail URLs :" + skipedFileCount);
            log.info("Successfully indexed thumbnail URLs :"
                    + indexedImageCount);
        } finally {
            closeIndex();
        }
        return indexedImageCount;
    }

    /**
     * Not supported by this implementation.
     *
     * @throws ImageIndexingException always
     */
    @Override
    public IndexingStatus getIndexingStatus(String collectionId)
            throws ImageIndexingException {
        throw new ImageIndexingException("Not implemented");
    }

    /**
     * Indexes the image files that the configuration resolves for the given
     * ids within the dataset of this service. Images that cannot be indexed
     * are logged and skipped. The index is closed afterwards, even when
     * processing is aborted.
     *
     * @return number of successfully indexed images
     */
    @Override
    public int insertDatasetByIds(Set<String> ids)
            throws ImageIndexingException {
        int indexedImageCount = 0;
        int skipedFileCount = 0;

        openIndex(dataset);
        try {
            for (String imageId : ids) {
                try {
                    File imageFile = getConfiguration().getImageFile(dataset,
                            imageId);
                    insertImage(imageId, imageFile);
                    indexedImageCount++;
                    logProgress(indexedImageCount);
                } catch (ImageIndexingException e) {
                    log.warn("Cannot index thumbnail with id:" + imageId, e);
                    skipedFileCount++;
                }
            }
            log.info("Skipped images that could not be indexed: "
                    + skipedFileCount);
            log.info("Successfully indexed images: " + indexedImageCount);
        } finally {
            closeIndex();
        }
        return indexedImageCount;
    }

    /**
     * Removes the documents with the given ids from the index of the dataset
     * of this service. Ids that cannot be removed are logged and skipped. The
     * index is closed afterwards, even when processing is aborted.
     *
     * @return number of successfully processed removals
     */
    @Override
    public int deleteDatasetByIds(Set<String> ids)
            throws ImageIndexingException {
        int removedCount = 0;
        int skipedCount = 0;

        openIndex(dataset);
        try {
            for (String imageId : ids) {
                try {
                    deleteImage(imageId);
                    removedCount++;
                    logProgress(removedCount);
                } catch (ImageIndexingException e) {
                    log.warn("Cannot remove thumbnail from index. id:"
                            + imageId, e);
                    skipedCount++;
                }
            }
            log.info("Skipped thumbnails that could not be removed: "
                    + skipedCount);
            log.info("Successfully removed thumbnails: " + removedCount);
        } finally {
            closeIndex();
        }
        return removedCount;
    }

    /**
     * Creates the feature extractor for the dataset of this service unless
     * one exists already.
     */
    private void ensureFeatureExtractor() throws ImageIndexingException {
        if (img2Features != null) {
            return;
        }
        try {
            img2Features = initFeatureExtractor(getDataset());
        } catch (Exception e) {
            throw new ImageIndexingException(
                    "Cannot init feature extractor for dataset!"
                            + getDataset(), e);
        }
    }

    /**
     * Loads the settings and opens the index unless both are available
     * already. Checking the indexer as well covers an earlier
     * {@link #setVariables()} call that failed after the settings were read.
     */
    private void ensureIndexOpened() throws IOException, VIRException {
        if (settings == null || mp7cIndex == null) {
            setVariables();
        }
    }

    /**
     * Logs a progress line when the given count of successfully processed
     * items is a multiple of {@link #PROGRESS_LOG_INTERVAL}. Callers invoke it
     * only right after incrementing the count, so it never fires for zero and
     * never repeats for the same count.
     */
    private void logProgress(int processedCount) {
        if (processedCount % PROGRESS_LOG_INTERVAL == 0) {
            // no explicit commit needed here: auto flush is used
            log.info("Processed items count: " + processedCount);
        }
    }

    /**
     * Loads the settings for {@link #getDataset()}, falls back to the first
     * archive listed in the settings when no features archive has been set,
     * and opens a fresh indexer with these settings.
     */
    private void setVariables() throws IOException, VIRException {
        // TODO: check if this code is not redundant
        settings = this.configuration.getLireSettings(getDataset());

        // TODO: verify the correctness of this implementation. Is this
        // redundant or not?
        if (featuresArchive == null && settings.getFCArchives().size() > 0) {
            featuresArchive = settings.getFCArchives().getArchive(0);
        }

        // TODO: move to method open new indexer
        mp7cIndex = initFeatureIndexer();
        mp7cIndex.OpenIndex(settings);
    }
}