package eu.europeana.creative.dataset.culturecam.v1;
import static org.junit.Assert.assertEquals;
import it.cnr.isti.indexer.IndexHelper;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Before;
import org.junit.Test;
import eu.europeana.api.client.EuropeanaApi2Client;
import eu.europeana.api.client.dataset.DatasetDescriptor;
import eu.europeana.api.client.exception.EuropeanaApiProblem;
import eu.europeana.api.client.model.EuropeanaApi2Results;
import eu.europeana.api.client.model.search.EuropeanaApi2Item;
import eu.europeana.api.client.model.search.parts.BaseAggregation;
import eu.europeana.api.client.search.query.Api2Query;
import eu.europeana.api.client.search.query.SubQuery;
import eu.europeana.api.client.thumbnails.ThumbnailAccessorUtils;
import eu.europeana.api.client.thumbnails.download.ThumbnailDownloader;
import eu.europeana.api.client.thumbnails.processing.LargeThumbnailsetProcessing;
import eu.europeana.creative.dataset.IRTestConfigurations;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionEnum;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionImpl;
import eu.europeana.creative.dataset.pt.classification.GrayScaleSepiaDetector;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.IRConfigurationImpl;
import eu.europeana.service.ir.image.api.ImageSearchingService;
import eu.europeana.service.ir.image.api.ImageSearchingServiceImpl;
import eu.europeana.service.ir.image.exceptions.ImageSearchingException;
public class CultureCamV1ThumbnailMapsTest extends ThumbnailAccessorUtils implements IRTestConfigurations{
//private boolean overwriteThumbnails = false;
String colSelectionFilename = "/selection/input/culturecam_5000.csv";
String colectionThumbnailsFilename = "/selection/input/thumbnails_v1/culturecam_5000.csv";
String colectionClassifiedFilename = "/selection/input/thumbnails_v1/culturecam_pd_classified.csv";
String designInputFilename = "/selection/input/design_v1.csv";
//String thumbnailMapFolder = "/selection/thumbnailmap";
//final String STEP_THUMBNAILMAP = "THUMBNAILMAP";
final String STEP_THUMBNAILMAP_V1 = "THUMBNAILMAP_V1";
final String STEP_SUBSET_V1 = "SUBSET_V1";
final String STEP_CLASSIFIED_V1 = "CLASSIFIED_V1";
final String STEP_FILTER_OUT = "FILTER_OUT";
private String processingStep = null;
private boolean overwriteThumbnails = false;
final String IMAGE_FOLDER = "/app/eucreative/imagesimilarityhome/culturecam/image/";
ImageSearchingService imageSearchingService;
@Before
public void init(){
String dataset = "culturecam";
setDataset(dataset);
}
//1@Test
public void buildSelectedCollections() throws IOException, EuropeanaApiProblem{
File colSelectionCvsFile = new File(getCollectionsCvsFolder() + colSelectionFilename);
//we misuse the readThumbnailsMap as this is the same implementation as readCollectionsMap
DatasetDescriptor descriptor;
int missingThumbnails;
int missingThumbnailsSum = 0;
int expectedResultsTotal = 0;
int expectedResults = 0;
SelectionDescriptionImpl selectionDescription;
String subsetName;
Map<String, String> selectedCollections = readThumbnailsMap(colSelectionCvsFile);
//#ID;Title;Portal link;Results;items;selection;dicriminator;Content selection comments
for (Map.Entry<String, String> collection : selectedCollections.entrySet()) {
selectionDescription = new SelectionDescriptionImpl(collection.getKey(), collection.getValue().split(";"));
subsetName = buildSubSetName(selectionDescription);
descriptor = new DatasetDescriptor(subsetName, selectionDescription.getId());
this.setProcessingStep(STEP_THUMBNAILMAP_V1);
expectedResults = selectionDescription.getIntFieldValue(SelectionDescriptionEnum.RESULT_COUNT);
File thumbnailsMapFile = getCollectionCsvFile(descriptor, STEP_THUMBNAILMAP_V1);
if(thumbnailsMapFile.exists()){
log.info("Skip selected collection. Thumbnailsmap exists already :" + thumbnailsMapFile);
continue;
}
missingThumbnails = createSubset(subsetName, selectionDescription.getId(), selectionDescription.getFieldValue(SelectionDescriptionEnum.PORTAL_LINK), 0, expectedResults);
missingThumbnailsSum += missingThumbnails;
expectedResultsTotal += expectedResults;
if(missingThumbnails > 0)
System.out.println("Missing thumbnails in dataset:" + descriptor + ": " + missingThumbnails);
}
//we expect no more than 10 missing Thumbnails
log.info("Number of missing thumbnails: " + missingThumbnailsSum);
log.info("Total expected results: " + expectedResultsTotal);
}
//2@Test
public void downloadThumbnails() throws FileNotFoundException, IOException {
//File datasetFile = getConfig().getDatasetFile(getDataset());
File downloadFolder = getConfig().getImageFolderAsFile(getDataset());
File datasetFile = new File(getCollectionsCvsFolder() + colectionThumbnailsFilename);
if(!datasetFile.exists())
performDatasetAggregation(STEP_THUMBNAILMAP_V1, datasetFile);
LargeThumbnailsetProcessing datasetDownloader = new LargeThumbnailsetProcessing(datasetFile);
ThumbnailDownloader observer = new ThumbnailDownloader(downloadFolder);
observer.setSkipExistingFiles(!overwriteThumbnails);
datasetDownloader.addObserver(observer);
datasetDownloader.processThumbnailset(0, -1, 1000);
log.debug("Skipped items: " + datasetDownloader.getSkippedItemsCount());
log.warn("Failed downloads: " + datasetDownloader.getFailureCount());
log.info("Downloaded files: " + datasetDownloader.getItemsProcessed());
assertEquals(0, datasetDownloader.getFailureCount());
// for (String itemId : skippedItems) {
// System.out.println(itemId);
// }
}
//3@Test
public void categorizeSubsetThumbnails() throws FileNotFoundException, IOException {
File colSelectionCvsFile = new File(getCollectionsCvsFolder() + colSelectionFilename);
//we misuse the readThumbnailsMap as this is the same implementation as readCollectionsMap
DatasetDescriptor descriptor;
SelectionDescriptionImpl selectionDescription;
String subsetName;
Map<String, String> selectedCollections = readThumbnailsMap(colSelectionCvsFile);
//#ID;Title;Portal link;Results;items;selection;dicriminator;Content selection comments
File categorizedThumbnailsFile;
File thumbnailsCvsFile;
for (Map.Entry<String, String> collection : selectedCollections.entrySet()) {
selectionDescription = new SelectionDescriptionImpl(collection.getKey(), collection.getValue().split(";"));
subsetName = buildSubSetName(selectionDescription);
descriptor = new DatasetDescriptor(subsetName, selectionDescription.getId());
this.setProcessingStep(STEP_CLASSIFIED_V1);
thumbnailsCvsFile = getCollectionCsvFile(descriptor, STEP_THUMBNAILMAP_V1);
//categorize by color-fullness
categorizedThumbnailsFile = categorizeThumbnails(descriptor,
thumbnailsCvsFile);
System.out.println("subset written to file: " + categorizedThumbnailsFile);
}
}
//4@Test
public void filterDesignThumbnails() throws IOException {
//read design input
File desginV1 = new File(getCollectionsCvsFolder() + designInputFilename);
//we misuse the readThumbnailsMap as this is the same implementation as readCollectionsMap
Map<String, String> designV1Thumbnails = readThumbnailsMap(desginV1);
log.debug("Items in design_v1 dataset :" + designV1Thumbnails.size());
//read culturecam subsets
File cvsFolder = new File(getCollectionsCvsFolder() + STEP_SUBSET_V1.toLowerCase() + "/");
File[] collectionFiles = cvsFolder.listFiles();
Map<String, String> subset;
for (int i = 0; i < collectionFiles.length; i++) {
//for each subset
subset = readThumbnailsMap(collectionFiles[i]);
for (String thumbnailId: subset.keySet()) {
//remove items available in subsets
if(designV1Thumbnails.containsKey(thumbnailId)){
log.trace("removing item from subset: " + thumbnailId);
designV1Thumbnails.remove(thumbnailId);
}
}
}
DatasetDescriptor designSubset = new DatasetDescriptor("Design", "V1");
File designSubsetFile = getCollectionCsvFile(designSubset, STEP_SUBSET_V1);
log.info("Writing items in desing subset: " + designV1Thumbnails.size());
writeThumbnailsToCsvFile(designSubset, designV1Thumbnails, designSubsetFile);
// log.trace("Closing dataset file");
// datasetWriter.close();
}
@Test
public void filterDesignNonPDThumbnails() throws IOException, EuropeanaApiProblem {
//read design input
File desginV1 = new File(getCollectionsCvsFolder() + designInputFilename);
//we misuse the readThumbnailsMap as this is the same implementation as readCollectionsMap
Map<String, String> designV1Thumbnails = readThumbnailsMap(desginV1);
log.debug("Items in design_v1 dataset :" + designV1Thumbnails.size());
EuropeanaApi2Client euClient = new EuropeanaApi2Client();
EuropeanaApi2Item obj;
Map<String, String> designNonPDThumbnails = new HashMap<String, String>();
Api2Query searchQuery = new Api2Query();
EuropeanaApi2Results searchResults;
for (Map.Entry<String, String> item : designV1Thumbnails.entrySet()) {
obj = null;//clear obj
if(!(searchQuery.getSubQueries() == null) && !searchQuery.getSubQueries().isEmpty())
searchQuery.getSubQueries().remove(0);//re-initialize
searchQuery.addSubQuery(new SubQuery("europeana_id", item.getKey(), false, true, false));
searchResults = euClient.searchApi2(searchQuery, 2, -1);
if(!searchResults.getItems().isEmpty())
obj = searchResults.getItems().get(0);
else
System.out.println("Cannot find object with id query: " + item.getKey());
if(obj != null && !hasPdRights(obj)){
designNonPDThumbnails.put(item.getKey(), item.getValue());
}
}
DatasetDescriptor designSubset = new DatasetDescriptor("Design", "V1");
File filterOoutFile = getCollectionCsvFile(designSubset, STEP_FILTER_OUT);
log.info("Writing items in desing subset: " + designNonPDThumbnails.size());
writeThumbnailsToCsvFile(designSubset, designNonPDThumbnails, filterOoutFile);
// log.trace("Closing dataset file");
// datasetWriter.close();
}
protected boolean hasPdRights(BaseAggregation aggregation) {
boolean ret = false;
if(aggregation != null && aggregation.getEdmRights() != null){
Collection<List<String>> allRights = aggregation.getEdmRights().values();
for (List<String> rightsList : allRights) {
if(rightsList == null || rightsList.isEmpty())
continue;
else for (String rights : rightsList) {
if(rights != null && rights.indexOf("/publicdomain/") > -1)
ret = true;
}
}
}
return ret;
}
protected boolean hasPdRights(EuropeanaApi2Item item) {
boolean ret = false;
if(item != null && item.getRights() != null){
for (String rights : item.getRights()) {
if(rights.indexOf("/publicdomain/") > -1)
return true;
}
}
return ret;
}
//@Test
public void aggregateDataset() throws IOException {
File datasetFile = getConfig().getDatasetFile(getDataset());
performDatasetAggregation(STEP_SUBSET_V1, datasetFile);
}
public File categorizeThumbnails(DatasetDescriptor datasetDescriptor,
File thumbnailsFile) throws FileNotFoundException, IOException {
// String thumbnailsFile = getCvsFileForStep(datasetDescriptor,
// STEP_THUMBNAILS);
// new File(thumbnailsFile)
File outputFile = getCollectionCsvFile(datasetDescriptor, STEP_CLASSIFIED_V1);
LargeThumbnailsetProcessing datasetCategorization = new LargeThumbnailsetProcessing(
thumbnailsFile);
// String imageFolder = getConfiguration().getImageFolder(getDataset());
String imageFolder = IMAGE_FOLDER;
GrayScaleSepiaDetector observer = new GrayScaleSepiaDetector(new File(
imageFolder), 85, 3);
//final File outputFile = new File(outFile);
observer.setOutputFile(outputFile);
datasetCategorization.addObserver(observer);
if(blockSize < 0)
blockSize = 1000;
datasetCategorization.processThumbnailset(start, limit, blockSize);
System.out.println("Skipped items: "
+ datasetCategorization.getFailureCount());
return outputFile;
}
protected String buildSubSetName(
SelectionDescriptionImpl selectionDescription) {
String subsetName = selectionDescription.getFieldValue(SelectionDescriptionEnum.TITLE);
subsetName = subsetName.substring(0, Math.min(subsetName.length(), 15));
return subsetName;
}
protected File getCollectionCsvFile(DatasetDescriptor dataset) {
return getCollectionCsvFile(dataset, getProcessingStep());
}
protected File getCollectionCsvFile(DatasetDescriptor dataset,
final String processingStep) {
if(processingStep != null){
String fileName = getCollectionsCvsFolder() + processingStep.toLowerCase() + "/" + dataset.getImageSetName()
+ "_" + encode(dataset.getCollectionName()) + ".csv";
return new File(fileName);
}else{
return super.getCollectionCsvFile(dataset);
}
}
// @Test
public void categorizeThumbnails() throws FileNotFoundException, IOException {
//DatasetDescriptor datasetDescriptor;
//File thumbnailsFile;
// String thumbnailsFile = getCvsFileForStep(datasetDescriptor,
// STEP_THUMBNAILS);
// new File(thumbnailsFile)
//String outFile = getCvsFileForStep(datasetDescriptor, STEP_CLASSIFIED);
File thumbnailsFile = new File(getCollectionsCvsFolder() + colectionThumbnailsFilename);
File outFile = new File(getCollectionsCvsFolder() + colectionClassifiedFilename);
LargeThumbnailsetProcessing datasetCategorization = new LargeThumbnailsetProcessing(
thumbnailsFile);
//String imageFolder = getConfiguration().getImageFolder(getDataset());
String imageFolder = IMAGE_FOLDER;
GrayScaleSepiaDetector observer = new GrayScaleSepiaDetector(new File(
imageFolder), 85, 3);
//final File outputFile = new File(outFile);
observer.setOutputFile(outFile);
datasetCategorization.addObserver(observer);
blockSize = 1000;
datasetCategorization.processThumbnailset(start, limit, blockSize);
System.out.println("Skipped items: "
+ datasetCategorization.getFailureCount());
//return outFile;
}
private void performDatasetAggregation(String step, File datasetFile) throws IOException {
File cvsFolder = new File(getCollectionsCvsFolder() + step.toLowerCase() + "/");
File[] collectionFiles = cvsFolder.listFiles();
BufferedReader reader = null;
// String headerLine = null;
String line = null;
//BufferedWriter datasetWriter = getDataSetFileWriter(false);
datasetFile.getParentFile().mkdirs();
BufferedWriter datasetWriter = new BufferedWriter(new FileWriter(datasetFile));
log.debug("Aggregating dataset: " + getDataset() + " to file: " + datasetFile);
for (int i = 0; i < collectionFiles.length; i++) {
reader = new BufferedReader(new FileReader(collectionFiles[i]));
boolean firstLine = true;
while ((line = reader.readLine()) != null) {
// write headers to sysout
if (firstLine) {
log.debug("Writting dataset headerline: " + line);
firstLine = false;
}
// write all data to dataset
datasetWriter.write(line);
datasetWriter.write("\n");
}
datasetWriter.flush();
// close reader
try {
reader.close();
} catch (IOException e) {
System.out.println("cannot close reader for: "
+ collectionFiles[i]);
e.printStackTrace();
}
}
log.trace("Closing dataset file");
datasetWriter.close();
}
protected IRConfiguration getConfig() {
IRConfiguration config = new IRConfigurationImpl();
return config;
}
public File getDataSetFile(boolean urls) {
IRConfiguration config = getConfig();
if (urls)
return config.getDatasetUrlsFile(getDataset());
else
return config.getDatasetFile(getDataset());
}
protected String getCollectionsCvsFolder() {
return getCollectionsCvsFolder(getDataset());
}
public String getCollectionsCvsFolder(String dataset) {
return IRTestConfigurations.COLLECTIONS_FOLDER + dataset + "/";
}
public String getProcessingStep() {
return processingStep;
}
public void setProcessingStep(String processingStep) {
this.processingStep = processingStep;
}
@Test
public void buildIndexedUrlsFile() throws FileNotFoundException,
IOException, ImageSearchingException {
IRConfiguration config = getConfig();
File datasetFile = config.getDatasetFile(getDataset());
IndexHelper ixHelper = new IndexHelper();
Map<String, String> thumbnailsMap = ixHelper
.getThumbnailsMap(datasetFile);
BufferedWriter indexedUrlsWriter = getDataSetFileWriter(true);
//EuropeanaId euId = new EuropeanaId();
int counter = 0;
for (Map.Entry<String, String> thumbnail : thumbnailsMap.entrySet()) {
//euId.setNewId(thumbnail.getKey());
try {
getImageSearchingService().searchSimilar(thumbnail.getKey());
if (getImageSearchingService().getTotalResults() > 0) {
// write to file
indexedUrlsWriter.append(thumbnail.getKey()).append("; ");
indexedUrlsWriter.append(thumbnail.getValue()).append("\n");
counter++;
} else {
// not indexed yet
System.out.println("Skipped item: " + thumbnail.getKey());
}
} catch (ImageSearchingException e) {
System.out.println(e.getMessage());
}
}
System.out.println("correct items: " + counter);
}
public ImageSearchingService getImageSearchingService() {
if (imageSearchingService == null) {
imageSearchingService = new ImageSearchingServiceImpl(getDataset(),
getConfig());
imageSearchingService.init();
}
return imageSearchingService;
}
protected File getImageFile(String id) {
return getConfig().getImageFile(getDataset(), id);
}
}