package eu.europeana.creative.dataset.culturecam;
import static org.junit.Assert.assertEquals;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import org.junit.Before;
import org.junit.Test;
import eu.europeana.api.client.dataset.DatasetDescriptor;
import eu.europeana.api.client.exception.EuropeanaApiProblem;
import eu.europeana.api.client.myeuropeana.exception.MyEuropeanaApiException;
import eu.europeana.api.client.thumbnails.ThumbnailAccessorUtils;
import eu.europeana.api.client.thumbnails.download.ThumbnailDownloader;
import eu.europeana.api.client.thumbnails.processing.LargeThumbnailsetProcessing;
import eu.europeana.creative.dataset.IRTestConfigurations;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionEnum;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionImpl;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.IRConfigurationImpl;
public class CultureCamThumbnailMapsTest extends ThumbnailAccessorUtils implements IRTestConfigurations{
/** Path, appended to the collections CSV folder, of the CSV listing the selected collections. */
String colSelectionFilename = "/selection/input/culturecam_5000.csv";
/** Path, appended to the collections CSV folder, of the design input CSV read by filterDesignThumbnails(). */
String designInputFilename = "/selection/input/design_v1.csv";
//String thumbnailMapFolder = "/selection/thumbnailmap";
/** Processing step whose lower-cased name is the folder holding the per-collection thumbnail maps. */
final String STEP_THUMBNAILMAP = "THUMBNAILMAP";
/** Processing step whose lower-cased name is the folder holding the per-collection subset files. */
final String STEP_SUBSET = "SUBSET";
/** Step used by getCollectionCsvFile(DatasetDescriptor) to pick the target folder; null until set. */
private String processingStep = null;
/** When false, downloadThumbnails() tells the downloader to skip files that already exist. */
private boolean overwriteThumbnails = false;
/**
 * Selects the "culturecam" dataset before each test method runs.
 */
@Before
public void init() {
    setDataset("culturecam");
}
//@Test
/**
 * Creates the thumbnail map for every collection listed in the selection CSV.
 * Collections whose thumbnail map file already exists are skipped. The number of
 * missing thumbnails and the number of expected results are summed over the
 * processed collections and logged at the end.
 *
 * @throws IOException if the selection CSV or a thumbnail map cannot be read or written
 * @throws EuropeanaApiProblem if creating a subset fails
 */
public void buildSelectedCollections() throws IOException, EuropeanaApiProblem {
    // readThumbnailsMap is reused on purpose: it parses the file exactly as a readCollectionsMap would
    final File selectionCsv = new File(getCollectionsCvsFolder() + colSelectionFilename);
    final Map<String, String> selectedCollections = readThumbnailsMap(selectionCsv);

    int totalMissing = 0;
    int totalExpected = 0;

    // columns: #ID;Title;Portal link;Results;items;selection;discriminator;Content selection comments
    for (Map.Entry<String, String> row : selectedCollections.entrySet()) {
        final SelectionDescriptionImpl selection =
                new SelectionDescriptionImpl(row.getKey(), row.getValue().split(";"));
        final String subsetName = buildSubSetName(selection);
        final DatasetDescriptor descriptor = new DatasetDescriptor(subsetName, selection.getId());

        this.setProcessingStep(STEP_THUMBNAILMAP);
        final int expected = selection.getIntFieldValue(SelectionDescriptionEnum.RESULT_COUNT);

        final File thumbnailsMapFile = getCollectionCsvFile(descriptor, STEP_THUMBNAILMAP);
        if (!thumbnailsMapFile.exists()) {
            final int missing = createSubset(subsetName, selection.getId(),
                    selection.getFieldValue(SelectionDescriptionEnum.PORTAL_LINK), 0, expected);
            totalMissing += missing;
            totalExpected += expected;
            if (missing > 0) {
                System.out.println("Missing thumbnails in dataset:" + descriptor + ": " + missing);
            }
        } else {
            log.info("Skip selected collection. Thumbnailsmap exists already :" + thumbnailsMapFile);
        }
    }

    // no more than 10 missing thumbnails are expected; this is only logged, not asserted
    log.info("Number of missing thumbnails: " + totalMissing);
    log.info("Total expected results: " + totalExpected);
}
/**
 * Writes one subset file per collection listed in the selection CSV.
 * When the selection type is "all" the thumbnail map file is copied unchanged;
 * otherwise a random subset of the configured size is drawn and written,
 * overwriting an existing subset file.
 *
 * @throws MyEuropeanaApiException declared for the helpers invoked here
 * @throws IOException if a file cannot be read, copied or written
 */
@Test
public void buildSubsets() throws MyEuropeanaApiException, IOException {
    // readThumbnailsMap is reused on purpose: it parses the file exactly as a readCollectionsMap would
    final File selectionCsv = new File(getCollectionsCvsFolder() + colSelectionFilename);
    final Map<String, String> selectedCollections = readThumbnailsMap(selectionCsv);

    // columns: #ID;Title;Portal link;Results;items;selection;discriminator;Content selection comments
    for (Map.Entry<String, String> row : selectedCollections.entrySet()) {
        final SelectionDescriptionImpl selection =
                new SelectionDescriptionImpl(row.getKey(), row.getValue().split(";"));
        final DatasetDescriptor descriptor =
                new DatasetDescriptor(buildSubSetName(selection), selection.getId());

        final File sourceMapFile = getCollectionCsvFile(descriptor, STEP_THUMBNAILMAP);
        final File targetSubsetFile = getCollectionCsvFile(descriptor, STEP_SUBSET);
        final Map<String, String> allThumbnails = readThumbnailsMap(sourceMapFile);

        this.setProcessingStep(STEP_THUMBNAILMAP);
        final String selectionType = selection.getFieldValue(SelectionDescriptionEnum.SELECTION_TYPE);
        final int selectionCount = selection.getIntFieldValue(SelectionDescriptionEnum.SELECTED_COUNT);

        if (!"all".equals(selectionType)) {
            // draw the requested number of thumbnails and persist them
            final Map<String, String> chosen = generateSubset(allThumbnails, selectionCount);
            log.debug("writing subset of size: " + chosen.size() + "\n to file:" + targetSubsetFile);
            writeThumbnailsToCsvFile(descriptor, chosen, targetSubsetFile, POLICY_OVERWRITE_FILE);
        } else {
            // the whole collection is selected: copy the map file as it is
            log.trace("Copying to file: " + targetSubsetFile );
            copyFile(sourceMapFile, targetSubsetFile);
        }
    }
}
/**
 * Draws a uniformly random subset of the given size from a thumbnail map.
 *
 * A partial Fisher-Yates shuffle over the key array is used, so exactly
 * {@code selectionCount} random draws are made. Re-drawing until enough
 * distinct keys are found would need many wasted draws when the subset is
 * almost as large as the full collection.
 *
 * @param fullCollectionMap map to draw the entries from
 * @param selectionCount number of entries the returned map must contain
 * @return a new map holding {@code selectionCount} distinct entries of {@code fullCollectionMap}
 * @throws RuntimeException if the full collection holds fewer entries than requested
 * @throws IllegalArgumentException if {@code selectionCount} is negative
 */
private Map<String, String> generateSubset(
        Map<String, String> fullCollectionMap, int selectionCount) {
    Map<String, String> subsetMap = new HashMap<String, String>(selectionCount);
    if (fullCollectionMap.size() < selectionCount)
        throw new RuntimeException("Fullcollection has less items than the expected subset: " + selectionCount);

    Object[] keys = fullCollectionMap.keySet().toArray();
    Random random = new Random();
    for (int picked = 0; picked < selectionCount; picked++) {
        // choose among the keys not picked yet and move the choice into the picked prefix
        int swapIndex = picked + random.nextInt(keys.length - picked);
        Object chosen = keys[swapIndex];
        keys[swapIndex] = keys[picked];
        keys[picked] = chosen;
        subsetMap.put((String) chosen, fullCollectionMap.get(chosen));
    }
    return subsetMap;
}
/**
 * Derives the subset name from the title of a selection: the title itself when it
 * has at most 10 characters, otherwise its first 10 characters.
 *
 * @param selectionDescription selection whose TITLE field is used
 * @return the title truncated to at most 10 characters
 */
protected String buildSubSetName(
        SelectionDescriptionImpl selectionDescription) {
    final int maxLength = 10;
    final String title = selectionDescription.getFieldValue(SelectionDescriptionEnum.TITLE);
    if (title.length() <= maxLength) {
        return title;
    }
    return title.substring(0, maxLength);
}
/**
 * Resolves the CSV file of a dataset for the processing step currently set on this instance.
 *
 * @param dataset descriptor of the dataset
 * @return the CSV file chosen by the two-argument overload for the current step
 */
protected File getCollectionCsvFile(DatasetDescriptor dataset) {
    final String currentStep = getProcessingStep();
    return getCollectionCsvFile(dataset, currentStep);
}
/**
 * Resolves the CSV file of a dataset for a given processing step.
 *
 * For the THUMBNAILMAP and SUBSET steps the file is located in a folder named after
 * the lower-cased step, below the collections CSV folder, and is named
 * {@code <imageSetName>_<encoded collectionName>.csv}. Any other step (including
 * {@code null}) is delegated to the superclass.
 *
 * @param dataset descriptor providing the image set name and the collection name
 * @param processingStep step the file belongs to; may be {@code null}
 * @return the CSV file for the dataset and step
 */
protected File getCollectionCsvFile(DatasetDescriptor dataset,
        final String processingStep) {
    if (STEP_THUMBNAILMAP.equals(processingStep) || STEP_SUBSET.equals(processingStep)) {
        // Locale.ROOT keeps the folder name stable: with a Turkish default locale the
        // no-argument toLowerCase() maps 'I' to a dotless 'ı' and would yield a different folder
        String stepFolder = processingStep.toLowerCase(Locale.ROOT);
        String fileName = getCollectionsCvsFolder() + stepFolder + "/" + dataset.getImageSetName()
                + "_" + encode(dataset.getCollectionName()) + ".csv";
        return new File(fileName);
    }
    return super.getCollectionCsvFile(dataset);
}
//@Test
/**
 * Downloads the thumbnails listed in the aggregated dataset file into the image folder
 * of the current dataset. The dataset file is aggregated first when it does not exist.
 * Existing files are skipped unless {@code overwriteThumbnails} is set. The run is
 * expected to finish without failed downloads.
 *
 * @throws FileNotFoundException declared for the file access performed by the helpers
 * @throws IOException if aggregating or processing the dataset fails
 */
public void downloadThumbnails() throws FileNotFoundException, IOException {
    final File aggregatedDataset = getConfig().getDatasetFile(getDataset());
    final File imageFolder = getConfig().getImageFolderAsFile(getDataset());

    if (!aggregatedDataset.exists()) {
        performDatasetAggregation();
    }

    final LargeThumbnailsetProcessing processing = new LargeThumbnailsetProcessing(aggregatedDataset);
    final ThumbnailDownloader downloader = new ThumbnailDownloader(imageFolder);
    final boolean skipExisting = !overwriteThumbnails;
    downloader.setSkipExistingFiles(skipExisting);
    processing.addObserver(downloader);

    // NOTE(review): arguments presumably mean start, limit (-1 = unlimited) and block size - confirm
    processing.processThumbnailset(0, -1, 1000);

    log.debug("Skipped items: " + processing.getSkippedItemsCount());
    log.warn("Failed downloads: " + processing.getFailureCount());
    log.info("Downloaded files: " + processing.getItemsProcessed());
    assertEquals(0, processing.getFailureCount());
}
//@Test
/**
 * Builds the "Design V1" subset: the thumbnails of the design input CSV that are
 * not contained in any subset file of the subset folder.
 *
 * The resulting file is written into that same subset folder, so it is skipped
 * while scanning; otherwise a second run would remove every design item because
 * the previous output would be treated as just another subset.
 *
 * @throws FileNotFoundException if the subset folder cannot be listed
 * @throws IOException if a CSV file cannot be read or written
 */
public void filterDesignThumbnails() throws IOException {
    // read design input
    File desginV1 = new File(getCollectionsCvsFolder() + designInputFilename);
    // readThumbnailsMap is reused on purpose: it parses the file exactly as a readCollectionsMap would
    Map<String, String> designV1Thumbnails = readThumbnailsMap(desginV1);
    log.debug("Items in design_v1 dataset :" + designV1Thumbnails.size());

    // locate the culturecam subsets
    File cvsFolder = new File(getCollectionsCvsFolder() + STEP_SUBSET.toLowerCase() + "/");
    File[] collectionFiles = cvsFolder.listFiles();
    if (collectionFiles == null) {
        // listFiles() returns null when the path is not a directory or cannot be read
        throw new FileNotFoundException("Cannot list subset folder: " + cvsFolder);
    }

    DatasetDescriptor designSubset = new DatasetDescriptor("Design", "V1");
    File designSubsetFile = getCollectionCsvFile(designSubset, STEP_SUBSET);
    File ownOutput = designSubsetFile.getCanonicalFile();

    for (int i = 0; i < collectionFiles.length; i++) {
        if (ownOutput.equals(collectionFiles[i].getCanonicalFile())) {
            // the output of an earlier run must not filter the design input
            continue;
        }
        Map<String, String> subset = readThumbnailsMap(collectionFiles[i]);
        for (String thumbnailId : subset.keySet()) {
            // drop design items that are already part of a subset
            if (designV1Thumbnails.containsKey(thumbnailId)) {
                log.trace("removing item from subset: " + thumbnailId);
                designV1Thumbnails.remove(thumbnailId);
            }
        }
    }

    log.info("Writing items in desing subset: " + designV1Thumbnails.size());
    writeThumbnailsToCsvFile(designSubset, designV1Thumbnails, designSubsetFile);
}
/**
 * Concatenates every file of the subset folder into the dataset file of the current dataset.
 * All lines of each subset file, including its first line, are copied; the first line is
 * additionally logged at debug level.
 *
 * The subset folder is listed before the dataset writer is opened, and the writer and every
 * reader are closed in {@code finally} blocks so no file handle leaks when reading or
 * writing fails.
 *
 * @throws FileNotFoundException if the subset folder cannot be listed or a subset file cannot be opened
 * @throws IOException if reading a subset file or writing the dataset file fails
 */
private void performDatasetAggregation() throws IOException {
    File cvsFolder = new File(getCollectionsCvsFolder() + STEP_SUBSET.toLowerCase() + "/");
    File[] collectionFiles = cvsFolder.listFiles();
    if (collectionFiles == null) {
        // listFiles() returns null when the path is not a directory or cannot be read
        throw new FileNotFoundException("Cannot list subset folder: " + cvsFolder);
    }

    BufferedWriter datasetWriter = getDataSetFileWriter(false);
    try {
        log.debug("Aggregating dataset: " + getDataset());
        for (int i = 0; i < collectionFiles.length; i++) {
            BufferedReader reader = new BufferedReader(new FileReader(collectionFiles[i]));
            try {
                boolean firstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    // report the header line of each subset file
                    if (firstLine) {
                        log.debug("Writting dataset headerline: " + line);
                        firstLine = false;
                    }
                    // write all data to dataset
                    datasetWriter.write(line);
                    datasetWriter.write("\n");
                }
                datasetWriter.flush();
            } finally {
                try {
                    reader.close();
                } catch (IOException e) {
                    // best effort: a failed close of an input file must not abort the aggregation
                    log.warn("cannot close reader for: " + collectionFiles[i] + " (" + e + ")");
                }
            }
        }
    } finally {
        log.trace("Closing dataset file");
        datasetWriter.close();
    }
}
/**
 * Creates a fresh image retrieval configuration on every call.
 *
 * @return a new {@link IRConfigurationImpl}
 */
protected IRConfiguration getConfig() {
    return new IRConfigurationImpl();
}
/**
 * Returns a dataset file of the current dataset as provided by the configuration.
 *
 * @param urls {@code true} for the dataset URLs file, {@code false} for the dataset file
 * @return the requested file
 */
public File getDataSetFile(boolean urls) {
    final IRConfiguration configuration = getConfig();
    final String currentDataset = getDataset();
    return urls
            ? configuration.getDatasetUrlsFile(currentDataset)
            : configuration.getDatasetFile(currentDataset);
}
/**
 * Returns the collections CSV folder of the current dataset.
 *
 * @return folder path ending with "/"
 */
protected String getCollectionsCvsFolder() {
    final String currentDataset = getDataset();
    return getCollectionsCvsFolder(currentDataset);
}
/**
 * Returns the collections CSV folder of a dataset: the configured collections folder
 * followed by the dataset name and a trailing "/".
 *
 * @param dataset name of the dataset
 * @return folder path ending with "/"
 */
public String getCollectionsCvsFolder(String dataset) {
    final String folder = IRTestConfigurations.COLLECTIONS_FOLDER + dataset;
    return folder + "/";
}
/**
 * Returns the processing step currently set on this instance.
 *
 * @return the current step, or {@code null} when none has been set
 */
public String getProcessingStep() {
    return this.processingStep;
}
/**
 * Sets the processing step used when a collection CSV file is resolved without an explicit step.
 *
 * @param processingStep the step to use from now on
 */
public void setProcessingStep(final String processingStep) {
    this.processingStep = processingStep;
}
/**
 * Copies the content of one file to another, creating missing parent folders of the
 * destination and replacing an existing destination file.
 *
 * Copying a file onto itself is a no-op, because opening the destination for writing
 * would truncate the source. The source is opened before the destination is touched,
 * so a missing source does not leave an empty destination file behind.
 *
 * @param sourceFile file to read from
 * @param destFile file to write to
 * @throws FileNotFoundException if the source cannot be opened or the destination cannot be created
 * @throws IOException if the copy fails
 */
protected void copyFile(File sourceFile, File destFile) throws IOException {
    if (sourceFile.getCanonicalFile().equals(destFile.getCanonicalFile())) {
        return;
    }

    FileInputStream in = new FileInputStream(sourceFile);
    try {
        File parent = destFile.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        FileOutputStream out = new FileOutputStream(destFile);
        try {
            FileChannel source = in.getChannel();
            FileChannel destination = out.getChannel();
            long size = source.size();
            long position = 0;
            // transferFrom may move fewer bytes than requested, so repeat until all are copied
            while (position < size) {
                long transferred = destination.transferFrom(source, position, size - position);
                if (transferred <= 0) {
                    // nothing left to read, e.g. the source shrank while copying
                    break;
                }
                position += transferred;
            }
        } finally {
            // closing the streams also closes their channels
            out.close();
        }
    } finally {
        in.close();
    }
}
//@Test
/**
 * Copies the image of every thumbnail listed in the dataset file to a path obtained by
 * replacing the first match of "app" in the image path with "inst". Images that do not
 * exist are reported on standard output and skipped.
 *
 * @throws FileNotFoundException declared for the file access performed by the helpers
 * @throws IOException if the dataset file cannot be read or an image cannot be copied
 */
public void copyThumbnails() throws FileNotFoundException, IOException {
    final Map<String, String> thumbnails = readThumbnailsMap(getDataSetFile(false));

    for (String thumbnailId : thumbnails.keySet()) {
        System.out.println("copying image with id: " + thumbnailId);
        final File original = getConfig().getImageFile(getDataset(), thumbnailId);
        if (original.exists()) {
            // replaceFirst treats "app" as a regular expression and only touches its first match
            final String targetPath = original.getPath().replaceFirst("app", "inst");
            copyFile(original, new File(targetPath));
        } else {
            System.out.println("Error: file not found + " + original.getCanonicalPath());
        }
    }
}
}