package eu.europeana.creative.dataset.culturecam.v2;
import static org.junit.Assert.assertEquals;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.Test;
import eu.europeana.api.client.MyEuropeanaClient;
import eu.europeana.api.client.dataset.DatasetDescriptor;
import eu.europeana.api.client.exception.EuropeanaApiProblem;
import eu.europeana.api.client.exception.TechnicalRuntimeException;
import eu.europeana.api.client.metadata.MetadataAccessor;
import eu.europeana.api.client.model.search.CommonMetadata;
import eu.europeana.api.client.myeuropeana.exception.MyEuropeanaApiException;
import eu.europeana.api.client.myeuropeana.impl.MyEuropeanaClientImpl;
import eu.europeana.api.client.myeuropeana.response.TagsApiResponse;
import eu.europeana.api.client.myeuropeana.thumbnails.ThumbnailFromTagsResponseAccessor;
import eu.europeana.api.client.search.query.Api2QueryBuilder;
import eu.europeana.api.client.search.query.Api2QueryInterface;
import eu.europeana.api.client.thumbnails.ThumbnailAccessorUtils;
import eu.europeana.api.client.thumbnails.download.ThumbnailDownloader;
import eu.europeana.api.client.thumbnails.processing.LargeThumbnailsetProcessing;
import eu.europeana.creative.dataset.IRTestConfigurations;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionEnum;
import eu.europeana.creative.dataset.culturecam.input.SelectionDescriptionImpl;
import eu.europeana.creative.dataset.culturecam.v2.download.CimecThumbnailDownloader;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.IRConfigurationImpl;
public class CultureCamV2ThumbnailMapsTest extends ThumbnailAccessorUtils
implements IRTestConfigurations {
// private boolean overwriteThumbnails = false;
/** File name (relative to the collections CSV folder) of the thumbnail map built from the "new" tag selection. */
String tagSelectionFilename = "/selection/input/tags/new.csv";
/** File name (relative to the collections CSV folder) of the CIMEC id map read by downloadCimecThumbnails(). */
String cimecIdsFilename = "/selection/input/tags/cimec_ids.csv";
/** File name (relative to the collections CSV folder) of the ONB selection written by buildOnbSelection(). */
String onbIdsFilename = "/selection/input/tags/onb_ids.csv";
/** File name (relative to the collections CSV folder) of the Europeana 280 selection written by buildEuropeana280Selection(). */
String e280IdsFilename = "/selection/input/tags/e280_ids.csv";
/** File name (relative to the collections CSV folder) of the de-duplicated Europeana 280 dataset written by buildEuropeana280Dataset(). */
String e280SetFilename = "/selection/input/tags/e280_dataset.csv";
// String colectionThumbnailsFilename =
// "/selection/input/thumbnails_v1/culturecam_5000.csv";
// String colectionClassifiedFilename =
// "/selection/input/thumbnails_v1/culturecam_pd_classified.csv";
//
// String designInputFilename = "/selection/input/design_v1.csv";
// String thumbnailMapFolder = "/selection/thumbnailmap";
// final String STEP_THUMBNAILMAP = "THUMBNAILMAP";
/** Processing step name; only referenced from commented-out code in this file. */
final String STEP_THUMBNAILMAP_V2 = "THUMBNAILMAP_V2";
/** Processing step name; its lower-cased form is the sub-folder aggregated by aggregateDataset(). */
final String STEP_SUBSET_V2 = "SUBSET_V2";
// final String STEP_CLASSIFIED_V1 = "CLASSIFIED_V1";
// final String STEP_FILTER_OUT = "FILTER_OUT";
/** Current processing step used by getCollectionCsvFile(DatasetDescriptor); null delegates to the superclass implementation. */
private String processingStep = null;
/** When false, downloadThumbnails() configures its downloader to skip files that already exist. */
private boolean overwriteThumbnails = false;
/** Image folder path; only referenced from commented-out code in this file. */
final String IMAGE_FOLDER = "/app/eucreative/imagesimilarityhome/culturecam/";
/** Selects the "culturecam" dataset before every test. */
@Before
public void init() {
    setDataset("culturecam");
}
/**
 * Runs the "Europeana 280" image search restricted to open reusability and writes the
 * resulting id -> preview map to the e280 ids CSV file, overwriting an existing file.
 *
 * @throws IOException if the query cannot be encoded or the CSV file cannot be written
 * @throws TechnicalRuntimeException propagated from the API client
 * @throws EuropeanaApiProblem propagated from the API client
 */
@Test
public void buildEuropeana280Selection() throws IOException, TechnicalRuntimeException, EuropeanaApiProblem {
    final Api2QueryBuilder builder = new Api2QueryBuilder();
    final String providerQuery = "(PROVIDER:\"Europeana 280\" AND TYPE:IMAGE)";
    final String portalSearch = "http://www.europeana.eu/portal/search?query="
            + URLEncoder.encode(providerQuery, "UTF8");
    final File idsFile = new File(getCollectionsCvsFolder(), e280IdsFilename);

    // open access items only
    final Api2QueryInterface openQuery = builder.buildQuery(portalSearch + "&reusability=open");
    openQuery.setProfile("rich");

    final MetadataAccessor accessor = new MetadataAccessor(openQuery, null);
    final Map<String, String> previewsById = accessor.getContentMap(
            CommonMetadata.EDM_FIELD_PREVIEW, -1, -1, MetadataAccessor.ERROR_POLICY_CONTINUE);

    writeMapToCsvFile(new DatasetDescriptor("e280", "open"), previewsById, idsFile, POLICY_OVERWRITE_FILE);

    System.out.println("Items found in e280 open selection: " + previewsById.size());
    System.out.println("Items written to file: " + idsFile.getAbsolutePath());

    // Disabled: restricted-access items would be appended to the same file.
    // //write restricted access
    // searchUrl = "(PROVIDER:\"Europeana 280\" AND TYPE:IMAGE AND (RIGHTS:*/by/*)";
    // apiQuery = queryBuilder.buildQuery(searchUrl);
    // apiQuery.setProfile("rich");
    // ma = new MetadataAccessor(apiQuery, null);
    // contentMap = ma.getContentMap(CommonMetadata.EDM_FIELD_PREVIEW, -1, -1, MetadataAccessor.ERROR_POLICY_CONTINUE);
    //
    // descriptor = new DatasetDescriptor("e280", "restricted");
    //
    // writeMapToCsvFile(descriptor, contentMap, file, POLICY_APPEND_TO_FILE);
    // System.out.println("Items found in e280 restricted selection: " + contentMap.size());
    // System.out.println("Items written to file: " + file.getAbsolutePath());
}
/**
 * Runs the portal search for collection 9200388* and writes the resulting
 * id -> isShownBy map to the ONB ids CSV file, overwriting an existing file.
 *
 * @throws IOException if the CSV file cannot be written
 * @throws TechnicalRuntimeException propagated from the API client
 * @throws EuropeanaApiProblem propagated from the API client
 */
@Test
public void buildOnbSelection() throws IOException, TechnicalRuntimeException, EuropeanaApiProblem {
    final Api2QueryBuilder builder = new Api2QueryBuilder();
    final Api2QueryInterface onbQuery = builder.buildQuery(
            "http://www.europeana.eu/portal/search.html?query=europeana_collectionName%3A9200388*&rows=24&start=193&qt=false");
    onbQuery.setProfile("rich");

    final MetadataAccessor accessor = new MetadataAccessor(onbQuery, null);
    final Map<String, String> shownByById = accessor.getContentMap(
            CommonMetadata.EDM_FIELD_IS_SHOWN_BY, -1, -1, MetadataAccessor.ERROR_POLICY_CONTINUE);

    final DatasetDescriptor onbDescriptor = new DatasetDescriptor("onb", "cc");
    final File idsFile = new File(getCollectionsCvsFolder(), onbIdsFilename);
    writeMapToCsvFile(onbDescriptor, shownByById, idsFile, POLICY_OVERWRITE_FILE);

    System.out.println("Items found in onb selection: " + shownByById.size());
    System.out.println("Items written to file: " + idsFile.getAbsolutePath());
}
/**
 * Reads the ONB ids CSV file and downloads the listed images to /tmp/eucreative/onb/.
 *
 * @throws FileNotFoundException propagated from reading the map file
 * @throws IOException propagated from reading the map or downloading
 */
@Test
public void downloadOnbImages() throws FileNotFoundException, IOException {
    final Map<String, String> onbImages =
            readThumbnailsMap(new File(getCollectionsCvsFolder(), onbIdsFilename));

    final File targetFolder = new File("/tmp/eucreative/onb/");
    new ThumbnailDownloader(targetFolder).downloadImages(onbImages);
}
/**
 * Reads the e280 ids CSV file and downloads the listed images to /tmp/eucreative/e280/.
 *
 * @throws FileNotFoundException propagated from reading the map file
 * @throws IOException propagated from reading the map or downloading
 */
@Test
public void downloadE280Images() throws FileNotFoundException, IOException {
    final Map<String, String> e280Images =
            readThumbnailsMap(new File(getCollectionsCvsFolder(), e280IdsFilename));

    final File targetFolder = new File("/tmp/eucreative/e280/");
    new ThumbnailDownloader(targetFolder).downloadImages(e280Images);
}
/**
 * Builds the Europeana 280 dataset file: the e280 ids map minus every id that is
 * already present in the current dataset file, written with overwrite policy.
 *
 * @throws FileNotFoundException propagated from reading the input maps
 * @throws IOException propagated from reading the inputs or writing the output
 */
@Test
public void buildEuropeana280Dataset() throws FileNotFoundException, IOException {
    final String csvFolder = getCollectionsCvsFolder();
    final File selectionFile = new File(csvFolder, e280IdsFilename);
    final File datasetFile = new File(getCollectionsCvsFolder(), e280SetFilename);

    final Map<String, String> e280Items = readThumbnailsMap(selectionFile);
    // items of the existing CultureCam dataset (V2)
    final Map<String, String> existingItems = readThumbnailsMap(getDataSetFile(false));

    for (String existingId : existingItems.keySet()) {
        if (!e280Items.containsKey(existingId)) {
            continue;
        }
        System.out.println("removing dupplicated item in dataset: " + existingId);
        e280Items.remove(existingId);
    }

    System.out.println("new dataset size: " + e280Items.size());
    writeMapToCsvFile(new DatasetDescriptor("e280", "openreuse"), e280Items, datasetFile,
            POLICY_OVERWRITE_FILE);
}
/**
 * Rebuilds the tag selection CSV from the stored tags API response: the existing file is
 * deleted, then one batch of thumbnails per tag is appended. Finally the number of written
 * objects is compared with the total result count of the response.
 *
 * @throws MyEuropeanaApiException propagated from parsing the stored response
 * @throws IOException propagated from reading the resource or writing the CSV file
 */
//@Test
public void buildNewTagSelection() throws MyEuropeanaApiException,
        IOException {
    final TagsApiResponse tagsResponse =
            readJsonFile("/myeuropeana/culturecam/user_tag_action=LIST&tag=new.json");
    final ThumbnailFromTagsResponseAccessor thumbnailAccessor = new ThumbnailFromTagsResponseAccessor();

    final File selectionFile = new File(getCollectionsCvsFolder() + tagSelectionFilename);
    if (selectionFile.exists()) {
        selectionFile.delete();
    }

    final Map<String, Map<String, String>> thumbnailsPerTag =
            thumbnailAccessor.getThumbnailsFromTagsApiResponse(tagsResponse);

    int writtenObjects = 0;
    for (Map.Entry<String, Map<String, String>> tagEntry : thumbnailsPerTag.entrySet()) {
        final String tag = tagEntry.getKey();
        final Map<String, String> tagThumbnails = tagEntry.getValue();

        final DatasetDescriptor tagDescriptor = new DatasetDescriptor(getDataset(), tag);
        tagDescriptor.setClassifications(new String[] { tag });
        writeThumbnailsToCsvFile(tagDescriptor, tagThumbnails, selectionFile, POLICY_APPEND_TO_FILE);

        writtenObjects += tagThumbnails.size();
        System.out.println("TAG: " + tag + " - " + tagThumbnails.size());
    }

    assertEquals(tagsResponse.getTotalResults(), writtenObjects);
}
/**
 * Parses a stored tags API response from a classpath resource.
 *
 * @param testResource classpath location of the JSON file, resolved relative to this class
 * @return the parsed response
 * @throws IOException if the resource does not exist (reported as {@link FileNotFoundException}),
 *         cannot be read or cannot be closed
 * @throws MyEuropeanaApiException propagated from the parser
 */
private TagsApiResponse readJsonFile(String testResource)
        throws IOException, MyEuropeanaApiException {
    InputStream resourceAsStream = getClass().getResourceAsStream(testResource);
    // getResourceAsStream signals a missing resource with null; fail with a clear message
    // instead of handing a null stream to the parser
    if (resourceAsStream == null) {
        throw new FileNotFoundException("Test resource not found on classpath: " + testResource);
    }
    try {
        MyEuropeanaClient client = new MyEuropeanaClientImpl();
        return client.parseTagsApiResponse(resourceAsStream);
    } finally {
        resourceAsStream.close();
    }
}
/**
 * Prints the id of every item in the tag selection whose image file is exactly 3583 bytes long.
 * NOTE(review): 3583 is presumably the size of a placeholder/default image - confirm.
 *
 * @throws FileNotFoundException propagated from reading the tag selection file
 * @throws IOException propagated from reading the tag selection file
 */
@Test
public void verifyThumbnails() throws FileNotFoundException, IOException {
    final File selectionFile = new File(getCollectionsCvsFolder() + tagSelectionFilename);
    final Map<String, String> taggedItems = readThumbnailsMap(selectionFile);

    for (String itemId : taggedItems.keySet()) {
        final File imageFile = getConfig().getImageFile(getDataset(), itemId);
        if (imageFile.length() == 3583) {
            System.out.println(itemId);
        }
    }
}
/**
 * Reads the CIMEC ids CSV file and downloads the listed items into the image folder
 * of the current dataset using the CIMEC specific downloader.
 *
 * @throws FileNotFoundException propagated from reading the ids file
 * @throws IOException propagated from reading the ids file or downloading
 */
@Test
public void downloadCimecThumbnails() throws FileNotFoundException, IOException {
    final File idsFile = new File(getCollectionsCvsFolder() + cimecIdsFilename);
    final File imageFolder = getConfig().getImageFolderAsFile(getDataset());

    final CimecThumbnailDownloader cimecDownloader = new CimecThumbnailDownloader(imageFolder);
    final Map<String, String> cimecItems = readThumbnailsMap(idsFile);
    cimecDownloader.downloadCimecIds(cimecItems);
}
/**
 * Processes the tag selection file with a thumbnail downloader attached as observer,
 * writing into the image folder of the current dataset, and expects zero failed downloads.
 * Existing files are skipped unless overwriteThumbnails is set.
 *
 * @throws FileNotFoundException propagated from the thumbnail set processing
 * @throws IOException propagated from the thumbnail set processing
 */
//@Test
public void downloadThumbnails() throws FileNotFoundException, IOException {
    final File imageFolder = getConfig().getImageFolderAsFile(getDataset());
    final File selectionFile = new File(getCollectionsCvsFolder() + tagSelectionFilename);

    final LargeThumbnailsetProcessing processing = new LargeThumbnailsetProcessing(selectionFile);
    final ThumbnailDownloader downloadObserver = new ThumbnailDownloader(imageFolder);
    downloadObserver.setSkipExistingFiles(!overwriteThumbnails);
    processing.addObserver(downloadObserver);

    // arguments presumably are start, limit and block size - TODO confirm
    processing.processThumbnailset(0, -1, 1000);

    log.debug("Skipped items: " + processing.getSkippedItemsCount());
    log.warn("Failed downloads: " + processing.getFailureCount());
    log.info("Downloaded files: " + processing.getItemsProcessed());

    assertEquals(0, processing.getFailureCount());
}
// File colSelectionCvsFile = new File(getCollectionsCvsFolder() +
// tagSelectionFilename);
//
//
//
//
//
// //we misuse the readThumbnailsMap as this is the same implementation as
// readCollectionsMap
// DatasetDescriptor descriptor;
// int missingThumbnails;
// int missingThumbnailsSum = 0;
// int expectedResultsTotal = 0;
// int expectedResults = 0;
// SelectionDescriptionImpl selectionDescription;
// String subsetName;
//
// Map<String, String> selectedCollections =
// readThumbnailsMap(colSelectionCvsFile);
// //#ID;Title;Portal link;Results;items;selection;dicriminator;Content
// selection comments
//
// for (Map.Entry<String, String> collection :
// selectedCollections.entrySet()) {
// selectionDescription = new SelectionDescriptionImpl(collection.getKey(),
// collection.getValue().split(";"));
//
// subsetName = buildSubSetName(selectionDescription);
// descriptor = new DatasetDescriptor(subsetName,
// selectionDescription.getId());
// this.setProcessingStep(STEP_THUMBNAILMAP_V2);
// expectedResults =
// selectionDescription.getIntFieldValue(SelectionDescriptionEnum.RESULT_COUNT);
//
// File thumbnailsMapFile = getCollectionCsvFile(descriptor,
// STEP_THUMBNAILMAP_V2);
// if(thumbnailsMapFile.exists()){
// log.info("Skip selected collection. Thumbnailsmap exists already :" +
// thumbnailsMapFile);
// continue;
// }
//
// missingThumbnails = createSubset(subsetName,
// selectionDescription.getId(),
// selectionDescription.getFieldValue(SelectionDescriptionEnum.PORTAL_LINK),
// 0, expectedResults);
// missingThumbnailsSum += missingThumbnails;
// expectedResultsTotal += expectedResults;
// if(missingThumbnails > 0)
// System.out.println("Missing thumbnails in dataset:" + descriptor + ": " +
// missingThumbnails);
// }
// //we expect no more than 10 missing Thumbnails
// log.info("Number of missing thumbnails: " + missingThumbnailsSum);
// log.info("Total expected results: " + expectedResultsTotal);
// 3@Test
// public void categorizeSubsetThumbnails() throws FileNotFoundException,
// IOException {
//
// File colSelectionCvsFile = new File(getCollectionsCvsFolder() +
// colSelectionFilename);
// //we misuse the readThumbnailsMap as this is the same implementation as
// readCollectionsMap
// DatasetDescriptor descriptor;
// SelectionDescriptionImpl selectionDescription;
// String subsetName;
//
// Map<String, String> selectedCollections =
// readThumbnailsMap(colSelectionCvsFile);
// //#ID;Title;Portal link;Results;items;selection;dicriminator;Content
// selection comments
//
// File categorizedThumbnailsFile;
// File thumbnailsCvsFile;
//
// for (Map.Entry<String, String> collection :
// selectedCollections.entrySet()) {
// selectionDescription = new SelectionDescriptionImpl(collection.getKey(),
// collection.getValue().split(";"));
//
// subsetName = buildSubSetName(selectionDescription);
// descriptor = new DatasetDescriptor(subsetName,
// selectionDescription.getId());
// this.setProcessingStep(STEP_CLASSIFIED_V1);
//
// thumbnailsCvsFile = getCollectionCsvFile(descriptor,
// STEP_THUMBNAILMAP_V2);
//
// //categorize by color-fullness
// categorizedThumbnailsFile = categorizeThumbnails(descriptor,
// thumbnailsCvsFile);
//
// System.out.println("subset written to file: " +
// categorizedThumbnailsFile);
// }
// }
// 4@Test
// public void filterDesignThumbnails() throws IOException {
//
// //read design input
// File desginV1 = new File(getCollectionsCvsFolder() +
// designInputFilename);
// //we misuse the readThumbnailsMap as this is the same implementation as
// readCollectionsMap
// Map<String, String> designV1Thumbnails = readThumbnailsMap(desginV1);
// log.debug("Items in design_v1 dataset :" + designV1Thumbnails.size());
//
// //read culturecam subsets
// File cvsFolder = new File(getCollectionsCvsFolder() +
// STEP_SUBSET_V1.toLowerCase() + "/");
// File[] collectionFiles = cvsFolder.listFiles();
// Map<String, String> subset;
//
// for (int i = 0; i < collectionFiles.length; i++) {
// //for each subset
// subset = readThumbnailsMap(collectionFiles[i]);
//
// for (String thumbnailId: subset.keySet()) {
// //remove items available in subsets
// if(designV1Thumbnails.containsKey(thumbnailId)){
// log.trace("removing item from subset: " + thumbnailId);
// designV1Thumbnails.remove(thumbnailId);
// }
// }
//
// }
//
// DatasetDescriptor designSubset = new DatasetDescriptor("Design", "V1");
// File designSubsetFile = getCollectionCsvFile(designSubset,
// STEP_SUBSET_V1);
// log.info("Writing items in desing subset: " + designV1Thumbnails.size());
// writeThumbnailsToCsvFile(designSubset, designV1Thumbnails,
// designSubsetFile);
//
//
// // log.trace("Closing dataset file");
// // datasetWriter.close();
// }
// @Test
// public void filterDesignNonPDThumbnails() throws IOException,
// EuropeanaApiProblem {
//
// //read design input
// File desginV1 = new File(getCollectionsCvsFolder() +
// designInputFilename);
// //we misuse the readThumbnailsMap as this is the same implementation as
// readCollectionsMap
// Map<String, String> designV1Thumbnails = readThumbnailsMap(desginV1);
// log.debug("Items in design_v1 dataset :" + designV1Thumbnails.size());
//
// EuropeanaApi2Client euClient = new EuropeanaApi2Client();
// EuropeanaApi2Item obj;
//
// Map<String, String> designNonPDThumbnails = new HashMap<String,
// String>();
// Api2Query searchQuery = new Api2Query();
// EuropeanaApi2Results searchResults;
//
//
// for (Map.Entry<String, String> item : designV1Thumbnails.entrySet()) {
// obj = null;//clear obj
// if(!(searchQuery.getSubQueries() == null) &&
// !searchQuery.getSubQueries().isEmpty())
// searchQuery.getSubQueries().remove(0);//re-initialize
//
// searchQuery.addSubQuery(new SubQuery("europeana_id", item.getKey(),
// false, true, false));
//
// searchResults = euClient.searchApi2(searchQuery, 2, -1);
//
// if(!searchResults.getItems().isEmpty())
// obj = searchResults.getItems().get(0);
// else
// System.out.println("Cannot find object with id query: " + item.getKey());
//
// if(obj != null && !hasPdRights(obj)){
// designNonPDThumbnails.put(item.getKey(), item.getValue());
// }
// }
//
// DatasetDescriptor designSubset = new DatasetDescriptor("Design", "V1");
// File filterOoutFile = getCollectionCsvFile(designSubset,
// STEP_FILTER_OUT);
// log.info("Writing items in desing subset: " +
// designNonPDThumbnails.size());
// writeThumbnailsToCsvFile(designSubset, designNonPDThumbnails,
// filterOoutFile);
//
//
// // log.trace("Closing dataset file");
// // datasetWriter.close();
// }
//
// protected boolean hasPdRights(BaseAggregation aggregation) {
// boolean ret = false;
// if(aggregation != null && aggregation.getEdmRights() != null){
//
// Collection<List<String>> allRights = aggregation.getEdmRights().values();
//
// for (List<String> rightsList : allRights) {
// if(rightsList == null || rightsList.isEmpty())
// continue;
//
// else for (String rights : rightsList) {
// if(rights != null && rights.indexOf("/publicdomain/") > -1)
// ret = true;
// }
// }
//
//
// }
// return ret;
// }
//
// protected boolean hasPdRights(EuropeanaApi2Item item) {
// boolean ret = false;
// if(item != null && item.getRights() != null){
// for (String rights : item.getRights()) {
// if(rights.indexOf("/publicdomain/") > -1)
// return true;
// }
// }
// return ret;
// }
/**
 * Aggregates all collection files of the SUBSET_V2 step into the dataset file
 * configured for the current dataset.
 *
 * @throws IOException propagated from the aggregation
 */
// @Test
public void aggregateDataset() throws IOException {
    performDatasetAggregation(STEP_SUBSET_V2, getConfig().getDatasetFile(getDataset()));
}
// public File categorizeThumbnails(DatasetDescriptor datasetDescriptor,
// File thumbnailsFile) throws FileNotFoundException, IOException {
//
// // String thumbnailsFile = getCvsFileForStep(datasetDescriptor,
// // STEP_THUMBNAILS);
// // new File(thumbnailsFile)
// File outputFile = getCollectionCsvFile(datasetDescriptor,
// STEP_CLASSIFIED_V1);
//
// LargeThumbnailsetProcessing datasetCategorization = new
// LargeThumbnailsetProcessing(
// thumbnailsFile);
// // String imageFolder = getConfiguration().getImageFolder(getDataset());
// String imageFolder = IMAGE_FOLDER;
//
// GrayScaleSepiaDetector observer = new GrayScaleSepiaDetector(new File(
// imageFolder), 85, 3);
// //final File outputFile = new File(outFile);
// observer.setOutputFile(outputFile);
//
// datasetCategorization.addObserver(observer);
// if(blockSize < 0)
// blockSize = 1000;
//
// datasetCategorization.processThumbnailset(start, limit, blockSize);
//
// System.out.println("Skipped items: "
// + datasetCategorization.getFailureCount());
// return outputFile;
//
// }
/**
 * Derives the subset name from the selection title, truncated to at most 15 characters.
 *
 * @param selectionDescription the selection whose TITLE field is used
 * @return the title, cut after 15 characters when it is longer
 */
protected String buildSubSetName(
        SelectionDescriptionImpl selectionDescription) {
    final String title = selectionDescription.getFieldValue(SelectionDescriptionEnum.TITLE);
    final int endIndex = Math.min(title.length(), 15);
    return title.substring(0, endIndex);
}
/**
 * Resolves the collection CSV file of a dataset for the currently configured processing step.
 *
 * @param dataset the dataset descriptor
 * @return the CSV file as resolved by the two-argument overload
 */
protected File getCollectionCsvFile(DatasetDescriptor dataset) {
    final String currentStep = getProcessingStep();
    return getCollectionCsvFile(dataset, currentStep);
}
/**
 * Resolves the collection CSV file of a dataset for a given processing step.
 * With a step the file is {@code <csv folder><step in lower case>/<image set>_<encoded collection>.csv};
 * without a step the superclass implementation decides.
 *
 * @param dataset the dataset descriptor
 * @param processingStep the processing step, may be null
 * @return the CSV file location
 */
protected File getCollectionCsvFile(DatasetDescriptor dataset,
        final String processingStep) {
    if (processingStep == null) {
        return super.getCollectionCsvFile(dataset);
    }

    final String stepFolder = getCollectionsCvsFolder() + processingStep.toLowerCase() + "/";
    final String csvName = dataset.getImageSetName() + "_"
            + encode(dataset.getCollectionName()) + ".csv";
    return new File(stepFolder + csvName);
}
// @Test
// public void categorizeThumbnails() throws FileNotFoundException,
// IOException {
// DatasetDescriptor datasetDescriptor;
// File thumbnailsFile;
// String thumbnailsFile = getCvsFileForStep(datasetDescriptor,
// STEP_THUMBNAILS);
// new File(thumbnailsFile)
// String outFile = getCvsFileForStep(datasetDescriptor, STEP_CLASSIFIED);
// File thumbnailsFile = new File(getCollectionsCvsFolder() +
// colectionThumbnailsFilename);
// File outFile = new File(getCollectionsCvsFolder() +
// colectionClassifiedFilename);
//
//
// LargeThumbnailsetProcessing datasetCategorization = new
// LargeThumbnailsetProcessing(
// thumbnailsFile);
// //String imageFolder = getConfiguration().getImageFolder(getDataset());
// String imageFolder = IMAGE_FOLDER;
//
// GrayScaleSepiaDetector observer = new GrayScaleSepiaDetector(new File(
// imageFolder), 85, 3);
// //final File outputFile = new File(outFile);
// observer.setOutputFile(outFile);
//
// datasetCategorization.addObserver(observer);
// blockSize = 1000;
// datasetCategorization.processThumbnailset(start, limit, blockSize);
//
// System.out.println("Skipped items: "
// + datasetCategorization.getFailureCount());
// //return outFile;
//
// }
/**
 * Concatenates every file found in the folder of a processing step into one dataset file.
 * All lines (including each file's first line, which is only logged as header) are copied,
 * each terminated with "\n".
 *
 * @param step the processing step; its lower-cased name is the sub-folder of the collections CSV folder
 * @param datasetFile the file to (over)write; missing parent folders are created
 * @throws IOException if the step folder cannot be listed (reported as {@link FileNotFoundException}
 *         before the dataset file is touched), or if reading or writing fails
 */
private void performDatasetAggregation(String step, File datasetFile)
        throws IOException {
    File cvsFolder = new File(getCollectionsCvsFolder()
            + step.toLowerCase() + "/");
    // listFiles() returns null (not an empty array) for a missing or unreadable folder;
    // check before opening the writer so an existing dataset file is not truncated
    File[] collectionFiles = cvsFolder.listFiles();
    if (collectionFiles == null) {
        throw new FileNotFoundException(
                "Cannot list collection files in folder: " + cvsFolder);
    }

    // a dataset file given without a folder has no parent
    File parentFolder = datasetFile.getParentFile();
    if (parentFolder != null) {
        parentFolder.mkdirs();
    }

    BufferedWriter datasetWriter = new BufferedWriter(new FileWriter(
            datasetFile));
    try {
        log.debug("Aggregating dataset: " + getDataset() + " to file: "
                + datasetFile);

        for (int i = 0; i < collectionFiles.length; i++) {
            BufferedReader reader = new BufferedReader(new FileReader(collectionFiles[i]));
            try {
                String line;
                boolean firstLine = true;
                while ((line = reader.readLine()) != null) {
                    // write headers to log only once per file
                    if (firstLine) {
                        log.debug("Writting dataset headerline: " + line);
                        firstLine = false;
                    }
                    // write all data to dataset
                    datasetWriter.write(line);
                    datasetWriter.write("\n");
                }
                datasetWriter.flush();
            } finally {
                // closing the reader stays best effort, but now also happens on failures
                try {
                    reader.close();
                } catch (IOException e) {
                    System.out.println("cannot close reader for: "
                            + collectionFiles[i]);
                    e.printStackTrace();
                }
            }
        }
        log.trace("Closing dataset file");
    } finally {
        // release the file handle even when reading or writing failed
        datasetWriter.close();
    }
}
/**
 * Creates the HTML/CSV overview of the ONB selection in /tmp/eucreative/design/onb_view/.
 *
 * @throws IOException propagated from creating the overview
 */
@Test
public void buildOnbHtmlView() throws IOException{
    final DatasetDescriptor onbDescriptor = new DatasetDescriptor("onb", "cc");
    final File onbIdsFile = new File(getCollectionsCvsFolder(), onbIdsFilename);
    final File viewFolder = new File("/tmp/eucreative/design/onb_view/");

    createSubsetHtml(onbDescriptor, onbIdsFile, viewFolder);
}
/**
 * Creates a simple overview of a subset: copies every thumbnail listed in the CSV input
 * into the output folder and writes a numbered "nr;id" listing both as HTML page (with the
 * images embedded) and as CSV file. Both files are named after the descriptor's string id
 * and replace previously generated ones.
 *
 * @param descriptor provides the base name of the generated files
 * @param csvInput thumbnail map file listing the items of the subset
 * @param outputFolder folder receiving the copied images, the HTML and the CSV file
 * @throws IOException propagated from reading the input, copying images or writing the files
 */
protected void createSubsetHtml(DatasetDescriptor descriptor,
        File csvInput, File outputFolder) throws IOException{
    final Map<String, String> subsetItems = readThumbnailsMap(csvInput);
    final File sourceFolder = getConfig().getImageFolderAsFile(getDataset());

    // start the csv listing from scratch
    final File listingCsv = new File(outputFolder, descriptor.getStringId()+".csv");
    if (listingCsv.exists()) {
        listingCsv.delete();
    }
    FileUtils.writeStringToFile(listingCsv, "#nr;id\n", "utf-8", true);

    // start the html page from scratch
    final File listingHtml = new File(outputFolder, descriptor.getStringId()+".html");
    if (listingHtml.exists()) {
        listingHtml.delete();
    }
    FileUtils.writeStringToFile(listingHtml,
            "<html charset='utf-8'> <body> image #nr;id<br>\n", "utf-8", true);

    int rowNr = 0;
    for (String itemId : subsetItems.keySet()) {
        rowNr++;

        // copy the thumbnail next to the generated page
        final File sourceImage = new File(sourceFolder, itemId+".jpg");
        final File copiedImage = new File(outputFolder, "/image"+itemId + ".jpg");
        copyFile(sourceImage, copiedImage);

        // image followed by its "nr;id" caption
        final String htmlEntry = "<img src='./image" + itemId+".jpg" + "'/><BR>"
                + rowNr + ";" + itemId+"<BR>\n";
        FileUtils.writeStringToFile(listingHtml, htmlEntry, "utf-8", true);

        final String csvEntry = rowNr + ";" + itemId + "\n";
        FileUtils.writeStringToFile(listingCsv, csvEntry, "utf-8", true);
    }

    // html footer
    FileUtils.writeStringToFile(listingHtml, "</html>", "utf-8", true);
}
/**
 * Creates the image retrieval configuration used by this test.
 *
 * @return a new configuration instance on every call
 */
protected IRConfiguration getConfig() {
    return new IRConfigurationImpl();
}
/**
 * Looks up a file of the current dataset in the configuration.
 *
 * @param urls true for the dataset URLs file, false for the dataset file
 * @return the requested file as provided by the configuration
 */
public File getDataSetFile(boolean urls) {
    final IRConfiguration configuration = getConfig();
    return urls
            ? configuration.getDatasetUrlsFile(getDataset())
            : configuration.getDatasetFile(getDataset());
}
/**
 * @return the collections CSV folder of the current dataset, ending with "/"
 */
protected String getCollectionsCvsFolder() {
    final String currentDataset = getDataset();
    return getCollectionsCvsFolder(currentDataset);
}
/**
 * Builds the collections CSV folder path of a dataset.
 *
 * @param dataset the dataset name, used as sub-folder of the collections folder
 * @return the collections folder followed by the dataset name and "/"
 */
public String getCollectionsCvsFolder(String dataset) {
    final String datasetFolder = IRTestConfigurations.COLLECTIONS_FOLDER + dataset + "/";
    return datasetFolder;
}
/**
 * @return the currently configured processing step, or null when none is set
 */
public String getProcessingStep() {
    return this.processingStep;
}
/**
 * Sets the processing step used by getCollectionCsvFile(DatasetDescriptor).
 *
 * @param processingStep the step name; null makes that lookup fall back to the superclass
 */
public void setProcessingStep(String processingStep) {
this.processingStep = processingStep;
}
/**
 * Looks up the image file of an item of the current dataset in the configuration.
 *
 * @param id the item id
 * @return the image file as provided by the configuration
 */
protected File getImageFile(String id) {
    final IRConfiguration configuration = getConfig();
    final String currentDataset = getDataset();
    return configuration.getImageFile(currentDataset, id);
}
}