package eu.europeana.creative.dataset;
import static org.junit.Assert.assertEquals;
import it.cnr.isti.indexer.IndexHelper;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.junit.Test;
import eu.europeana.api.client.dataset.DatasetDescriptor;
import eu.europeana.api.client.exception.EuropeanaApiProblem;
import eu.europeana.api.client.search.query.Api2QueryInterface;
import eu.europeana.api.client.thumbnails.ThumbnailsAccessor;
import eu.europeana.api.client.thumbnails.ThumbnailsForCollectionAccessorTest;
import eu.europeana.service.ir.image.IRConfiguration;
import eu.europeana.service.ir.image.IRConfigurationImpl;
import eu.europeana.service.ir.image.api.ImageSearchingService;
import eu.europeana.service.ir.image.api.ImageSearchingServiceImpl;
import eu.europeana.service.ir.image.exceptions.ImageSearchingException;
/**
 * Test-style utility that assembles the "demo" evaluation dataset.
 *
 * <p>The class offers three independent steps, each written as a JUnit test
 * method so that it can be launched from an IDE or build tool:
 * <ol>
 * <li>{@link #createDemoDataset()} builds one image set per collection query
 * and aggregates the resulting collection files into one dataset file;</li>
 * <li>{@link #downloadThumbnails()} copies the thumbnails listed in the
 * dataset file into the image folder of the dataset;</li>
 * <li>{@link #buildIndexedUrlsFile()} writes the "id; url" pairs of all items
 * for which the image searching service reports at least one result.</li>
 * </ol>
 * Only {@link #buildIndexedUrlsFile()} currently carries an active
 * {@code @Test} annotation; the other two have it commented out.
 */
public class EvaluationDatasetBuilderTest extends
        ThumbnailsForCollectionAccessorTest implements IRTestConfigurations {

    // Top-level class labels attached to dataset descriptors.
    public static String CLASS_PAINTINGS = "paintings";
    public static String CLASS_OBJECTS = "objects";
    public static String CLASS_DRAWINGS = "drawings";
    public static String CLASS_BIRDS = "birds";
    public static String CLASS_INSECTS = "insects";
    public static String CLASS_BUILDINGS = "buildings";

    // Sub-class labels attached to dataset descriptors. The literal values are
    // runtime data and are intentionally left untouched, spelling included.
    public static String SUB_CLASS_PORTRAINTS = "portraits";
    public static String SUB_CLASS_DECOR_MINIATURS = "decor miniaturs";
    public static String SUB_CLASS_LANDSCAPES = "landscapes";
    public static String SUB_CLASS_BOTTLES = "bottles";
    public static String SUB_CLASS_PORCELAIN = "porcelain";
    public static String SUB_CLASS_PARROTS = "parrots";
    public static String SUB_CLASS_DUCKS = "ducks";
    public static String SUB_CLASS_WOODPECKERS = "woodpeckers";
    public static String SUB_CLASS_HAWKS_EAGLES = "hawks and eagles";
    public static String SUB_CLASS_ELECTRICS = "electrical engineering";
    public static String SUB_CLASS_OPTICS = "optical engineering";
    public static String SUB_CLASS_BUTTERFLIES = "butterflies";
    public static String SUB_CLASS_ICONS = "icons";
    public static String SUB_CLASS_MURALPAINTINGS = "mural paintings";
    public static String SUB_CLASS_CIVILS = "civils";
    public static String SUB_CLASS_PEASANT_HOUSES = "peasent houses";
    public static String SUB_CLASS_INTERIORS = "interiors";
    public static String SUB_CLASS_CHURCHES = "churches";
    public static String SUB_CLASS_TRUMPETS = "musical trumpets";
    public static String SUB_CLASS_CLOCK_TOWERS = "clock towers";

    /** Name of the dataset handled by every step in this class. */
    final String DATASET_DEMO = "demo";

    /** Name of the dataset currently being processed; set at the start of each step. */
    private String dataset = null;

    /** Lazily created by {@link #getImageSearchingService()}. */
    private ImageSearchingService imageSearchingService;

    /**
     * Builds the image set of every collection query that makes up the demo
     * dataset, asserts the number of objects returned for each of them and
     * finally aggregates all collection files into the dataset file.
     *
     * <p>The {@code @Test} annotation is commented out, so JUnit does not run
     * this method unless the annotation is restored.
     *
     * @throws IOException if the final aggregation step fails
     * @throws EuropeanaApiProblem declared for the query and image set calls
     */
    // @Test
    public void createDemoDataset() throws IOException, EuropeanaApiProblem {
        setDataset(DATASET_DEMO);

        // Rijksmuseum: portrait paintings
        DatasetDescriptor descriptor = new DatasetDescriptor(
                "Rijksmuseum-portrets", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_PORTRAINTS });
        Api2QueryInterface query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "portret", "schilderij");
        int imageCount = buildImageSet(descriptor, query);
        assertEquals(1243, imageCount);

        // Rijksmuseum: miniature objects
        descriptor = new DatasetDescriptor(
                "Rijksmuseum-miniatuur", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_OBJECTS, SUB_CLASS_DECOR_MINIATURS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "miniatuur beeld", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(68, imageCount);

        // Rijksmuseum: landscape paintings
        descriptor = new DatasetDescriptor(
                "Rijksmuseum-landschap", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_LANDSCAPES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "landschap", "schilderij");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(424, imageCount);

        // Rijksmuseum: bottles
        descriptor = new DatasetDescriptor(
                "Rijksmuseum-fles", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_OBJECTS, SUB_CLASS_BOTTLES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), null, "fles");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(139, imageCount);

        // Rijksmuseum: landscape drawings
        descriptor = new DatasetDescriptor(
                "Rijksmuseum-drawing-lanschap", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_DRAWINGS, SUB_CLASS_LANDSCAPES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "landschap", "tekening");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(701, imageCount);

        // Rijksmuseum: porcelain
        descriptor = new DatasetDescriptor(
                "Rijksmuseum-porselein", "90402_M_NL_Rijksmuseum",
                new String[] { CLASS_OBJECTS, SUB_CLASS_PORCELAIN });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "Hollands porselein", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(145, imageCount);

        // Teylers: parrots
        descriptor = new DatasetDescriptor(
                "Teylers-parrot", "10106_Ag_EU_STERNA_48",
                new String[] { CLASS_BIRDS, SUB_CLASS_PARROTS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "parrot", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(105, imageCount);

        // Teylers: ducks
        descriptor = new DatasetDescriptor(
                "Teylers-duck", "10106_Ag_EU_STERNA_48",
                new String[] { CLASS_BIRDS, SUB_CLASS_DUCKS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "duck", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(120, imageCount);

        // Teylers: woodpeckers
        descriptor = new DatasetDescriptor(
                "Teylers-woodpecker", "10106_Ag_EU_STERNA_48",
                new String[] { CLASS_BIRDS, SUB_CLASS_WOODPECKERS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "woodpecker", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(210, imageCount);

        // Teylers: hawks and eagles
        descriptor = new DatasetDescriptor(
                "Teylers-falco", "10106_Ag_EU_STERNA_48",
                new String[] { CLASS_BIRDS, SUB_CLASS_HAWKS_EAGLES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "falco", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(146, imageCount);

        // Galileo: electrical engineering
        descriptor = new DatasetDescriptor(
                "Galileo-elettrica", "02301_Ag_IT_MG_catalogue",
                new String[] { CLASS_OBJECTS, SUB_CLASS_ELECTRICS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "ingegneria elettrica", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(231, imageCount);

        // Galileo: optics
        descriptor = new DatasetDescriptor(
                "Galileo-optics", "02301_Ag_IT_MG_catalogue",
                new String[] { CLASS_OBJECTS, SUB_CLASS_OPTICS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "optics", null, "IMAGE");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(195, imageCount);

        // MIMO: trumpets
        descriptor = new DatasetDescriptor(
                "MIMO-trompe", "09102_Ag_EU_MIMO_ESE",
                new String[] { CLASS_OBJECTS, SUB_CLASS_TRUMPETS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "trompe", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(1194, imageCount);

        // Natural Europe: butterflies
        descriptor = new DatasetDescriptor(
                "NHM-LISABON-butterflies", "2023901_Ag_EU_NaturalEurope_all",
                new String[] { CLASS_INSECTS, SUB_CLASS_BUTTERFLIES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "butterflies", null, "IMAGE");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(376, imageCount);

        // Athena (Chouvashia State Art Museum): icons
        descriptor = new DatasetDescriptor(
                "Athena-icon", "08515_Ag_EU_ATHENA_ChouvashiaStateArtMuseum",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_ICONS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                null, // what
                "Неизвестный иконописец", // who
                null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(117, imageCount);

        // Athena (State Tretyakov Gallery): icons
        descriptor = new DatasetDescriptor(
                "Athena-icon", "08559_Ag_EU_ATHENA_The_State_Tretyakov_Gallery",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_ICONS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                null, // what
                "Неизвестный иконописец", // who
                null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(33, imageCount);

        // Cimec (collection 05812): icons
        descriptor = new DatasetDescriptor(
                "Cimec-icoana", "05812_L_RO_CIMEC_ese",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_ICONS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                "icoana",
                null, // what
                null, // who
                null,
                "cIMeC - Institutul de Memorie Culturală");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(244, imageCount);

        // Cimec (collection 05811): icons
        descriptor = new DatasetDescriptor(
                "Cimec-icoana", "05811_L_RO_CIMEC_ese",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_ICONS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                "icoana",
                null, // what
                null, // who
                null,
                "cIMeC - Institutul de Memorie Culturală");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(57, imageCount);

        // TEL (Vienna): mural paintings
        descriptor = new DatasetDescriptor(
                "Tel-muralpainting", "9200170_Ag_EU_TEL_a1019d_EU_Libraries_Vienna",
                new String[] { CLASS_PAINTINGS, SUB_CLASS_MURALPAINTINGS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "church", "mural paintings");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(1088, imageCount);

        // Romania (Elocal): civil buildings
        descriptor = new DatasetDescriptor(
                "Romania-building", "2022404_Ag_RO_Elocal_clujulin",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_CIVILS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), null, "building");
        imageCount = buildImageSet(descriptor, query);
        assertEquals(134, imageCount);

        // Romania (Athena, Institute for Cultural Memory): peasant houses
        descriptor = new DatasetDescriptor(
                "Romania-building", "08511_Ag_EU_ATHENA_InstituteforCulturalMemory*",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_PEASANT_HOUSES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                "building", // what
                null, // who
                null, null, null,
                new String[] { "NOT gips", "NOT capitel" });
        imageCount = buildImageSet(descriptor, query);
        assertEquals(192, imageCount);

        // Judaica: spice towers
        descriptor = new DatasetDescriptor(
                "Judaica-spicetower", "09307_Ag_EU_Judaica_Jewish_Museum_London",
                new String[] { CLASS_OBJECTS, SUB_CLASS_DECOR_MINIATURS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(), "\"spice tower\"", null);
        imageCount = buildImageSet(descriptor, query);
        assertEquals(121, imageCount);

        // CultureGrid: clock towers
        descriptor = new DatasetDescriptor(
                "CultureGrid-clocktower", "2022*",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_CLOCK_TOWERS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null, "\"clock tower\"", null, "IMAGE", "CultureGrid", null,
                new String[] { "NOT bridge", "NOT square" });
        imageCount = buildImageSet(descriptor, query);
        assertEquals(296, imageCount);

        // TEL (British Library): palace interiors
        descriptor = new DatasetDescriptor(
                "TEL-palace", "92037_Ag_EU_TEL_a0444_BritishLibrary",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_INTERIORS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                "palace", // what
                null, // who
                null, null, null,
                new String[] { "room", "interior", "NOT Mary",
                        "NOT \"Veliko Tarnovo\"" });
        imageCount = buildImageSet(descriptor, query);
        assertEquals(36, imageCount);

        // Collections matching "2022*": palace interiors
        descriptor = new DatasetDescriptor(
                "TEL-palace", "2022*",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_INTERIORS });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                "palace", // what
                null, // who
                null, null, null,
                new String[] { "room", "interior", "NOT Mary",
                        "NOT \"Veliko Tarnovo\"" });
        imageCount = buildImageSet(descriptor, query);
        assertEquals(95, imageCount);

        // Europeana Local (Durham County Council): Durham Cathedral exteriors
        descriptor = new DatasetDescriptor(
                "EU_LOCAL_Durhamcathedral", "2022316_Ag_UK_ELocal_DurhamCountyCouncil",
                new String[] { CLASS_BUILDINGS, SUB_CLASS_CHURCHES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                null,
                "\"Durham Cathedral\"", // what
                null, // who
                "IMAGE", null, null,
                new String[] { "NOT interior", "NOT Cloister",
                        "NOT Sanctuary", "NOT roof", "NOT exhibit",
                        "NOT \"vibration tests\"" });
        imageCount = buildImageSet(descriptor, query);
        assertEquals(154, imageCount);

        // OpenUp: butterflies; the only call that passes the extra
        // (400, 500, ERROR_POLICY_RETHROW) arguments to buildImageSet.
        descriptor = new DatasetDescriptor(
                "OpenUp-butterfly", "11617_Ag_EU_OpenUp*",
                new String[] { CLASS_INSECTS, SUB_CLASS_BUTTERFLIES });
        query = getQueryBuilder().buildQuery(
                descriptor.getCollectionName(),
                "\"Zoological collections of the University of Tartu\"", null);
        imageCount = buildImageSet(descriptor, query, 400, 500,
                ThumbnailsAccessor.ERROR_POLICY_RETHROW);
        assertEquals(500, imageCount);

        performDatasetAggregation();
    }

    /**
     * Concatenates every regular file found in the collections folder of the
     * current dataset into the dataset file.
     *
     * @throws FileNotFoundException if the collections folder cannot be listed
     * @throws IOException if reading a collection file or writing the dataset
     *             file fails
     */
    private void performDatasetAggregation() throws IOException {
        File cvsFolder = new File(getCollectionsCvsFolder());
        File[] collectionFiles = cvsFolder.listFiles();
        if (collectionFiles == null) {
            // listFiles() yields null when the path is not a directory or
            // cannot be read; fail before the output file is opened.
            throw new FileNotFoundException(
                    "Cannot list collection files in folder: " + cvsFolder);
        }

        BufferedWriter datasetWriter = getDataSetFileWriter(false);
        try {
            for (File collectionFile : collectionFiles) {
                if (!collectionFile.isFile()) {
                    // sub-directories cannot be opened with a FileReader
                    continue;
                }
                appendCollectionFile(collectionFile, datasetWriter);
            }
        } finally {
            // close() also flushes; runs even when the copy fails half-way
            datasetWriter.close();
        }
    }

    /**
     * Copies all lines of one collection file to the dataset writer and echoes
     * the first line of the file to standard output. The first line is written
     * to the dataset like every other line.
     *
     * @param collectionFile the file to read
     * @param datasetWriter the open writer of the aggregated dataset file
     * @throws IOException if reading or writing fails
     */
    private void appendCollectionFile(File collectionFile,
            BufferedWriter datasetWriter) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(collectionFile));
        try {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                // write headers to sysout
                if (firstLine) {
                    System.out.println(line);
                    firstLine = false;
                }
                // write all data to dataset
                datasetWriter.write(line);
                datasetWriter.write("\n");
            }
            datasetWriter.flush();
        } finally {
            // Best effort: a failing close must not abort the aggregation.
            try {
                reader.close();
            } catch (IOException e) {
                System.out.println("cannot close reader for: "
                        + collectionFile);
                e.printStackTrace();
            }
        }
    }

    /**
     * Resolves the file of the current dataset from the configuration.
     *
     * @param urls {@code true} for the dataset URLs file, {@code false} for
     *            the dataset file itself
     * @return the file returned by the configuration for {@link #getDataset()}
     */
    public File getDataSetFile(boolean urls) {
        IRConfiguration config = getConfig();
        if (urls) {
            return config.getDatasetUrlsFile(getDataset());
        } else {
            return config.getDatasetFile(getDataset());
        }
    }

    /**
     * Intentionally empty to avoid execution. Presumably this shadows a test
     * of the same name inherited from the parent class; confirm there.
     */
    public void testGetThumbnailsForCollectionLimit() {
        // avoid execution
    }

    /**
     * Intentionally empty to avoid execution. Presumably this shadows a test
     * of the same name inherited from the parent class; confirm there.
     */
    public void testGetThumbnailsForCollectionAll() {
        // avoid execution
    }

    /**
     * @return the name of the dataset currently being processed, or
     *         {@code null} if none has been set yet
     */
    public String getDataset() {
        return dataset;
    }

    /**
     * @param dataset the name of the dataset to process
     */
    public void setDataset(String dataset) {
        this.dataset = dataset;
    }

    /**
     * Copies the thumbnails listed in the demo dataset file into the image
     * folder of the demo dataset and prints the ids of all skipped items.
     *
     * <p>The {@code @Test} annotation is commented out, so JUnit does not run
     * this method unless the annotation is restored.
     *
     * @throws FileNotFoundException declared for the dataset file access
     * @throws IOException declared for the thumbnail map and copy calls
     */
    // @Test
    public void downloadThumbnails() throws FileNotFoundException, IOException {
        setDataset(DATASET_DEMO);

        File datasetFile = getConfig().getDatasetFile(DATASET_DEMO);
        Map<String, String> thumbnailsMap = new IndexHelper()
                .getThumbnailsMap(datasetFile);

        File imageFolder = getConfig().getImageFolderAsFile(DATASET_DEMO);
        List<String> skippedItems = new ThumbnailsAccessor().copyThumbnails(
                thumbnailsMap, imageFolder);

        System.out.println("Skipped items: " + skippedItems.size());
        for (String itemId : skippedItems) {
            System.out.println(itemId);
        }
    }

    /**
     * @return a newly created configuration object on every call
     */
    protected IRConfiguration getConfig() {
        return new IRConfigurationImpl();
    }

    /**
     * Writes one "id; url" line to the dataset URLs file for every thumbnail
     * of the demo dataset for which a similarity search reports more than
     * zero total results. Items without results and items whose search fails
     * are reported on standard output and left out of the file.
     *
     * @throws FileNotFoundException declared for the dataset file access
     * @throws IOException if writing or closing the URLs file fails
     * @throws ImageSearchingException declared by the original signature;
     *             search failures of single items are caught and logged
     */
    @Test
    public void buildIndexedUrlsFile() throws FileNotFoundException,
            IOException, ImageSearchingException {
        setDataset(DATASET_DEMO);

        File datasetFile = getConfig().getDatasetFile(getDataset());
        Map<String, String> thumbnailsMap = new IndexHelper()
                .getThumbnailsMap(datasetFile);

        int counter = 0;
        BufferedWriter indexedUrlsWriter = getDataSetFileWriter(true);
        try {
            for (Map.Entry<String, String> thumbnail : thumbnailsMap.entrySet()) {
                try {
                    ImageSearchingService searchService = getImageSearchingService();
                    searchService.searchSimilar(thumbnail.getKey());
                    if (searchService.getTotalResults() > 0) {
                        // write to file
                        indexedUrlsWriter.append(thumbnail.getKey()).append("; ");
                        indexedUrlsWriter.append(thumbnail.getValue()).append("\n");
                        counter++;
                    } else {
                        // not indexed yet
                        System.out.println("Skipped item: " + thumbnail.getKey());
                    }
                } catch (ImageSearchingException e) {
                    System.out.println(e.getMessage());
                }
            }
        } finally {
            // Without closing, buffered lines may never reach the file.
            indexedUrlsWriter.close();
        }
        System.out.println("correct items: " + counter);
    }

    /**
     * Returns the image searching service, creating and initializing it for
     * the current dataset on first use.
     *
     * @return the cached service instance
     */
    public ImageSearchingService getImageSearchingService() {
        if (imageSearchingService == null) {
            imageSearchingService = new ImageSearchingServiceImpl(getDataset(),
                    getConfig());
            imageSearchingService.init();
        }
        return imageSearchingService;
    }

    /**
     * @param dataset the dataset name
     * @return {@code COLLECTIONS_FOLDER} followed by the dataset name and "/"
     */
    @Override
    public String getCollectionsCvsFolder(String dataset) {
        return COLLECTIONS_FOLDER + dataset + "/";
    }

    /**
     * @return the collections folder of the current dataset
     */
    @Override
    protected String getCollectionsCvsFolder() {
        return getCollectionsCvsFolder(getDataset());
    }
}