/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.api.datasets;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.Dataset;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.DatasetFactory;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.Split;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
/**
 * Manual tests for {@link DatasetFactory}. Every test is {@code @Ignore}d; per the ignore
 * message they are meant to be enabled at times for offline testing / development.
 * Each test loads one or more datasets into the per-test output folder supplied by
 * {@link DkproTestContext} and runs basic sanity checks on the result.
 */
public class DatasetFactoryTest
{
    /**
     * Loads a single dataset by id and checks it.
     *
     * @throws Exception
     *             if loading the dataset fails.
     */
    @Ignore("Used at times for offline testing / development")
    @Test
    public void testOne()
        throws Exception
    {
        Path cache = testContext.getTestOutputFolder().toPath();
        DatasetFactory df = new DatasetFactory(cache);
        {
            Dataset ds = df.load("wasr-en-xl-1.00");
            assertDatasetOk(ds);
        }
        // {
        // Dataset ds = df.load("ndt-nb-1.01");
        // assertDatasetOk(ds);
        // }
    }

    /**
     * Loads every dataset id reported by {@link DatasetFactory#listIds()} and checks each one.
     *
     * @throws Exception
     *             if loading any dataset fails.
     */
    @Ignore("Used at times for offline testing / development")
    @Test
    public void testLoadAll()
        throws Exception
    {
        Path cache = testContext.getTestOutputFolder().toPath();
        DatasetFactory df = new DatasetFactory(cache);
        for (String id : df.listIds()) {
            Dataset ds = df.load(id);
            assertDatasetOk(ds);
        }
    }

    /**
     * Loads two datasets through the same factory/cache and checks both.
     * NOTE(review): the test name suggests the two "perseus" datasets share downloaded
     * artifacts - confirm against the dataset descriptions.
     *
     * @throws Exception
     *             if loading either dataset fails.
     */
    @Ignore("Used at times for offline testing / development")
    @Test
    public void testShared()
        throws Exception
    {
        Path cache = testContext.getTestOutputFolder().toPath();
        DatasetFactory df = new DatasetFactory(cache);
        Dataset ds1 = df.load("perseus-el-2.1");
        assertDatasetOk(ds1);
        Dataset ds2 = df.load("perseus-la-2.1");
        assertDatasetOk(ds2);
    }

    /**
     * Loads the "germeval2014-de" dataset and checks it.
     *
     * @throws Exception
     *             if loading the dataset fails.
     */
    @Ignore("Used at times for offline testing / development")
    @Test
    public void testLoadSimple()
        throws Exception
    {
        Path cache = testContext.getTestOutputFolder().toPath();
        DatasetFactory df = new DatasetFactory(cache);
        Dataset ds = df.load("germeval2014-de");
        assertDatasetOk(ds);
    }

    /**
     * Loads the "brown-en-teixml" dataset, checks it, and additionally verifies that
     * {@code brownCorpus-TEI-XML/brown_tei/Corpus.xml} is absent below the cache folder.
     * NOTE(review): presumably that file is filtered out when the archive is exploded -
     * confirm against the dataset description.
     *
     * @throws Exception
     *             if loading the dataset fails.
     */
    @Ignore("Used at times for offline testing / development")
    @Test
    public void testLoadWithExplode()
        throws Exception
    {
        Path cache = testContext.getTestOutputFolder().toPath();
        DatasetFactory df = new DatasetFactory(cache);
        Dataset ds = df.load("brown-en-teixml");
        assertDatasetOk(ds);
        assertFalse(Files.exists(cache.resolve("brownCorpus-TEI-XML/brown_tei/Corpus.xml")));
    }

    /**
     * Prints a short summary of the dataset to stdout and asserts that name, language and
     * at least one data file are set, and that all split and license files which are
     * reported actually exist.
     *
     * @param ds
     *            the dataset to check.
     */
    private void assertDatasetOk(Dataset ds)
    {
        Split split = ds.getDefaultSplit();

        // The counts are formatted with %s (not %d): a missing file set is rendered as the
        // string "none", and passing a String to %d throws IllegalFormatConversionException.
        System.out.printf("Dataset : %s%n", ds.getName());
        System.out.printf("Data files : %s%n", countOrNone(ds.getDataFiles()));
        if (split != null) {
            System.out.printf("Training set : %s%n", countOrNone(split.getTrainingFiles()));
            System.out.printf("Development set : %s%n",
                    countOrNone(split.getDevelopmentFiles()));
            System.out.printf("Testing set : %s%n", countOrNone(split.getTestFiles()));
        }

        assertNotNull("Name not set", ds.getName());
        assertNotNull("Language not set", ds.getLanguage());
        if (split != null) {
            assertNullOrExists(split.getTrainingFiles());
            assertNullOrExists(split.getTestFiles());
            assertNullOrExists(split.getDevelopmentFiles());
        }
        assertNullOrExists(ds.getLicenseFiles());
        assertNotNull("Data files not set", ds.getDataFiles());
        assertTrue("No data files", ds.getDataFiles().length > 0);
    }

    /**
     * Renders the size of a file array for the diagnostic output.
     *
     * @param aFiles
     *            the files, may be {@code null}.
     * @return the number of files as a string, or {@code "none"} if the array is {@code null}.
     */
    private static String countOrNone(File... aFiles)
    {
        return aFiles != null ? String.valueOf(aFiles.length) : "none";
    }

    /**
     * Asserts that every given file exists. A {@code null} array is accepted and skipped.
     *
     * @param aFiles
     *            the files to check, may be {@code null}.
     */
    private void assertNullOrExists(File... aFiles)
    {
        if (aFiles != null) {
            for (File f : aFiles) {
                assertTrue("File does not exist: [" + f + "]", f.exists());
            }
        }
    }

    /** Provides the per-test output folder that is used as the dataset cache. */
    @Rule
    public DkproTestContext testContext = new DkproTestContext();
}