/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.api.datasets.internal.ud;
import static java.util.Arrays.asList;
import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.Dataset;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.Split;
import de.tudarmstadt.ukp.dkpro.core.api.datasets.internal.SplitImpl;
/**
 * {@link Dataset} backed by a single directory containing {@code *.conllu} files.
 * <p>
 * The default split is derived from the file name suffixes found directly inside the base
 * directory: {@code -train.conllu}, {@code -test.conllu} and {@code -dev.conllu}. The listings are
 * taken once, at construction time.
 */
public class UDDataset
    implements Dataset
{
    private static final String TRAIN_SUFFIX = "-train.conllu";
    private static final String TEST_SUFFIX = "-test.conllu";
    private static final String DEV_SUFFIX = "-dev.conllu";

    private final File baseDir;
    private final Split defaultSplit;

    /**
     * Creates a dataset for the given directory and scans it for training, test and development
     * files.
     *
     * @param aBaseDir
     *            the directory holding the data files and the license file.
     * @throws NullPointerException
     *             if {@code aBaseDir} is {@code null}.
     */
    public UDDataset(File aBaseDir)
    {
        if (aBaseDir == null) {
            throw new NullPointerException("Base directory must not be null");
        }

        baseDir = aBaseDir;
        defaultSplit = new SplitImpl(
                listBySuffix(aBaseDir, TRAIN_SUFFIX),
                listBySuffix(aBaseDir, TEST_SUFFIX),
                listBySuffix(aBaseDir, DEV_SUFFIX));
    }

    /**
     * @return the name of the base directory.
     */
    @Override
    public String getName()
    {
        return baseDir.getName();
    }

    /**
     * Derives the language from a data file name: everything before the first {@code -}.
     * Training files are consulted first, then test files, then development files.
     *
     * @return the leading, dash-delimited segment of the first available split file name.
     * @throws IllegalStateException
     *             if the default split contains no files at all.
     */
    @Override
    public String getLanguage()
    {
        for (File[] files : splitFiles()) {
            if (files != null && files.length > 0) {
                return files[0].getName().split("-")[0];
            }
        }

        throw new IllegalStateException("Cannot determine language: no train/test/dev "
                + "*.conllu files found in [" + baseDir + "]");
    }

    /**
     * @return always {@code "UTF-8"}.
     */
    @Override
    public String getEncoding()
    {
        return "UTF-8";
    }

    /**
     * @return a single-element array pointing at {@code LICENSE.txt} inside the base directory.
     *         Whether that file exists is not checked here.
     */
    @Override
    public File[] getLicenseFiles()
    {
        return new File[] { new File(baseDir, "LICENSE.txt") };
    }

    /**
     * @return the union of the training, test and development files of the default split, without
     *         duplicates, sorted by path.
     */
    @Override
    public File[] getDataFiles()
    {
        Set<File> all = new HashSet<>();
        for (File[] files : splitFiles()) {
            if (files != null) {
                all.addAll(asList(files));
            }
        }

        File[] result = all.toArray(new File[all.size()]);
        sortByPath(result);
        return result;
    }

    /**
     * @return the split computed when this dataset was constructed.
     */
    @Override
    public Split getDefaultSplit()
    {
        return defaultSplit;
    }

    /**
     * Collects the three file groups of the default split in the order train, test, development.
     */
    private File[][] splitFiles()
    {
        return new File[][] {
                defaultSplit.getTrainingFiles(),
                defaultSplit.getTestFiles(),
                defaultSplit.getDevelopmentFiles() };
    }

    /**
     * Lists the direct children of a directory whose name ends with the given suffix.
     * <p>
     * {@link File#listFiles(java.io.FileFilter)} returns {@code null} when the path is not a
     * directory or cannot be read and makes no promise about ordering, so the result is
     * normalized to a non-null array sorted by path.
     */
    private static File[] listBySuffix(File aDir, String aSuffix)
    {
        File[] files = aDir.listFiles((File f) -> f.getName().endsWith(aSuffix));
        if (files == null) {
            return new File[0];
        }

        sortByPath(files);
        return files;
    }

    /**
     * Sorts the given files in place by their path string.
     */
    private static void sortByPath(File[] aFiles)
    {
        Arrays.sort(aFiles, (a, b) -> a.getPath().compareTo(b.getPath()));
    }
}