package at.ac.univie.mminf.qskos4j;
import at.ac.univie.mminf.qskos4j.issues.HierarchyGraphBuilder;
import at.ac.univie.mminf.qskos4j.issues.Issue;
import at.ac.univie.mminf.qskos4j.issues.clusters.DisconnectedConceptClusters;
import at.ac.univie.mminf.qskos4j.issues.concepts.AuthoritativeConcepts;
import at.ac.univie.mminf.qskos4j.issues.concepts.InvolvedConcepts;
import at.ac.univie.mminf.qskos4j.issues.concepts.OrphanConcepts;
import at.ac.univie.mminf.qskos4j.issues.concepts.UndocumentedConcepts;
import at.ac.univie.mminf.qskos4j.issues.conceptscheme.*;
import at.ac.univie.mminf.qskos4j.issues.count.AggregationRelations;
import at.ac.univie.mminf.qskos4j.issues.count.SemanticRelations;
import at.ac.univie.mminf.qskos4j.issues.cycles.HierarchicalCycles;
import at.ac.univie.mminf.qskos4j.issues.inlinks.MissingInLinks;
import at.ac.univie.mminf.qskos4j.issues.labels.*;
import at.ac.univie.mminf.qskos4j.issues.labels.util.ResourceLabelsCollector;
import at.ac.univie.mminf.qskos4j.issues.language.IncompleteLanguageCoverage;
import at.ac.univie.mminf.qskos4j.issues.language.NoCommonLanguages;
import at.ac.univie.mminf.qskos4j.issues.language.OmittedOrInvalidLanguageTags;
import at.ac.univie.mminf.qskos4j.issues.language.util.LanguageCoverage;
import at.ac.univie.mminf.qskos4j.issues.outlinks.BrokenLinks;
import at.ac.univie.mminf.qskos4j.issues.outlinks.HttpURIs;
import at.ac.univie.mminf.qskos4j.issues.outlinks.HttpUriSchemeViolations;
import at.ac.univie.mminf.qskos4j.issues.outlinks.MissingOutLinks;
import at.ac.univie.mminf.qskos4j.issues.relations.ReflexivelyRelatedConcepts;
import at.ac.univie.mminf.qskos4j.issues.relations.SolelyTransitivelyRelatedConcepts;
import at.ac.univie.mminf.qskos4j.issues.relations.UnidirectionallyRelatedConcepts;
import at.ac.univie.mminf.qskos4j.issues.relations.ValuelessAssociativeRelations;
import at.ac.univie.mminf.qskos4j.issues.skosintegrity.HierarchicalRedundancy;
import at.ac.univie.mminf.qskos4j.issues.skosintegrity.MappingClashes;
import at.ac.univie.mminf.qskos4j.issues.skosintegrity.RelationClashes;
import at.ac.univie.mminf.qskos4j.issues.skosintegrity.UndefinedSkosResources;
import at.ac.univie.mminf.qskos4j.progress.IProgressMonitor;
import at.ac.univie.mminf.qskos4j.progress.StubProgressMonitor;
import at.ac.univie.mminf.qskos4j.util.vocab.RepositoryBuilder;
import org.openrdf.OpenRDFException;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Main class intended for easy interaction with qSKOS. On instantiation an in-memory ("local") repository
 * containing the passed controlled vocabulary is created which can be queried by calling the methods of this class.
 *
 * <p>All issues known to this class are created and registered once at construction time, in a fixed order
 * (statistical, analytical, SKOS integrity, slow). Configuration setters forward their values to the
 * issue instances that need them.
 *
 * @author christian
 *
 */
public class QSkos {

    /**
     * Delay to avoid flooding "external" sources. This is used, e.g., when dereferencing lots of links or sending
     * many queries to a SPARQL endpoint
     */
    private static final int EXT_ACCESS_MILLIS = 1500;

    private String baseURI;

    // Issue/helper instances that are shared between several issues or configured through setters below.
    private LanguageCoverage languageCoverage;
    private BrokenLinks brokenLinks;
    private InvolvedConcepts involvedConcepts;
    private AuthoritativeConcepts authoritativeConcepts;
    private MissingInLinks missingInLinks;
    private ConceptSchemes conceptSchemes;
    private HttpURIs httpURIs;

    /** All issues managed by this instance, in registration order. */
    private final List<Issue> registeredIssues = new ArrayList<Issue>();

    /**
     * Creates an instance with all issues registered and connects them to a repository that is set up
     * from the passed file.
     *
     * @param file file the repository is set up from
     * @throws OpenRDFException declared by the repository setup and connection calls
     * @throws IOException declared by the repository setup
     */
    public QSkos(File file) throws OpenRDFException, IOException {
        this();

        RepositoryBuilder repositoryBuilder = new RepositoryBuilder();
        Repository repo = repositoryBuilder.setUpFromFile(file, null, null);
        setRepositoryConnection(repo.getConnection());
    }

    /**
     * Creates an instance with all issues registered and a {@link StubProgressMonitor} assigned to each of them.
     * No repository connection is set; use {@link #setRepositoryConnection(RepositoryConnection)} for that.
     */
    public QSkos() {
        // The order matters: later groups reuse instances created by addStatisticalIssues(),
        // and the registration order is what getAllIssues() exposes.
        addStatisticalIssues();
        addAnalyticalIssues();
        addSkosIntegrityIssues();
        addSlowIssues();

        setProgressMonitor(new StubProgressMonitor());
    }

    /**
     * Creates the shared issue/helper instances (involved and authoritative concepts, concept schemes,
     * HTTP URIs, language coverage) and registers the statistical issues.
     */
    private void addStatisticalIssues() {
        involvedConcepts = new InvolvedConcepts();
        authoritativeConcepts = new AuthoritativeConcepts(involvedConcepts);
        // baseURI has not been assigned at construction time; setBaseURI(String) forwards later changes.
        authoritativeConcepts.setBaseURI(baseURI);
        conceptSchemes = new ConceptSchemes();
        httpURIs = new HttpURIs();
        languageCoverage = new LanguageCoverage(involvedConcepts);

        registeredIssues.add(involvedConcepts);
        registeredIssues.add(authoritativeConcepts);
        registeredIssues.add(new LexicalRelations(involvedConcepts));
        registeredIssues.add(new SemanticRelations());
        registeredIssues.add(new AggregationRelations());
        registeredIssues.add(conceptSchemes);
        // Fully qualified because java.util.Collections is also in scope via the wildcard import.
        registeredIssues.add(new at.ac.univie.mminf.qskos4j.issues.count.Collections());
        registeredIssues.add(httpURIs);
    }

    /**
     * Registers the analytical issues. Must run after {@link #addStatisticalIssues()} because it passes
     * the shared instances created there to the new issues.
     */
    private void addAnalyticalIssues() {
        // One graph builder instance is shared by all issues that take one.
        HierarchyGraphBuilder hierarchyGraphBuilder = new HierarchyGraphBuilder();

        registeredIssues.add(new EmptyLabeledResources());
        registeredIssues.add(new OmittedOrInvalidLanguageTags());
        registeredIssues.add(new IncompleteLanguageCoverage(languageCoverage));
        registeredIssues.add(new UndocumentedConcepts(authoritativeConcepts));
        registeredIssues.add(new NoCommonLanguages(languageCoverage));
        registeredIssues.add(new MissingLabels(authoritativeConcepts, conceptSchemes));
        registeredIssues.add(new OverlappingLabels(involvedConcepts));
        registeredIssues.add(new OrphanConcepts(involvedConcepts));
        registeredIssues.add(new DisconnectedConceptClusters(involvedConcepts));
        registeredIssues.add(new HierarchicalCycles(hierarchyGraphBuilder));
        registeredIssues.add(new ValuelessAssociativeRelations());
        registeredIssues.add(new SolelyTransitivelyRelatedConcepts());
        registeredIssues.add(new OmittedTopConcepts(conceptSchemes));
        registeredIssues.add(new TopConceptsHavingBroaderConcepts());
        registeredIssues.add(new HierarchicalRedundancy(hierarchyGraphBuilder));
        registeredIssues.add(new MappingRelationsMisuse(authoritativeConcepts));
        registeredIssues.add(new ReflexivelyRelatedConcepts(authoritativeConcepts));
        registeredIssues.add(new AmbiguousNotationReferences(authoritativeConcepts));
        registeredIssues.add(new UnprintableCharactersInLabels(authoritativeConcepts));
        registeredIssues.add(new MissingOutLinks(authoritativeConcepts));
        registeredIssues.add(new UndefinedSkosResources());
        registeredIssues.add(new UnidirectionallyRelatedConcepts(authoritativeConcepts));
        registeredIssues.add(new HttpUriSchemeViolations());
        registeredIssues.add(new RelationClashes(hierarchyGraphBuilder));
        registeredIssues.add(new MappingClashes());
    }

    /**
     * Registers the SKOS integrity issues that share one {@link ResourceLabelsCollector}.
     */
    private void addSkosIntegrityIssues() {
        ResourceLabelsCollector resourceLabelsCollector = new ResourceLabelsCollector();

        registeredIssues.add(new InconsistentPrefLabels(resourceLabelsCollector));
        registeredIssues.add(new DisjointLabelsViolations(resourceLabelsCollector));
    }

    /**
     * Registers the issues that access external resources and initializes their access delay with
     * {@link #EXT_ACCESS_MILLIS}. Must run after {@link #addStatisticalIssues()}.
     */
    private void addSlowIssues() {
        brokenLinks = new BrokenLinks(httpURIs);
        brokenLinks.setExtAccessDelayMillis(EXT_ACCESS_MILLIS);
        registeredIssues.add(brokenLinks);

        missingInLinks = new MissingInLinks(authoritativeConcepts);
        missingInLinks.setQueryDelayMillis(EXT_ACCESS_MILLIS);
        registeredIssues.add(missingInLinks);
    }

    /**
     * Returns all registered issues in registration order.
     *
     * @return the list of registered issues; this is the list held by this instance, not a copy
     */
    public List<Issue> getAllIssues() {
        return registeredIssues;
    }

    /**
     * Looks up registered issues by their IDs.
     *
     * @param commaSeparatedIssueIDs issue IDs separated by commas; surrounding whitespace of each ID is
     *                               trimmed and IDs are matched case-insensitively
     * @return the matching issues in the order the IDs were given, or an empty collection if the argument
     *         is {@code null} or empty
     * @throws UnknownIssueIdException if an ID matches no registered issue
     */
    public Collection<Issue> getIssues(String commaSeparatedIssueIDs)
    {
        if (commaSeparatedIssueIDs == null || commaSeparatedIssueIDs.isEmpty()) {
            return Collections.emptySet();
        }

        Collection<Issue> issues = new ArrayList<Issue>();
        StringTokenizer tokenizer = new StringTokenizer(commaSeparatedIssueIDs, ",");
        while (tokenizer.hasMoreTokens()) {
            issues.add(findIssue(tokenizer.nextToken().trim()));
        }
        return issues;
    }

    /**
     * Finds the first registered issue whose ID equals the passed ID, ignoring case.
     *
     * @param issueId ID to look for
     * @return the matching issue
     * @throws UnknownIssueIdException if no registered issue has that ID
     */
    private Issue findIssue(String issueId) {
        for (Issue issue : registeredIssues) {
            if (issue.getId().equalsIgnoreCase(issueId)) {
                return issue;
            }
        }

        throw new UnknownIssueIdException(issueId, generateSupportedIssueIdList());
    }

    /**
     * Builds the list of all registered issue IDs, separated by ", ", for use in error reporting.
     *
     * @return the joined IDs, or an empty string if no issue is registered
     */
    private String generateSupportedIssueIdList() {
        StringBuilder supportedIssueIds = new StringBuilder();
        Iterator<Issue> allIssuesIt = getAllIssues().iterator();
        while (allIssuesIt.hasNext()) {
            supportedIssueIds.append(allIssuesIt.next().getId());
            if (allIssuesIt.hasNext()) {
                supportedIssueIds.append(", ");
            }
        }
        return supportedIssueIds.toString();
    }

    /**
     * Passes the repository connection to every registered issue.
     *
     * @param repCon connection handed to each issue
     */
    public void setRepositoryConnection(RepositoryConnection repCon) {
        for (Issue issue : registeredIssues) {
            issue.setRepositoryConnection(repCon);
        }
    }

    /**
     * Set an IProgressMonitor that is notified on changes in the evaluation progress for every managed issue.
     * @param progressMonitor monitor instance to be notified
     */
    public void setProgressMonitor(IProgressMonitor progressMonitor) {
        for (Issue issue : registeredIssues) {
            issue.setProgressMonitor(progressMonitor);
        }
    }

    /**
     * Sets a delay time in milliseconds that must pass between accessing an external resource. This is intended to
     * avoid flooding of, e.g., vocabulary hosts or SPARQL endpoints with HTTP requests.
     *
     * @param delayMillis delay time in milliseconds
     */
    @SuppressWarnings("unused")
    public void setExtAccessDelayMillis(int delayMillis) {
        missingInLinks.setQueryDelayMillis(delayMillis);
        brokenLinks.setExtAccessDelayMillis(delayMillis);
    }

    /**
     * Some methods in this class support investigating only a subset of the vocabulary and extrapolate the results
     * to shorten evaluation time. Works for, e.g., finding broken links.
     *
     * @param subsetSizePercent percentage of the total resources to investigate.
     */
    public void setSubsetSize(Float subsetSizePercent) {
        missingInLinks.setSubsetSize(subsetSizePercent);
        brokenLinks.setSubsetSize(subsetSizePercent);
    }

    /**
     * Sets a string that is used to identify if a URI is authoritative. This is required to, e.g., find all
     * out-links to distinguish between URIs in the vocabulary namespace and other resources on the Web.
     *
     * @param authResourceIdentifier a string, usually a substring of a URI in the vocabulary's namespace,
     *                               that uniquely identifies an authoritative URI.
     */
    public void setAuthResourceIdentifier(String authResourceIdentifier) {
        authoritativeConcepts.setAuthResourceIdentifier(authResourceIdentifier);
    }

    /**
     * Sets the base URI and forwards it to the authoritative concepts issue.
     *
     * @param baseURI base URI to use
     */
    @SuppressWarnings("unused")
    public void setBaseURI(String baseURI) {
        this.baseURI = baseURI;
        // The value passed at construction time is always null, so a later change must be forwarded
        // explicitly; otherwise it would never reach the issue.
        authoritativeConcepts.setBaseURI(baseURI);
    }

    /**
     * Adds a SPARQL endpoint to the missing in-links issue.
     *
     * @param endpointUrl URL of the endpoint to add
     * @throws OpenRDFException declared by the underlying call
     */
    public void addSparqlEndPoint(String endpointUrl) throws OpenRDFException {
        missingInLinks.addSparqlEndPoint(endpointUrl);
    }

}