/*
* Copyright 2015 Trento Rise (trentorise.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.trentorise.opendata.jackan.dcat;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.trentorise.opendata.jackan.CkanClient;
import eu.trentorise.opendata.jackan.model.CkanResource;
import eu.trentorise.opendata.commons.Dict;
import eu.trentorise.opendata.traceprov.dcat.DcatDistribution;
import java.util.Locale;
import java.util.logging.Level;
import com.google.common.annotations.Beta;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.collect.ImmutableMap;
import eu.trentorise.opendata.commons.TodUtils;
import eu.trentorise.opendata.commons.PeriodOfTime;
import static eu.trentorise.opendata.commons.validation.Preconditions.checkNotEmpty;
import static eu.trentorise.opendata.commons.TodUtils.isNotEmpty;
import eu.trentorise.opendata.jackan.exceptions.JackanException;
import eu.trentorise.opendata.jackan.exceptions.JackanNotFoundException;
import eu.trentorise.opendata.jackan.model.CkanDataset;
import eu.trentorise.opendata.jackan.model.CkanTag;
import eu.trentorise.opendata.traceprov.TraceProvModule;
import eu.trentorise.opendata.traceprov.dcat.DcatDataset;
import eu.trentorise.opendata.traceprov.dcat.FoafAgent;
import eu.trentorise.opendata.traceprov.dcat.SkosConcept;
import eu.trentorise.opendata.traceprov.dcat.SkosConceptScheme;
import eu.trentorise.opendata.traceprov.dcat.VCard;
import eu.trentorise.opendata.traceprov.geojson.Feature;
import eu.trentorise.opendata.traceprov.geojson.GeoJson;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.annotation.Nullable;
/**
* Factory to generate Dcat objects from Ckan ones. Conversion is done according
* to <a href=
* "https://github.com/ckan/ckanext-dcat#rdf-dcat-to-ckan-dataset-mapping"
* target="_blank">this mapping </a> in Ckanext-dcat repository. In most cases
* this mapping is deliberately a loose one, for instance, it does not try to
* link the DCAT publisher property with a CKAN dataset author, maintainer or
* organization, as the link between them is not straight-forward and may depend
* on a particular instance needs.
*
* To extract more stuff during conversion, you can use
* {@link GreedyDcatFactory} or extend this class and override the extract*
* and/or postProcess* methods.
*
* @author David Leoni
* @since 0.4.1
*/
public class DcatFactory {
/** Name of the CKAN field looked up when extracting the 'issued' value. */
protected static final String ISSUED = "issued";
/** Name of the CKAN field looked up when extracting the 'modified' value. */
protected static final String MODIFIED = "modified";
/** Field name constant; NOTE(review): not referenced in the visible part of this class. */
protected static final String DESCRIPTION = "description";
/** Name of the CKAN field looked up when extracting a dataset or resource uri. */
protected static final String URI_FIELD = "uri";
/** Attribute name used in log messages about the dataset title. */
protected static final String TITLE = "title";
/** Logger used for all diagnostics emitted by this factory; replaceable via {@link #setLogger(Logger)}. */
private Logger logger;
/** Mapper used to (de)serialize field values; TraceProv modules are registered into it. */
private ObjectMapper objectMapper;
/**
* Creates a factory with default configuration.
*/
public DcatFactory() {
this.logger = Logger.getLogger(DcatFactory.class.getName());
this.objectMapper = new ObjectMapper();
TraceProvModule.registerModulesInto(this.objectMapper);
}
/**
 * Gives access to the logger this factory writes to.
 *
 * @return the logger currently in use
 */
protected Logger getLogger() {
    return this.logger;
}
/**
 * Replaces the logger this factory writes to.
 *
 * @param logger
 *            the logger to use from now on
 */
protected void setLogger(Logger logger) {
    Logger replacement = logger;
    this.logger = replacement;
}
/**
 * Gives access to the object mapper used for JSON conversions.
 *
 * @return the object mapper currently in use
 */
protected ObjectMapper getObjectMapper() {
    return this.objectMapper;
}
/**
 * Replaces the object mapper used for JSON conversions. The TraceProv
 * modules are registered into the provided mapper, so the argument itself
 * gets modified.
 *
 * @param objectMapper
 *            the mapper to use from now on
 */
protected void setObjectMapper(ObjectMapper objectMapper) {
    // Store first, then register, so the field is set even if registration fails.
    this.objectMapper = objectMapper;
    TraceProvModule.registerModulesInto(objectMapper);
}
/**
 * Serializes the given locales with the internal object mapper, so the
 * result can be stored as a string in a ckan extras field (i.e.
 * "[\"ca\", \"en\", \"es\"]").
 *
 * @param locales
 *            the locales to serialize
 * @return the serialized form of the locales
 * @throws JackanException
 *             if serialization fails for any reason
 */
protected String formatLanguages(Iterable<Locale> locales) {
    String serialized;
    try {
        serialized = objectMapper.writeValueAsString(locales);
    } catch (Exception ex) {
        throw new JackanException("Couldn't serialize locales! " + locales, ex);
    }
    return serialized;
}
/**
 * Reads the dataset 'language' field and deserializes it into a list of
 * locales. The field is expected to look like "[\"ca\", \"en\", \"es\"]".
 *
 * @param dataset
 *            the dataset to read the field from
 * @return the locales parsed from the field
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             when the field content cannot be deserialized
 */
protected List<Locale> extractLanguages(CkanDataset dataset) {
    final String json = extractFieldAsNonEmptyString(dataset, "language");
    final TypeReference<List<Locale>> localeListType = new TypeReference<List<Locale>>() {
    };
    try {
        return objectMapper.readValue(json, localeListType);
    } catch (Exception ex) {
        throw new JackanException("Couldn't deserialize locales: " + json, ex);
    }
}
/**
 * Same lookup as {@link #extractFieldAsString(CkanDataset, String)}, but the
 * value is trimmed and rejected when nothing is left after trimming.
 *
 * @param dataset
 *            the dataset to search
 * @param field
 *            the name of the field to look for
 * @return the trimmed, non-empty field value
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             on generic error
 */
protected String extractFieldAsNonEmptyString(CkanDataset dataset, String field) {
    String trimmed = extractFieldAsString(dataset, field).trim();
    if (!trimmed.isEmpty()) {
        return trimmed;
    }
    throw new JackanNotFoundException("Couldn't find valid non-empty field " + field + " in CkanDataset");
}
/**
 * Same lookup as {@link #extractFieldAsString(CkanResource, String)}, but the
 * value is trimmed and rejected when nothing is left after trimming.
 *
 * @param resource
 *            the resource to search
 * @param field
 *            the name of the field to look for
 * @return the trimmed, non-empty field value
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             on generic error
 */
protected String extractFieldAsNonEmptyString(CkanResource resource, String field) {
    String trimmed = extractFieldAsString(resource, field).trim();
    if (!trimmed.isEmpty()) {
        return trimmed;
    }
    throw new JackanNotFoundException("Couldn't find valid non-empty field " + field + " in CkanResource!");
}
/**
 * Looks up a string field first in {@link CkanDataset#getOthers() } and, if
 * no string is found there, in {@link CkanDataset#getExtras() }. A value in
 * 'others' that is not a String is treated as not found.
 *
 * @param dataset
 *            the dataset to search, must be non-null
 * @param field
 *            the name of the field to look for, must be non-empty
 * @return the string found, never null
 * @throws JackanNotFoundException
 *             when no string value is found, including the case the field
 *             is present with a null value
 * @throws JackanException
 *             on generic error
 */
protected String extractFieldAsString(CkanDataset dataset, String field) {
    checkNotNull(dataset);
    checkNotEmpty(field, "Invalid field to search!");

    String found = null;
    if (dataset.getOthers() != null) {
        Object other = dataset.getOthers().get(field);
        found = other instanceof String ? (String) other : null;
    }
    // 'extras' are consulted only when 'others' gave no usable string
    if (found == null && dataset.getExtras() != null) {
        found = dataset.getExtrasAsHashMap().get(field);
    }
    if (found != null) {
        return found;
    }
    throw new JackanNotFoundException("Can't find string field " + field + "!");
}
/**
 * Looks up a string field in {@link CkanResource#getOthers() }. A value that
 * is not a String is treated as not found.
 *
 * @param resource
 *            the resource to search, must be non-null
 * @param field
 *            the name of the field to look for, must be non-empty
 * @return the string found, never null
 * @throws JackanNotFoundException
 *             when no string value is found, including the case the field
 *             is present with a null value
 * @throws JackanException
 *             on generic error
 */
protected String extractFieldAsString(CkanResource resource, String field) {
    checkNotNull(resource);
    checkNotEmpty(field, "Invalid field to search!");

    if (resource.getOthers() != null) {
        Object other = resource.getOthers().get(field);
        if (other instanceof String) {
            return (String) other;
        }
    }
    throw new JackanNotFoundException("Can't find string field " + field + "!");
}
/**
 * Looks up a field of any type first in {@link CkanDataset#getOthers() }
 * and, if nothing is found there, in {@link CkanDataset#getExtras() }.
 *
 * @param dataset
 *            the dataset to search, must be non-null
 * @param field
 *            the name of the field to look for, must be non-empty
 * @return the value found, never null
 * @throws JackanNotFoundException
 *             when no value is found, including the case the field is
 *             present with a null value
 * @throws JackanException
 *             on generic error
 */
protected Object extractFieldAsObject(CkanDataset dataset, String field) {
    checkNotNull(dataset);
    checkNotEmpty(field, "Invalid field to search!");

    Object found = dataset.getOthers() == null ? null : dataset.getOthers().get(field);
    if (found == null && dataset.getExtras() != null) {
        found = dataset.getExtrasAsHashMap().get(field);
    }
    if (found != null) {
        return found;
    }
    throw new JackanNotFoundException("Can't find object field " + field + "!");
}
/**
 * Reads a non-blank string field from the dataset and deserializes it with
 * the internal object mapper into the requested type.
 *
 * @param dataset
 *            the dataset to read the field from
 * @param field
 *            the name of the field to look for
 * @param toType
 *            the target type of the deserialization
 * @return the deserialized value
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             when the field content cannot be deserialized
 */
protected <T> T extractField(CkanDataset dataset, String field, TypeReference<T> toType) {
    final String rawValue = extractFieldAsNonEmptyString(dataset, field);
    T parsed;
    try {
        parsed = objectMapper.readValue(rawValue, toType);
    } catch (Exception ex) {
        throw new JackanException("Error while extracting field " + field + " into type " + toType.toString(), ex);
    }
    return parsed;
}
/**
 * Reads a non-blank string field from the dataset and deserializes it with
 * the internal object mapper into the requested class.
 *
 * @param dataset
 *            the dataset to read the field from
 * @param field
 *            the name of the field to look for
 * @param toClass
 *            the target class of the deserialization
 * @return the deserialized value
 * @see #extractField(eu.trentorise.opendata.jackan.model.CkanDataset,
 *      java.lang.String, com.fasterxml.jackson.core.type.TypeReference)
 *
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             when the field content cannot be deserialized
 */
protected <T> T extractField(CkanDataset dataset, String field, Class<T> toClass) {
    final String rawValue = extractFieldAsNonEmptyString(dataset, field);
    T parsed;
    try {
        parsed = objectMapper.readValue(rawValue, toClass);
    } catch (Exception ex) {
        throw new JackanException("Error while extracting field " + field + " into class " + toClass, ex);
    }
    return parsed;
}
/**
 * Formats a timestamp the way {@link CkanClient#formatTimestamp} does and
 * then appends a trailing 'Z', which CKAN itself does not add.
 *
 * @param timestamp
 *            the timestamp to format
 * @return the CKAN formatted timestamp followed by "Z"
 */
protected String formatTimestamp(Timestamp timestamp) {
    String ckanFormatted = CkanClient.formatTimestamp(timestamp);
    return ckanFormatted + "Z";
}
/**
 * Builds a GeoJson Feature out of purely textual, possibly low-quality
 * information. Which properties end up in the feature depends on which of
 * name and description are non-empty.
 *
 * @param name
 *            the name of the geometry, if not known use empty string
 * @param description
 *            the description of the geometry, if not known use empty
 *            string. Only its emptiness is inspected: the text stored
 *            under 'description' is {@code spatialDump}.
 * @param id
 *            the Jsonld id for the geometric object
 * @param spatialDump
 *            the geometry in any format, could even be an unparseable json
 *            or xml dump
 * @throws JackanNotFoundException
 *             when both name and description are empty
 */
private GeoJson calcGeoJson(String name, String description, String id, String spatialDump) {
    final boolean hasName = !name.isEmpty();
    final boolean hasDescription = !description.isEmpty();

    if (!hasName) {
        if (!hasDescription) {
            throw new JackanNotFoundException(
                    "Could not find valid dataset spatial field nor natural language name!");
        }
        // description only
        return Feature.builder().setProperties(ImmutableMap.of("description", spatialDump)).setId(id).build();
    }

    logger.log(Level.INFO, "Putting found natural language name in Feature.properties['name']");
    if (!hasDescription) {
        // name only
        return Feature.ofName(name).withId(id);
    }
    // both name and description
    return Feature.builder().setProperties(ImmutableMap.of("name", name, "description", spatialDump)).setId(id)
            .build();
}
/**
 * Extracts spatial information from the dataset fields 'spatial_uri',
 * 'spatial_text' and 'spatial'. When 'spatial' parses as GeoJson the parsed
 * object is returned as is. Otherwise a Feature is built from the textual
 * pieces that were found, storing the unparseable 'spatial' content as
 * description.
 *
 * @param dataset
 *            the dataset to read the fields from
 * @return the parsed or synthesized GeoJson
 * @throws JackanNotFoundException
 *             if spatial is not found
 * @throws JackanException
 *             for other errors.
 */
protected GeoJson extractSpatial(CkanDataset dataset) {
    String featureId = "";
    try {
        featureId = extractFieldAsNonEmptyString(dataset, "spatial_uri").trim();
        logger.info("Found dataset 'spatial_uri' field, will set it to '@id' field of GeoJSON-LD");
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find dataset 'spatial_uri' field");
    }

    String placeName = "";
    try {
        placeName = extractFieldAsNonEmptyString(dataset, "spatial_text").trim();
    } catch (JackanNotFoundException ex) {
        logger.info(
                "Couldn't find dataset 'spatial_text' field (should hold the natural language name of the place)");
    }

    String rawSpatial = "";
    try {
        rawSpatial = extractFieldAsNonEmptyString(dataset, "spatial");
    } catch (JackanNotFoundException ex) {
        logger.info("Could not find dataset 'spatial' field");
    }

    String fallbackDescription = "";
    if (!rawSpatial.isEmpty()) {
        try {
            GeoJson parsed = objectMapper.readValue(rawSpatial, GeoJson.class);
            // a null result (i.e. from a json 'null') still goes through the textual fallback
            if (parsed != null) {
                return parsed;
            }
        } catch (Exception ex) {
            logger.log(Level.SEVERE,
                    "Error while parsing dataset 'spatial' field as GeoJson, will put the problematic json into Feature.properties['description'] ",
                    ex);
            fallbackDescription = rawSpatial;
        }
    }
    return calcGeoJson(placeName, fallbackDescription, featureId, rawSpatial);
}
/**
 * Extracts themes from the dataset 'theme' field, searched in 'others' and
 * then 'extras'. There is no fallback on groups. The field must hold a json
 * list of strings. Each non-blank entry accepted by
 * {@link java.net.URI#create(String)} becomes a concept with that uri and an
 * empty label; any other entry becomes a concept with empty uri and the
 * entry as label in the given locale. If the field is not found an empty
 * list is returned.
 *
 * @param dataset
 *            the dataset to read the field from
 * @param locale
 *            the locale of the theme names. If unknown pass
 *            {@link Locale#ROOT}
 * @param catalogUrl
 *            currently not used by this implementation
 * @return the themes found, possibly empty
 * @throws JackanException
 *             when the field content cannot be deserialized
 */
protected List<SkosConcept> extractThemes(CkanDataset dataset, Locale locale, String catalogUrl) {
    List<SkosConcept> themes = new ArrayList<SkosConcept>();
    List<String> rawThemes;
    try {
        rawThemes = extractField(dataset, "theme", new TypeReference<List<String>>() {
        });
    } catch (JackanNotFoundException ex) {
        return themes;
    }

    for (String rawTheme : rawThemes) {
        if (rawTheme == null) {
            continue;
        }
        String theme = rawTheme.trim();
        if (theme.isEmpty()) {
            continue;
        }

        boolean parsesAsUri;
        try {
            java.net.URI.create(theme);
            parsesAsUri = true;
        } catch (Exception ex) {
            parsesAsUri = false;
        }

        String uri = parsesAsUri ? theme : "";
        Dict prefLabel = parsesAsUri ? Dict.of() : Dict.of(locale, theme);
        themes.add(SkosConcept.of(SkosConceptScheme.of(), prefLabel, uri));
    }
    return themes;
}
/**
 * Returns the dataset 'uri' field if present, otherwise a dataset url built
 * from the catalog url and the dataset id.
 *
 * @param dataset
 *            the dataset to read from
 * @param catalogUrl
 *            i.e. http://dati.trentino.it
 * @return the uri found or built
 * @throws JackanNotFoundException
 *             when there is neither a valid 'uri' field nor a dataset id
 * @throws JackanException
 *             on generic error
 */
protected String extractUri(CkanDataset dataset, String catalogUrl) {
    String declaredUri = "";
    try {
        declaredUri = extractFieldAsNonEmptyString(dataset, URI_FIELD);
    } catch (JackanNotFoundException ex) {
        // no explicit uri, fall back on the dataset id below
    }

    if (!isTrimmedEmpty(declaredUri)) {
        return declaredUri;
    }
    if (isTrimmedEmpty(dataset.getId())) {
        throw new JackanNotFoundException("Couldn't find any valid dataset uri!");
    }
    return CkanClient.makeDatasetUrl(catalogUrl, dataset.getId());
}
/**
 * Builds a period of time from the dataset fields 'temporal_start' and
 * 'temporal_end'. If {@link PeriodOfTime#of} rejects the two values with an
 * IllegalStateException, the raw strings are stored joined by
 * {@link PeriodOfTime#SEP}. For the interval format see
 * https://en.wikipedia.org/wiki/ISO_8601#Time_intervals
 *
 * @param dataset
 *            the dataset to read the fields from
 * @return the period of time found
 * @throws JackanNotFoundException
 *             when neither start nor end is found
 * @throws JackanException
 *             on generic error
 */
protected PeriodOfTime extractTemporal(CkanDataset dataset) {
    String periodStart = "";
    try {
        periodStart = extractFieldAsNonEmptyString(dataset, "temporal_start").trim();
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid dataset field 'temporal_start'");
    }

    String periodEnd = "";
    try {
        periodEnd = extractFieldAsNonEmptyString(dataset, "temporal_end").trim();
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid dataset field 'temporal_end'");
    }

    boolean nothingFound = periodStart.isEmpty() && periodEnd.isEmpty();
    if (nothingFound) {
        throw new JackanNotFoundException("Couldn't find any valid temporal information!");
    }

    try {
        return PeriodOfTime.of(periodStart, periodEnd);
    } catch (IllegalStateException ex) {
        logger.info("Couldn't find valid ISO8061 temporal_start/end fields, storing raw string.'");
        return PeriodOfTime.of(periodStart + PeriodOfTime.SEP + periodEnd);
    }
}
/**
 * Returns the first available among the dataset field 'identifier', the
 * dataset field 'guid' and the trimmed dataset id.
 *
 * @param dataset
 *            the dataset to read from
 * @return the identifier found
 * @throws JackanNotFoundException
 *             when none of the candidates holds a non-blank value
 * @throws JackanException
 *             on generic error
 */
protected String extractIdentifier(CkanDataset dataset) {
    for (String candidateField : new String[] { "identifier", "guid" }) {
        try {
            return extractFieldAsNonEmptyString(dataset, candidateField);
        } catch (JackanNotFoundException ex) {
            // try the next candidate
        }
    }
    if (isTrimmedEmpty(dataset.getId())) {
        throw new JackanNotFoundException("Couldn't find any valid identifier in the dataset!");
    }
    return dataset.getId().trim();
}
/**
 * Returns the dataset 'issued' field if present and non-blank, otherwise
 * the dataset metadata creation timestamp formatted with
 * {@link CkanClient#formatTimestamp}.
 *
 * @param dataset
 *            the dataset to read from
 * @return the issued value found
 * @throws JackanNotFoundException
 *             when neither the field nor the creation timestamp is available
 * @throws JackanException
 *             on generic error
 */
protected String extractIssued(CkanDataset dataset) {
    try {
        return extractFieldAsNonEmptyString(dataset, ISSUED);
    } catch (JackanNotFoundException ex) {
        if (dataset.getMetadataCreated() == null) {
            throw new JackanNotFoundException("Couldn't find valid 'issued' field");
        }
        return CkanClient.formatTimestamp(dataset.getMetadataCreated());
    }
}
/**
 * Returns the dataset 'modified' field if present and non-blank, otherwise
 * the dataset metadata modification timestamp formatted with
 * {@link CkanClient#formatTimestamp}.
 *
 * A blank 'modified' field is treated as missing, consistently with
 * {@link #extractIssued(CkanDataset)}, so that the timestamp fallback is
 * tried instead of returning an empty value.
 *
 * @param dataset
 *            the dataset to read from
 * @return the trimmed 'modified' field or the formatted modification
 *         timestamp
 * @throws JackanNotFoundException
 *             when neither the field nor the modification timestamp is
 *             available
 * @throws JackanException
 *             on generic error
 */
protected String extractModified(CkanDataset dataset) {
    try {
        return extractFieldAsNonEmptyString(dataset, MODIFIED);
    } catch (JackanNotFoundException ex) {
        if (dataset.getMetadataModified() != null) {
            return CkanClient.formatTimestamp(dataset.getMetadataModified());
        }
    }
    throw new JackanNotFoundException("Couldn't find valid 'modified' field");
}
/**
 * Builds a publisher from the dataset fields 'publisher_uri',
 * 'publisher_name', 'publisher_email' and 'publisher_url'. Each missing
 * field is logged and skipped. The only fallback is for the mbox: when
 * 'publisher_email' is missing, the organization title is used if present.
 * Group and maintainer are never used as fallback.
 *
 * @param dataset
 *            the dataset to read from
 * @param locale
 *            the locale assigned to the publisher name
 * @return the publisher found
 * @throws JackanNotFoundException
 *             when the resulting publisher equals an empty one
 * @throws JackanException
 *             on generic error
 */
protected FoafAgent extractPublisher(CkanDataset dataset, Locale locale) {
    FoafAgent.Builder builder = FoafAgent.builder();

    try {
        String publisherUri = extractFieldAsNonEmptyString(dataset, "publisher_uri").trim();
        builder.setUri(publisherUri);
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid field 'publisher_uri'");
    }

    try {
        String publisherName = extractFieldAsNonEmptyString(dataset, "publisher_name").trim();
        builder.setName(Dict.of(locale, publisherName));
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid field 'publisher_name'");
    }

    try {
        String publisherEmail = extractFieldAsNonEmptyString(dataset, "publisher_email").trim();
        builder.setMbox(publisherEmail);
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid field 'publisher_email'");
        // fall back on the organization title for the mbox
        boolean hasOrganizationTitle = dataset.getOrganization() != null
                && dataset.getOrganization().getTitle() != null;
        String organizationTitle = hasOrganizationTitle ? dataset.getOrganization().getTitle().trim() : "";
        if (organizationTitle.isEmpty()) {
            logger.info("Couldn't find valid organization:title to use as publisher MBox");
        } else {
            builder.setMbox(organizationTitle);
        }
    }

    try {
        String publisherUrl = extractFieldAsNonEmptyString(dataset, "publisher_url").trim();
        builder.setHomepage(publisherUrl);
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid field 'publisher_url' for publisher homepage");
    }

    FoafAgent publisher = builder.build();
    if (publisher.equals(FoafAgent.of())) {
        throw new JackanNotFoundException("Couldn't find any valid field for a publisher!");
    }
    return publisher;
}
/**
 * Builds a contact point from the dataset. Fields are searched as follows:
 * <ul>
 * <li>uri: dataset field 'contact_uri'</li>
 * <li>fn: dataset field 'contact_name', then maintainer, then author</li>
 * <li>email: dataset field 'contact_email', then maintainer email, then
 * author email</li>
 * </ul>
 * Each piece that can't be found is logged and skipped.
 *
 * The email fallback checks the email values themselves, so a null or blank
 * maintainer/author email no longer causes a NullPointerException and no
 * longer depends on the corresponding name being set.
 *
 * @param dataset
 *            the dataset to read from
 * @return the contact point found
 * @throws JackanNotFoundException
 *             when the resulting contact point equals an empty one
 * @throws JackanException
 *             on generic error
 */
protected VCard extractContactPoint(CkanDataset dataset) {
    VCard.Builder cpb = VCard.builder();

    try {
        cpb.setUri(extractFieldAsNonEmptyString(dataset, "contact_uri"));
    } catch (JackanNotFoundException ex) {
        logger.info("Couldn't find valid dataset contact uri, skipping it.");
    }

    String candidateContactName = "";
    try {
        candidateContactName = extractFieldAsNonEmptyString(dataset, "contact_name");
    } catch (JackanNotFoundException ex) {
        if (!isTrimmedEmpty(dataset.getMaintainer())) {
            candidateContactName = dataset.getMaintainer().trim();
        } else if (!isTrimmedEmpty(dataset.getAuthor())) {
            candidateContactName = dataset.getAuthor().trim();
        }
    }
    if (candidateContactName.isEmpty()) {
        logger.info("Couldn't find valid dataset contact fn, skipping it.");
    } else {
        cpb.setFn(candidateContactName);
    }

    String candidateContactEmail = "";
    try {
        candidateContactEmail = extractFieldAsNonEmptyString(dataset, "contact_email");
    } catch (JackanNotFoundException ex) {
        // Guard on the email values: they may be null even when the names are set.
        if (!isTrimmedEmpty(dataset.getMaintainerEmail())) {
            candidateContactEmail = dataset.getMaintainerEmail().trim();
        } else if (!isTrimmedEmpty(dataset.getAuthorEmail())) {
            candidateContactEmail = dataset.getAuthorEmail().trim();
        }
    }
    if (candidateContactEmail.isEmpty()) {
        logger.info("Couldn't find valid dataset contact email, skipping it.");
    } else {
        cpb.setEmail(candidateContactEmail);
    }

    VCard ret = cpb.build();
    if (ret.equals(VCard.of())) {
        throw new JackanNotFoundException("Couldn't find any valid contact info in dataset!");
    }
    return ret;
}
/**
 * Collects the trimmed names of the dataset tags, skipping null tags and
 * tags with a blank name.
 *
 * @param dataset
 *            the dataset to read the tags from
 * @return the keywords found, possibly empty
 * @throws JackanNotFoundException
 *             when the dataset tags are null
 * @throws JackanException
 *             on generic error
 */
protected List<String> extractKeywords(CkanDataset dataset) {
    if (dataset.getTags() == null) {
        throw new JackanNotFoundException("Found null tags!");
    }
    List<String> keywords = new ArrayList<String>();
    for (CkanTag tag : dataset.getTags()) {
        if (tag == null || isTrimmedEmpty(tag.getName())) {
            continue;
        }
        keywords.add(tag.getName().trim());
    }
    return keywords;
}
/**
 * Null-safe variant of {@link String#trim()}.
 *
 * @param s
 *            the string to trim, may be null
 * @return the trimmed string, or the empty string when {@code s} is null
 */
protected static String trim(@Nullable String s) {
    return s == null ? "" : s.trim();
}
/**
 * Returns the trimmed dataset title as a dictionary in the given locale.
 *
 * @param dataset
 *            the dataset to read the title from
 * @param locale
 *            the locale assigned to the title
 * @return the title found
 * @throws JackanNotFoundException
 *             when the title is null or blank
 * @throws JackanException
 *             on generic error
 */
protected Dict extractTitle(CkanDataset dataset, Locale locale) {
    String title = trim(dataset.getTitle());
    if (!title.isEmpty()) {
        return Dict.of(locale, title);
    }
    throw new JackanNotFoundException("Couldn't find valid title!");
}
/**
 * Returns the trimmed dataset notes as a dictionary in the given locale.
 *
 * @param dataset
 *            the dataset to read the notes from
 * @param locale
 *            if unknown pass {@link Locale#ROOT}
 * @return the description found
 * @throws JackanNotFoundException
 *             when the notes are null or blank
 * @throws JackanException
 *             on generic error
 */
protected Dict extractDescription(CkanDataset dataset, Locale locale) {
    String notes = trim(dataset.getNotes());
    if (!notes.isEmpty()) {
        return Dict.of(locale, notes);
    }
    throw new JackanNotFoundException("Couldn't find valid notes!");
}
/**
 * Returns the trimmed dataset 'frequency' field.
 *
 * @param dataset
 *            the dataset to read the field from
 * @return the non-blank frequency value
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 */
protected String extractAccrualPeriodicity(CkanDataset dataset) {
    // todo frequency would probably need further checking
    String frequency = extractFieldAsNonEmptyString(dataset, "frequency");
    return frequency;
}
/**
 * Returns the dataset url, as is (it is not trimmed).
 *
 * @param dataset
 *            the dataset to read the url from
 * @return the dataset url
 * @throws JackanNotFoundException
 *             when the url is null or blank
 */
protected String extractLandingPage(CkanDataset dataset) {
    if (!isTrimmedEmpty(dataset.getUrl())) {
        return dataset.getUrl();
    }
    throw new JackanNotFoundException("Couldn't find valid url field in dataset!");
}
/**
 * Logs at INFO level that an attribute of the given kind of object could
 * not be found and is being skipped.
 *
 * @param clazz
 *            the kind of object, i.e. "dataset". It is concatenated into
 *            the log message pattern.
 * @param attribute
 *            the name of the missing attribute, passed as log parameter
 */
protected void logCantFind(String clazz, String attribute) {
    // the doubled quote is how a single quote is written in a parameterized log message
    String pattern = "Couldn''t find any valid " + clazz + " {0}, skipping it";
    logger.log(Level.INFO, pattern, attribute);
}
/**
 * Logs at SEVERE level that extracting an attribute of the given kind of
 * object failed and the attribute is being skipped.
 *
 * @param clazz
 *            the kind of object, i.e. "dataset"
 * @param attribute
 *            the name of the attribute that could not be extracted
 * @param ex
 *            the error that occurred, logged along with the message
 */
protected void logCantExtract(String clazz, String attribute, Throwable ex) {
    String message = "Error while extracting " + clazz + " " + attribute + ", skipping it";
    logger.log(Level.SEVERE, message, ex);
}
/**
 * Logs that a dataset attribute could not be found.
 *
 * @param attribute
 *            the name of the missing attribute
 */
protected void logDatasetCantFind(String attribute) {
    final String kind = "dataset";
    logCantFind(kind, attribute);
}
/**
 * Logs that extracting a dataset attribute failed.
 *
 * @param attribute
 *            the name of the attribute that could not be extracted
 * @param ex
 *            the error that occurred
 */
protected void logDatasetCantExtract(String attribute, Throwable ex) {
    final String kind = "dataset";
    logCantExtract(kind, attribute, ex);
}
/**
 * Converts a CkanDataset to a DcatDataset. If the dataset contains erroneous
 * fields the converter just skips them, logging the problem, without
 * throwing exceptions: a field that can't be found is logged at INFO level,
 * any other extraction error at SEVERE level.
 *
 * When the dataset languages can't be extracted and the provided locale is
 * not {@link Locale#ROOT}, the locale itself is set as dataset language.
 *
 * @param dataset
 *            must be non null, but it may have missing or null fields.
 * @param catalogUrl
 *            catalog url, i.e. "http://dati.trentino.it". It is validated
 *            with {@code TodUtils.checkNotEmpty}, and any trailing slash is
 *            removed before it is used in extractions.
 * @param locale
 *            the locale of metadata text. If locale is unknown, use
 *            {@link Locale#ROOT}. todo write about data locale
 * @return the converted dataset
 */
@Beta
public DcatDataset makeDataset(CkanDataset dataset, String catalogUrl, Locale locale) {
    TodUtils.checkNotEmpty(catalogUrl, "invalid dcat dataset catalog URL");
    checkNotNull(locale, "invalid dcat dataset locale");
    checkNotNull(dataset, "Invalid dataset!");

    String sanitizedCatalogUrl = TodUtils.removeTrailingSlash(catalogUrl);
    String sanitizedId = dataset.getId() == null ? "" : dataset.getId();
    String sanitizedLicenceId = dataset.getLicenseId() == null ? "" : dataset.getLicenseId();

    // Logged once per conversion (it used to be emitted twice).
    logger.warning(
            "TODO - CONVERSION FROM CKAN DATASET TO DCAT DATASET IS STILL EXPERIMENTAL, IT MIGHT BE INCOMPLETE!!!");

    DcatDataset.Builder ddb = DcatDataset.builder();

    try {
        ddb.setAccrualPeriodicity(extractAccrualPeriodicity(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("accrualPeriodicity");
    } catch (Exception ex) {
        logDatasetCantExtract("accrualPeriodicity", ex);
    }

    try {
        ddb.setContactPoint(extractContactPoint(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("contactPoint");
    } catch (Exception ex) {
        logDatasetCantExtract("contactPoint", ex);
    }

    try {
        ddb.setDescription(extractDescription(dataset, locale));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("description");
    } catch (Exception ex) {
        logDatasetCantExtract("description", ex);
    }

    // Each resource is converted on its own, so a broken one doesn't prevent
    // the others from being added.
    if (dataset.getResources() != null) {
        for (CkanResource cr : dataset.getResources()) {
            try {
                ddb.addDistributions(
                        makeDistribution(cr, sanitizedCatalogUrl, sanitizedId, sanitizedLicenceId, locale));
            } catch (Exception ex) {
                logDatasetCantExtract("distribution", ex);
            }
        }
    }

    try {
        ddb.setIdentifier(extractIdentifier(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("identifier");
    } catch (Exception ex) {
        logDatasetCantExtract("identifier", ex);
    }

    try {
        ddb.setIssued(extractIssued(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind(ISSUED);
    } catch (Exception ex) {
        logDatasetCantExtract(ISSUED, ex);
    }

    try {
        ddb.setKeywords(extractKeywords(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("keywords");
    } catch (Exception ex) {
        logDatasetCantExtract("keywords", ex);
    }

    try {
        ddb.setLandingPage(extractLandingPage(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("landingPage");
    } catch (Exception ex) {
        logDatasetCantExtract("landingPage", ex);
    }

    try {
        ddb.setLanguages(extractLanguages(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("language");
        addProvidedLocaleAsLanguage(ddb, locale);
    } catch (Exception ex) {
        logDatasetCantExtract("language", ex);
        addProvidedLocaleAsLanguage(ddb, locale);
    }

    try {
        ddb.setModified(extractModified(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind(MODIFIED);
    } catch (Exception ex) {
        logDatasetCantExtract(MODIFIED, ex);
    }

    try {
        ddb.setPublisher(extractPublisher(dataset, locale));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("publisher");
    } catch (Exception ex) {
        logDatasetCantExtract("publisher", ex);
    }

    try {
        ddb.setSpatial(extractSpatial(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("spatial");
    } catch (Exception ex) {
        logDatasetCantExtract("spatial", ex);
    }

    try {
        ddb.setTemporal(extractTemporal(dataset));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("temporal");
    } catch (Exception ex) {
        logDatasetCantExtract("temporal", ex);
    }

    try {
        ddb.setThemes(extractThemes(dataset, locale, sanitizedCatalogUrl));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind("theme");
    } catch (Exception ex) {
        logDatasetCantExtract("theme", ex);
    }

    try {
        ddb.setTitle(extractTitle(dataset, locale));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind(TITLE);
    } catch (Exception ex) {
        logDatasetCantExtract(TITLE, ex);
    }

    try {
        ddb.setUri(extractUri(dataset, sanitizedCatalogUrl));
    } catch (JackanNotFoundException ex) {
        logDatasetCantFind(URI_FIELD);
    } catch (Exception ex) {
        logDatasetCantExtract(URI_FIELD, ex);
    }

    postProcessDataset(ddb, catalogUrl, locale);
    return ddb.build();
}

/**
 * Adds the provided locale to the dataset languages, unless the locale is
 * {@link Locale#ROOT}. Used when languages can't be read from the dataset.
 */
private void addProvidedLocaleAsLanguage(DcatDataset.Builder ddb, Locale locale) {
    if (!Locale.ROOT.equals(locale)) {
        logger.log(Level.INFO, "Setting language field to provided locale {0}", locale);
        ddb.addLanguages(locale);
    }
}
/**
 * Hook invoked on the dataset builder after all the extractions and right
 * before the immutable DcatDataset object is built. This implementation
 * does nothing; override it to perform consistency checks or to reset some
 * field.
 *
 * @param datasetBuilder
 *            the builder holding the extracted values
 * @param catalogUrl
 *            the catalog url the conversion was invoked with
 * @param locale
 *            the locale the conversion was invoked with
 * @see #postProcessDistribution(eu.trentorise.opendata.traceprov.dcat.DcatDistribution.Builder,
 *      CkanResource, String, String, String, Locale)
 */
protected void postProcessDataset(DcatDataset.Builder datasetBuilder, String catalogUrl, Locale locale) {
    // intentionally empty, meant to be overridden
}
/**
 * Logs that a distribution attribute could not be found.
 *
 * @param attribute
 *            the name of the missing attribute
 */
protected void logDistribCantFind(String attribute) {
    final String kind = "distribution";
    logCantFind(kind, attribute);
}
/**
 * Logs that extracting a distribution attribute failed.
 *
 * @param attribute
 *            the name of the attribute that could not be extracted
 * @param ex
 *            the error that occurred
 */
protected void logDistribCantExtract(String attribute, Throwable ex) {
    final String kind = "distribution";
    logCantExtract(kind, attribute, ex);
}
/**
 * Returns the trimmed resource 'uri' field if present and non-blank,
 * otherwise a resource url built from catalog url, dataset id and resource
 * id.
 *
 * @param resource
 *            the resource to read from
 * @param catalogUrl
 *            i.e. http://dati.trentino.it
 * @param datasetId
 *            the id of the dataset the resource belongs to
 * @return the uri found or built
 * @throws JackanNotFoundException
 *             when there is no 'uri' field and some piece needed to build
 *             the url is missing
 * @throws JackanException
 *             on generic error
 */
protected String extractUri(CkanResource resource, String catalogUrl, String datasetId) {
    String declaredUri = "";
    try {
        declaredUri = extractFieldAsString(resource, URI_FIELD).trim();
    } catch (JackanNotFoundException ex) {
        // no explicit uri, try to build one below
    }

    if (!declaredUri.isEmpty()) {
        return declaredUri;
    }
    boolean canBuildUrl = isNotEmpty(catalogUrl) && isNotEmpty(datasetId) && isNotEmpty(resource.getId());
    if (!canBuildUrl) {
        throw new JackanNotFoundException("Couldn't find valid 'uri' for resource!");
    }
    return CkanClient.makeResourceUrl(catalogUrl, datasetId, resource.getId());
}
/**
 * Tells whether a string holds no content once trimmed.
 *
 * @param s
 *            the string to check, may be null
 * @return true if {@code s} is null or empty after trimming
 */
protected static boolean isTrimmedEmpty(@Nullable String s) {
    if (s == null) {
        return true;
    }
    return s.trim().isEmpty();
}
/**
 * Hook meant to be invoked on the distribution builder after the
 * extractions and before the immutable DcatDistribution object is built.
 * This implementation does nothing; override it to perform consistency
 * checks or to reset some field.
 *
 * @param distributionBuilder
 *            the builder holding the extracted values
 * @param resource
 *            the resource being converted
 * @param catalogURL
 *            the catalog url
 * @param datasetId
 *            the id of the dataset the resource belongs to
 * @param license
 *            the license passed to the conversion
 * @param locale
 *            the locale passed to the conversion
 *
 * @see #postProcessDataset(eu.trentorise.opendata.traceprov.dcat.DcatDataset.Builder,
 *      java.lang.String, java.util.Locale)
 */
protected void postProcessDistribution(DcatDistribution.Builder distributionBuilder, CkanResource resource,
        String catalogURL, String datasetId, String license, Locale locale) {
    // intentionally empty, meant to be overridden
}
/**
 * Returns the trimmed resource 'modified' field. A blank value is returned
 * as empty string.
 *
 * @param resource
 *            the resource to read the field from
 * @return the trimmed field value
 * @throws JackanNotFoundException
 *             when the field is not found
 * @throws JackanException
 *             on generic error
 */
protected String extractModified(CkanResource resource) {
    String modified = extractFieldAsString(resource, MODIFIED);
    return modified.trim();
}
/**
 * Returns the trimmed resource 'issued' field. A blank value is returned as
 * empty string.
 *
 * @param resource
 *            the resource to read the field from
 * @return the trimmed field value
 * @throws JackanNotFoundException
 *             when the field is not found
 * @throws JackanException
 *             on generic error
 */
protected String extractIssued(CkanResource resource) {
    String issued = extractFieldAsString(resource, ISSUED);
    return issued.trim();
}
/**
 * Returns the trimmed resource url.
 *
 * @param resource
 *            the resource to read the url from
 * @return the trimmed url
 * @throws JackanNotFoundException
 *             when the url is null or blank
 * @throws JackanException
 *             on generic error
 */
protected String extractAccessUrl(CkanResource resource) {
    if (isTrimmedEmpty(resource.getUrl())) {
        throw new JackanNotFoundException("Couldn't find valid access url!");
    }
    return resource.getUrl().trim();
}
/**
 * Returns the trimmed resource 'download_url' field.
 *
 * @param resource
 *            the resource to read the field from
 * @return the non-blank download url
 * @throws JackanNotFoundException
 *             when the field is missing or blank
 * @throws JackanException
 *             on generic error
 */
protected String extractDownloadUrl(CkanResource resource) {
    String downloadUrl = extractFieldAsNonEmptyString(resource, "download_url");
    return downloadUrl;
}
/**
 * Parses the resource size as an integer number of bytes. The size is
 * trimmed before parsing, so surrounding whitespace does not make an
 * otherwise valid number fail.
 *
 * @param resource
 *            the resource to read the size from
 * @return the size parsed as int
 * @throws JackanNotFoundException
 *             when the size is null or blank
 * @throws JackanException
 *             when the size is not a valid int
 */
protected int extractByteSize(CkanResource resource) {
    String rawSize = resource.getSize();
    if (isTrimmedEmpty(rawSize)) {
        throw new JackanNotFoundException("Couldn't find valid size in resource!");
    }
    try {
        // Integer.parseInt does not tolerate surrounding whitespace
        return Integer.parseInt(rawSize.trim());
    } catch (NumberFormatException ex) {
        throw new JackanException("COULDN'T CONVERT CKAN RESOURCE SIZE TO DCAT! " + "REQUIRED AN INTEGER, FOUND "
                + rawSize + " (ALTHOUGH STRINGS ARE VALID CKAN SIZES)", ex);
    }
}
/**
 * Returns the trimmed resource description as a dictionary in the given
 * locale.
 *
 * @param dataset
 *            the resource to read the description from
 * @param locale
 *            if unknown pass {@link Locale#ROOT}
 * @return the description found
 * @throws JackanNotFoundException
 *             when the description is null or blank
 * @throws JackanException
 *             on generic error
 */
protected Dict extractDescription(CkanResource dataset, Locale locale) {
    String description = trim(dataset.getDescription());
    if (!description.isEmpty()) {
        return Dict.of(locale, description);
    }
    throw new JackanNotFoundException("Couldn't find valid description!");
}
/**
 * Returns the trimmed resource format.
 *
 * @param resource
 *            the resource to read the format from
 * @return the trimmed format
 * @throws JackanNotFoundException
 *             when the format is null or blank
 * @throws JackanException
 *             on generic error
 */
protected String extractFormat(CkanResource resource) {
    if (!isTrimmedEmpty(resource.getFormat())) {
        return resource.getFormat().trim();
    }
    throw new JackanNotFoundException("Couldn't find a valid format!");
}
/**
 * Returns the resource 'license' field when present and non-blank, otherwise
 * falls back on the provided license.
 *
 * Previously the resource license was looked up but then discarded, so the
 * provided fallback was always returned (or an exception thrown when the
 * fallback was blank, even if the resource had its own license).
 *
 * @param resource
 *            the resource to read the field from
 * @param license
 *            value used if resource does not already have a license field.
 *            If unknown pass the empty string.
 * @return the trimmed license of the resource, or the trimmed fallback
 * @throws JackanNotFoundException
 *             when the resource has no license and the fallback is null or
 *             blank
 * @throws JackanException
 *             on generic error
 */
protected String extractLicense(CkanResource resource, String license) {
    try {
        return extractFieldAsNonEmptyString(resource, "license");
    } catch (JackanNotFoundException ex) {
        if (isTrimmedEmpty(license)) {
            throw new JackanNotFoundException("Couldn't find valid license in resource!", ex);
        }
        return license.trim();
    }
}
/**
 * Returns the media type (mimetype) of a resource with surrounding
 * whitespace removed, consistently with how format and access url are
 * extracted.
 *
 * @param resource
 *            the resource to read the mimetype from
 * @return the trimmed mimetype
 * @throws JackanNotFoundException
 *             when not found
 * @throws JackanException
 *             on generic error
 */
protected String extractMediaType(CkanResource resource) {
    String mimetype = resource.getMimetype();
    if (isTrimmedEmpty(mimetype)) {
        throw new JackanNotFoundException("Couldn't find a valid media type!");
    }
    // The value was validated on its trimmed form, so return that same form.
    return mimetype.trim();
}
/**
 * Reads the rights of a resource from its {@code rights} field,
 * delegating the lookup to {@code extractFieldAsNonEmptyString}.
 *
 * @param resource
 *            the resource to read the field from
 * @return the value found for the {@code rights} field
 * @throws JackanNotFoundException
 *             when not found
 * @throws JackanException
 *             on generic error
 */
protected String extractRights(CkanResource resource) {
    final String rights = extractFieldAsNonEmptyString(resource, "rights");
    return rights;
}
/**
 * Builds the title of a distribution out of the name of a resource.
 *
 * @param resource
 *            the resource to read the name from
 * @param locale
 *            if unknown pass {@link Locale#ROOT}
 * @return a {@link Dict} holding the trimmed resource name under the given
 *         locale
 * @throws JackanNotFoundException
 *             when not found
 * @throws JackanException
 *             on generic error
 */
protected Dict extractTitle(CkanResource resource, Locale locale) {
    String name = resource.getName();
    if (!isTrimmedEmpty(name)) {
        return Dict.of(locale, name.trim());
    }
    // The absence is logged here before being signalled to the caller.
    logger.info("Couldn't find valid distribution title, skipping it");
    throw new JackanNotFoundException("Couldn't find a valid title!");
}
/**
 * Converts a Ckan resource to a DcatDistribution. If the resource contains
 * erroneous fields the converter should just skip them without throwing
 * exceptions.
 *
 * Every field is extracted on its own: when an extractor signals
 * {@link JackanNotFoundException} the field is reported through
 * {@code logDistribCantFind}, on any other exception it is reported through
 * {@code logDistribCantExtract}; in both cases the field is left unset and
 * the conversion goes on with the next one. The dataset uri is always set,
 * as it only depends on the catalog url and the dataset id. Once all the
 * fields have been processed, {@code postProcessDistribution} is invoked on
 * the builder before building the result.
 *
 * @param resource
 *            must be non null, but it may have missing or null fields.
 * @param catalogURL
 *            catalog string, i.e. http://dati.trentino.it
 * @param datasetIdOrName
 *            owner dataset alphanumerical id (i.e.
 *            fccc07ce-3750-4970-92fd-6a6f432b4466, preferred as stable) or
 *            dataset name (less preferred, as names can change over time)
 * @param license
 *            A link to the license document under which the distribution is
 *            made available. For more info, see
 *            {@link eu.trentorise.opendata.traceprov.dcat.ADcatDistribution#getLicense() }
 *            . If license is unknown, use an empty string.
 * @param locale
 *            The language of the distribution. If unknown use
 *            {@link Locale#ROOT}
 * @return the distribution built from the fields that could be extracted
 */
@Beta
public DcatDistribution makeDistribution(CkanResource resource, String catalogURL, String datasetIdOrName,
        String license, Locale locale) {
    logger.warning(
            "CONVERSION FROM CKAN RESOURCE TO DCAT DISTRIBUTION IS STILL EXPERIMENTAL, IT MIGHT BE INCOMPLETE!!!");

    // Arguments are validated up front, before any field is extracted.
    checkNotNull(resource, "invalid ckan resource");
    checkNotEmpty(catalogURL, "invalid catalog URL");
    checkNotEmpty(datasetIdOrName, "invalid dataset id");
    checkNotNull(license, "invalid license");

    String sanitizedCatalogUrl = TodUtils.removeTrailingSlash(catalogURL);
    DcatDistribution.Builder ddb = DcatDistribution.builder();

    try {
        ddb.setUri(extractUri(resource, sanitizedCatalogUrl, datasetIdOrName));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind(URI_FIELD);
    } catch (Exception ex) {
        logDistribCantExtract(URI_FIELD, ex);
    }

    try {
        ddb.setAccessURL(extractAccessUrl(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("accessURL");
    } catch (Exception ex) {
        logDistribCantExtract("accessURL", ex);
    }

    try {
        ddb.setDownloadURL(extractDownloadUrl(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("downloadURL");
    } catch (Exception ex) {
        logDistribCantExtract("downloadURL", ex);
    }

    try {
        ddb.setByteSize(extractByteSize(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("byteSize");
    } catch (Exception ex) {
        logDistribCantExtract("byteSize", ex);
    }

    // Not wrapped in a try block: it only depends on the already validated
    // catalog url and dataset id, not on the content of the resource.
    ddb.setDatasetUri(CkanClient.makeDatasetUrl(sanitizedCatalogUrl, datasetIdOrName));

    try {
        ddb.setDescription(extractDescription(resource, locale));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("description");
    } catch (Exception ex) {
        logDistribCantExtract("description", ex);
    }

    try {
        ddb.setFormat(extractFormat(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("format");
    } catch (Exception ex) {
        logDistribCantExtract("format", ex);
    }

    try {
        ddb.setIssued(extractIssued(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind(ISSUED);
    } catch (Exception ex) {
        logDistribCantExtract(ISSUED, ex);
    }

    try {
        ddb.setLicense(extractLicense(resource, license));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("license");
    } catch (Exception ex) {
        logDistribCantExtract("license", ex);
    }

    try {
        ddb.setModified(extractModified(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind(MODIFIED);
    } catch (Exception ex) {
        logDistribCantExtract(MODIFIED, ex);
    }

    try {
        ddb.setMediaType(extractMediaType(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("mediaType");
    } catch (Exception ex) {
        logDistribCantExtract("mediaType", ex);
    }

    try {
        ddb.setRights(extractRights(resource));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind("rights");
    } catch (Exception ex) {
        logDistribCantExtract("rights", ex);
    }

    try {
        ddb.setTitle(extractTitle(resource, locale));
    } catch (JackanNotFoundException ex) {
        logDistribCantFind(TITLE);
    } catch (Exception ex) {
        logDistribCantExtract(TITLE, ex);
    }

    // Hook invoked on the builder once all the fields have been processed.
    postProcessDistribution(ddb, resource, sanitizedCatalogUrl, datasetIdOrName, license, locale);
    return ddb.build();
}
}