package org.gbif.occurrence.processor.interpreting;
import org.gbif.api.model.common.MediaObject;
import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.Extension;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.common.parsers.MediaParser;
import org.gbif.common.parsers.UrlParser;
import org.gbif.common.parsers.core.OccurrenceParseResult;
import org.gbif.dwc.terms.AcTerm;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.dwc.terms.Terms;
import java.net.URI;
import java.time.LocalDate;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.beust.jcommander.internal.Lists;
import com.beust.jcommander.internal.Maps;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Range;
import static org.gbif.common.parsers.date.TemporalAccessorUtils.toDate;
/**
* Interprets multi media extension records.
*/
public class MultiMediaInterpreter {
private static final MediaParser MEDIA_PARSER = MediaParser.getInstance();
// Order is important in case more than one extension is provided. The order will define the precedence.
private static final Set<Extension> SUPPORTED_MEDIA_EXTENSIONS = ImmutableSet.of(
Extension.MULTIMEDIA, Extension.AUDUBON, Extension.IMAGE);
/**
* Private constructor.
*/
private MultiMediaInterpreter() {
//hidden constructor
}
public static void interpretMedia(VerbatimOccurrence verbatim, Occurrence occ) {
//the order is important since we will keep the first object that appears for each URI
List<MediaObject> mediaList = Lists.newLinkedList();
List<URI> mediaUri = Lists.newLinkedList();
// handle possible multimedia extensions first
final Extension mediaExt = getMultimediaExtension(verbatim.getExtensions().keySet());
if (mediaExt != null) {
for (Map<Term, String> rec : verbatim.getExtensions().get(mediaExt)) {
//For AUDUBON, we use accessURI over identifier
//TODO handle AUDUBON in its own method
URI uri = UrlParser.parse(Terms.getValueOfFirst(rec, AcTerm.accessURI, DcTerm.identifier));
URI link = UrlParser.parse(Terms.getValueOfFirst(rec, DcTerm.references, AcTerm.furtherInformationURL,
AcTerm.attributionLinkURL));
// link or media uri must exist
if (uri != null || link != null) {
MediaObject m = new MediaObject();
m.setIdentifier(uri);
m.setReferences(link);
m.setTitle(rec.get(DcTerm.title));
m.setDescription(Terms.getValueOfFirst(rec, DcTerm.description, AcTerm.caption));
m.setLicense(Terms.getValueOfFirst(rec, DcTerm.license, DcTerm.rights));
m.setPublisher(rec.get(DcTerm.publisher));
m.setContributor(rec.get(DcTerm.contributor));
m.setSource(Terms.getValueOfFirst(rec, DcTerm.source, AcTerm.derivedFrom));
m.setAudience(rec.get(DcTerm.audience));
m.setRightsHolder(rec.get(DcTerm.rightsHolder));
m.setCreator(rec.get(DcTerm.creator));
m.setFormat(MEDIA_PARSER.parseMimeType(rec.get(DcTerm.format)));
if (rec.containsKey(DcTerm.created)) {
Range<LocalDate> validRecordedDateRange = Range.closed(TemporalInterpreter.MIN_LOCAL_DATE, LocalDate.now());
OccurrenceParseResult<TemporalAccessor> parsed = TemporalInterpreter.interpretLocalDate(rec.get(DcTerm.created),
validRecordedDateRange, OccurrenceIssue.MULTIMEDIA_DATE_INVALID);
m.setCreated(toDate(parsed.getPayload()));
occ.getIssues().addAll(parsed.getIssues());
}
MEDIA_PARSER.detectType(m);
mediaList.add(m);
mediaUri.add(getPreferredURI(m));
} else {
occ.getIssues().add(OccurrenceIssue.MULTIMEDIA_URI_INVALID);
}
}
}
// media via core term
if (verbatim.hasVerbatimField(DwcTerm.associatedMedia)) {
for (URI uri : UrlParser.parseUriList(verbatim.getVerbatimField(DwcTerm.associatedMedia))) {
if (uri == null) {
occ.getIssues().add(OccurrenceIssue.MULTIMEDIA_URI_INVALID);
} else {
// only try to build the object if we don't already got it from the extension
if(!mediaUri.contains(uri)) {
MediaObject m = new MediaObject();
m.setIdentifier(uri);
MEDIA_PARSER.detectType(m);
mediaList.add(m);
}
}
}
}
// make sure information is not given several times for the same image
occ.setMedia(deduplicateMedia(mediaList));
}
/**
* Return the first multimedia Extension supported (SUPPORTED_MEDIA_EXTENSIONS).
*
* @param recordExtension
* @return First media Extension found or null if not found
*/
private static Extension getMultimediaExtension(Set<Extension> recordExtension){
for(Extension ext: SUPPORTED_MEDIA_EXTENSIONS){
if(recordExtension.contains(ext)){
return ext;
}
}
return null;
}
/**
* We can get file uris or weblinks. Prefer file URIs as they clearly identify a single image
* @param mediaObject
* @return
*/
private static URI getPreferredURI(MediaObject mediaObject){
return mediaObject.getIdentifier() != null ? mediaObject.getIdentifier() : mediaObject.getReferences();
}
/**
* Merges media records if the same image URL or link is given several times.
* Remove any media that has not either a file or webpage uri.
* @return a new list
*/
private static List<MediaObject> deduplicateMedia(List<MediaObject> mediaList) {
Map<String, MediaObject> media = Maps.newLinkedHashMap();
for (MediaObject m : mediaList) {
URI uri = getPreferredURI(m);
if (uri != null) {
String url = uri.toString();
if (!media.containsKey(url)) {
media.put(url, m);
}
}
}
return Lists.newArrayList(media.values());
}
}