/** * Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package net.di2e.ecdr.search.transform.atom.response; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Serializable; import java.io.StringWriter; import java.net.URI; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import javax.activation.MimeType; import javax.xml.namespace.QName; import net.di2e.ecdr.api.queryresponse.SearchResponseTransformer; import net.di2e.ecdr.commons.CDRMetacard; import net.di2e.ecdr.commons.CDRMetacardType; import net.di2e.ecdr.commons.filter.config.AtomSearchResponseTransformerConfig; import net.di2e.ecdr.commons.filter.config.AtomSearchResponseTransformerConfig.AtomContentXmlWrapOption; import net.di2e.ecdr.search.transform.atom.constants.AtomResponseConstants; import net.di2e.ecdr.search.transform.atom.geo.AbderaConverter; import net.di2e.ecdr.search.transform.atom.response.security.SecurityMarkingParser; import org.apache.abdera.Abdera; import org.apache.abdera.ext.geo.Position; import org.apache.abdera.ext.opensearch.OpenSearchConstants; import org.apache.abdera.i18n.iri.IRI; import org.apache.abdera.model.Category; import org.apache.abdera.model.Content; import org.apache.abdera.model.Document; import org.apache.abdera.model.Element; import org.apache.abdera.model.Entry; import org.apache.abdera.model.Feed; import org.apache.abdera.model.Link; import org.apache.abdera.parser.Parser; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ddf.catalog.data.Metacard; import ddf.catalog.data.Result; import ddf.catalog.data.impl.ResultImpl; import ddf.catalog.operation.QueryRequest; import ddf.catalog.operation.SourceResponse; import ddf.catalog.operation.impl.SourceResponseImpl; public class AtomResponseTransformer implements SearchResponseTransformer { private static final transient Logger LOGGER = LoggerFactory.getLogger( AtomResponseTransformer.class ); private static final String METADATA_ELEMENT_NAME = "Resource"; private static final DateTimeFormatter DATE_FORMATTER = ISODateTimeFormat.dateTimeParser(); private static final Abdera ABDERA = Abdera.getInstance(); private AtomSearchResponseTransformerConfig filterConfig = null; public AtomResponseTransformer( AtomSearchResponseTransformerConfig config ) { this.filterConfig = config; } @Override public SourceResponse processSearchResponse( InputStream inputStream, QueryRequest request, String siteName ) { List<Result> resultList = new ArrayList<Result>(); ClassLoader tccl = Thread.currentThread().getContextClassLoader(); Parser parser = null; Document<Feed> atomDoc; try { Thread.currentThread().setContextClassLoader( AtomResponseTransformer.class.getClassLoader() ); parser = ABDERA.getParser(); if ( LOGGER.isTraceEnabled() ) { StringWriter writer = new StringWriter(); try { IOUtils.copy( inputStream, writer ); LOGGER.trace( "Transforming the following atom feed into a DDF SourceResponse:{}{}", System.lineSeparator(), writer ); inputStream = IOUtils.toInputStream( writer.toString() ); } catch ( IOException e ) { LOGGER.trace( "Could not print out atom stream for log: {}", e.getMessage() ); } } atomDoc = parser.parse( new InputStreamReader( inputStream ) ); } finally { Thread.currentThread().setContextClassLoader( tccl ); } Feed feed = atomDoc.getRoot(); List<Entry> entries = feed.getEntries(); int size = entries.size(); for ( Entry entry : entries ) { if ( isValidEntry( entry ) ) { Metacard metacard = entryToMetacard( entry, siteName ); resultList.add( metacardToResult( entry, metacard ) ); } else { LOGGER.debug( "Skipping invalid entry: {}", entry ); size--; } } long totalResults = size; Element totalResultsElement = atomDoc.getRoot().getExtension( OpenSearchConstants.TOTAL_RESULTS ); if ( totalResultsElement != null ) { try { totalResults = Long.parseLong( totalResultsElement.getText() ); } catch ( NumberFormatException e ) { LOGGER.warn( "Received invalid number of results from Atom response [" + totalResultsElement.getText() + "]", e ); } } Map<String, Serializable> responseProperties = null; return new SourceResponseImpl( request, responseProperties, resultList, totalResults ); } private Metacard entryToMetacard( Entry entry, String siteName ) { CDRMetacard metacard = new CDRMetacard( CDRMetacardType.CDR_METACARD ); String id = entry.getIdElement().getText(); // id may be formatted catalog:id:<id>, so we parse out the <id> if ( StringUtils.isNotBlank( id ) && (id.startsWith( "urn:uuid:" ) || id.startsWith( "urn:catalog:id:" )) ) { id = id.substring( id.lastIndexOf( ':' ) + 1 ); } metacard.setId( id ); metacard.setSourceId( siteName ); List<Category> categories = entry.getCategories(); if ( categories != null && !categories.isEmpty() ) { Category category = categories.get( 0 ); metacard.setContentTypeName( category.getTerm() ); IRI scheme = category.getScheme(); if ( scheme != null ) { metacard.setContentTypeVersion( scheme.toString() ); } } try { metacard.setModifiedDate( entry.getUpdated() ); } catch ( IllegalArgumentException e ) { LOGGER.warn( "InvalidDate found in atom reponse, setting Metacard modified time to now " ); metacard.setEffectiveDate( new Date() ); } try { metacard.setEffectiveDate( entry.getPublished() ); } catch ( IllegalArgumentException e ) { LOGGER.warn( "InvalidDate found in atom reponse, setting Metacard Effective time to now " ); metacard.setEffectiveDate( new Date() ); } String createdDate = entry.getSimpleExtension( new QName( AtomResponseConstants.METACARD_ATOM_NAMESPACE, AtomResponseConstants.METACARD_CREATED_DATE_ELEMENT ) ); if ( createdDate != null ) { metacard.setCreatedDate( new Date( DATE_FORMATTER.parseMillis( createdDate ) ) ); } String expirationDate = entry.getSimpleExtension( new QName( AtomResponseConstants.METACARD_ATOM_NAMESPACE, AtomResponseConstants.METADATA_EXPIRATION_DATE_ELEMENT ) ); if ( expirationDate != null ) { metacard.setExpirationDate( new Date( DATE_FORMATTER.parseMillis( expirationDate ) ) ); } AtomContentXmlWrapOption wrap = filterConfig.getAtomContentXmlWrapOption(); String metadata = entry.getContent(); populateMetadata( entry, metacard, wrap, metadata ); metacard.setLocation( getWKT( entry ) ); Link productLink = entry.getLink( filterConfig.getProductLinkRelation() ); if ( productLink != null ) { metacard.setResourceURI( URI.create( productLink.getHref().toASCIIString() ) ); long resourceSize = productLink.getLength(); if ( resourceSize > 0 ) { metacard.setResourceSize( String.valueOf( resourceSize ) ); } String productTitle = productLink.getTitle(); if ( productTitle != null ) { metacard.setAttribute( CDRMetacard.RESOURCE_TITLE, productTitle ); } // ECDR-41 figure out MIMEType MimeType productType = productLink.getMimeType(); if ( productType != null ) { metacard.setAttribute( CDRMetacard.RESOURCE_MIME_TYPE, productType.toString() ); } } String thumbnailLinkRel = filterConfig.getThumbnailLinkRelation(); if ( thumbnailLinkRel != null ) { List<Link> links = entry.getLinks( thumbnailLinkRel ); if ( links != null && !links.isEmpty() ) { for ( Link link : links ) { MimeType mimeType = link.getMimeType(); if ( mimeType == null || "image".equals( mimeType.getPrimaryType() ) ) { metacard.setThumbnailLinkURI( URI.create( link.getHref().toASCIIString() ) ); long thumbnailSize = link.getLength(); if ( thumbnailSize > 0 ) { metacard.setAttribute( CDRMetacard.THUMBNAIL_LENGTH, Long.valueOf( thumbnailSize ) ); } // ECDR-41 figure out MIMEType metacard.setAttribute( CDRMetacard.THUMBNAIL_MIMETYPE, link.getMimeType() ); metacard.setAttribute( CDRMetacard.THUMBNAIL_LINK_TITLE, link.getTitle() ); break; } } } } metacard.setTitle( entry.getTitle() ); boolean isMetadataSet = false; ClassLoader tccl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader( AtomResponseTransformer.class.getClassLoader() ); List<Element> extensions = entry.getExtensions(); for ( Element element : extensions ) { if ( METADATA_ELEMENT_NAME.equalsIgnoreCase( element.getQName().getLocalPart() ) ) { StringWriter writer = new StringWriter(); try { element.writeTo( writer ); metacard.setMetadata( writer.toString() ); isMetadataSet = true; break; } catch ( IOException e ) { LOGGER.error( "Could not convert Metadata String value from Atom to Metacard.METADATA attribute", e ); } } } } finally { Thread.currentThread().setContextClassLoader( tccl ); } if ( !isMetadataSet ) { String metadataLinkRel = filterConfig.getMetadataLinkRelation(); if ( metadataLinkRel != null ) { List<Link> metadataLinks = entry.getLinks( metadataLinkRel ); String metadataLink = null; for ( Link link : metadataLinks ) { MimeType mimeType = link.getMimeType(); if ( mimeType != null ) { if ( mimeType.getSubType().contains( "xml" ) ) { metadataLink = link.getHref().toASCIIString(); metacard.setMetadataLinkURI( URI.create( metadataLink ) ); metacard.setAttribute( CDRMetacard.WRAP_METADATA, null ); break; } else if ( mimeType.getBaseType().contains( "text" ) ) { metadataLink = link.getHref().toASCIIString(); metacard.setMetadataLinkURI( URI.create( metadataLink ) ); metacard.setAttribute( CDRMetacard.WRAP_METADATA, Boolean.TRUE ); } } } } } Metacard returnMetacard = SecurityMarkingParser.addSecurityToMetacard( metacard, entry ); return new CDRMetacard( returnMetacard ); } protected void populateMetadata( Entry entry, CDRMetacard metacard, AtomContentXmlWrapOption wrap, String metadata ) { if ( metadata != null ) { if ( wrap != null && wrap != AtomContentXmlWrapOption.NEVER_WRAP ) { if ( wrap == AtomContentXmlWrapOption.WRAP_HTML_AND_TEXT ) { Content.Type contentType = entry.getContentType(); // certain content types may not follow XML structure switch ( contentType ) { case TEXT: case HTML: // add content element to make sure it has single root metadata = "<xml-fragment>" + metadata + "</xml-fragment>"; break; default: // other items are xml-based break; } } else { metadata = "<xml-fragment>" + metadata + "</xml-fragment>"; } } metacard.setMetadata( metadata ); } } protected Result metacardToResult( Entry entry, Metacard metacard ) { ResultImpl result = new ResultImpl( metacard ); String relevance = entry.getSimpleExtension( AtomResponseConstants.RELEVANCE_NAMESPACE, AtomResponseConstants.RELEVANCE_ELEMENT, AtomResponseConstants.RELEVANCE_NAMESPACE_PREFIX ); if ( relevance != null ) { try { result.setRelevanceScore( Double.parseDouble( relevance ) ); } catch ( NumberFormatException e ) { LOGGER.warn( "Received invalid number for relevance from Atom response [" + relevance + "]", e ); } } String distance = entry.getSimpleExtension( AtomResponseConstants.CDRS_EXT_NAMESPACE, AtomResponseConstants.DISTANCE_ELEMENT, AtomResponseConstants.CDRS_EXT_NAMESPACE_PREFIX ); if ( distance != null ) { try { result.setDistanceInMeters( Double.parseDouble( distance ) ); } catch ( NumberFormatException e ) { LOGGER.warn( "Received invalid number for distance from Atom response [" + distance + "]", e ); } } return result; } protected String getWKT( Entry entry ) { String wkt = null; Position[] positions = net.di2e.ecdr.search.transform.atom.geo.GeoHelper.getPositions( entry ); int length = positions.length; if ( length == 1 ) { LOGGER.debug( "Found one geometry in the current Atom entry, converting to WKT for inclusion in metacard" ); return AbderaConverter.convertToWKT( positions[0] ); } else if ( length > 1 ) { LOGGER.debug( "Found multiple geometries in the current Atom entry, converting to MULTI-WKT for inclusion in metacard" ); return AbderaConverter.convertToWKT( positions ); } return wkt; } /** * Check to see if entry is a valid ATOM Entry conforming to the specification. * * @param entry * @return true if incoming entry conforms to the specification, false if it does not. */ private boolean isValidEntry( Entry entry ) { if ( entry == null ) { return false; } // RFC4287 Section 4.1.2 // atom:entry elements MUST contain exactly one atom:id element. // atom:entry elements MUST contain exactly one atom:title element. // atom:entry elements MUST contain exactly one atom:updated element. // quick check to make sure that the entry contains those elements. return (entry.getIdElement() != null && entry.getTitleElement() != null && entry.getUpdatedElement() != null); } }