/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.engines.xmpextractor; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Collections; import java.util.Map; import org.apache.clerezza.commons.rdf.ImmutableGraph; import org.apache.clerezza.commons.rdf.Graph; import org.apache.clerezza.commons.rdf.IRI; import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph; import org.apache.clerezza.rdf.core.serializedform.Parser; import org.apache.clerezza.rdf.utils.GraphNode; import org.apache.felix.scr.annotations.Component; import org.apache.felix.scr.annotations.Property; import org.apache.felix.scr.annotations.Reference; import org.apache.felix.scr.annotations.Service; import org.apache.stanbol.commons.indexedgraph.IndexedGraph; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.EngineException; import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; import org.apache.tika.parser.image.xmp.XMPPacketScanner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Component(immediate = true, metatype = true, inherit = true) @Service @org.apache.felix.scr.annotations.Properties(value={ @Property(name=EnhancementEngine.PROPERTY_NAME, value="xmpextractor") }) public class XmpExtractorEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements EnhancementEngine, ServiceProperties { private static final Logger LOG = LoggerFactory.getLogger(XmpExtractorEngine.class); @Reference Parser parser; /** * The default value for the Execution of this Engine. Currently set to * {@link ServiceProperties#ORDERING_PRE_PROCESSING} */ public static final Integer defaultOrder = ORDERING_PRE_PROCESSING; @Override public Map<String,Object> getServiceProperties() { return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder)); } @Override public int canEnhance(ContentItem ci) throws EngineException { LOG.info("MimeType: {}", ci.getMimeType()); if (isSupported(ci.getMimeType())) { return ENHANCE_ASYNC; } return CANNOT_ENHANCE; } @Override public void computeEnhancements(ContentItem ci) throws EngineException { InputStream in = ci.getBlob().getStream(); XMPPacketScanner scanner = new XMPPacketScanner(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { scanner.parse(in, baos); } catch (IOException e) { throw new EngineException(e); } byte[] bytes = baos.toByteArray(); if (bytes.length > 0) { Graph model = new IndexedGraph(); parser.parse(model, new ByteArrayInputStream(bytes), "application/rdf+xml"); GraphNode gn = new GraphNode( new IRI("http://relative-uri.fake/"), model); gn.replaceWith(ci.getUri()); ci.getLock().writeLock().lock(); try { LOG.info("Model: {}",model); ci.getMetadata().addAll(model); } finally { ci.getLock().writeLock().unlock(); } } } private boolean isSupported(String mimeType) { if (mimeType.startsWith("text/")) { return false; //assuming text types cannot contain XMP } else { return true; // As there isn't a list of media types that can contain XMP } } }