/** * OpenKM, Open Document Management System (http://www.openkm.com) * Copyright (c) 2006-2011 Paco Avila & Josep Llort * * No bytes were intentionally harmed during the development of this application. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package com.openkm.extractor; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.io.StringReader; import java.util.Iterator; import org.apache.jackrabbit.extractor.AbstractTextExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.drew.imaging.ImageMetadataReader; import com.drew.imaging.ImageProcessingException; import com.drew.metadata.Directory; import com.drew.metadata.Metadata; import com.drew.metadata.MetadataException; import com.drew.metadata.Tag; /** * Text extractor for JPEG image documents. * Use java metadata extraction library from * http://www.drewnoakes.com/code/exif/index.html */ public class ExifTextExtractor extends AbstractTextExtractor { /** * Logger instance. */ private static final Logger log = LoggerFactory.getLogger(ExifTextExtractor.class); /** * Creates a new <code>JpegTextExtractor</code> instance. */ public ExifTextExtractor() { super(new String[]{"image/jpeg"}); } //-------------------------------------------------------< TextExtractor > /** * {@inheritDoc} */ @SuppressWarnings("unchecked") public Reader extractText(InputStream stream, String type, String encoding) throws IOException { try { Metadata metadata = ImageMetadataReader.readMetadata(new BufferedInputStream(stream)); Iterator<Directory> directories = metadata.getDirectoryIterator(); StringBuffer sb = new StringBuffer(); while (directories.hasNext()) { Directory directory = directories.next(); Iterator<Tag> tags = directory.getTagIterator(); while (tags.hasNext()) { Tag tag = tags.next(); sb.append("["); sb.append(tag.getDirectoryName()); sb.append("] "); sb.append(tag.getTagName()); sb.append(" = "); sb.append(tag.getDescription()); sb.append("\n"); } } log.debug("TEXT: "+sb.toString()); return new StringReader(sb.toString()); } catch (ImageProcessingException e) { log.warn("Failed to extract EXIF information", e); return new StringReader(""); } catch (MetadataException e) { log.warn("Failed to extract EXIF information", e); return new StringReader(""); } finally { stream.close(); } } }