/* * ModeShape (http://www.modeshape.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.modeshape.sequencer.msoffice.powerpoint; import java.awt.Color; import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; import org.apache.poi.hslf.usermodel.HSLFSlide; import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl; import org.apache.poi.hslf.usermodel.HSLFTextParagraph; import org.apache.poi.hslf.usermodel.HSLFTextRun; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * Utility for extracting metadata from PowerPoint files */ public class PowerPointMetadataReader { public static PowerpointMetadata instance( InputStream stream ) throws IOException { POIFSFileSystem fs = new POIFSFileSystem(stream); HSLFSlideShow rawSlideShow = new HSLFSlideShow(fs); List<SlideMetadata> slidesMetadata = rawSlideShow.getSlides() .stream() .map(slide -> processSlide(rawSlideShow, slide)) .collect(Collectors.toList()); PowerpointMetadata deck = new PowerpointMetadata(); deck.setSlides(slidesMetadata); deck.setMetadata(new HSLFSlideShowImpl(fs).getSummaryInformation()); return deck; } private static SlideMetadata processSlide(HSLFSlideShow rawSlideShow, HSLFSlide slide) { SlideMetadata slideMetadata = new SlideMetadata(); // process title String title = slide.getTitle(); slideMetadata.setTitle(title); // process notes slideMetadata.setNotes(collectText(slide.getNotes().getTextParagraphs(), title)); // process text slideMetadata.setText(collectText(slide.getTextParagraphs(), title)); // process thumbnail Dimension pgsize = rawSlideShow.getPageSize(); BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); // clear the drawing area graphics.setPaint(Color.white); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); // render slide.draw(graphics); try { ByteArrayOutputStream out = new ByteArrayOutputStream(); javax.imageio.ImageIO.write(img, "png", out); slideMetadata.setThumbnail(out.toByteArray()); } catch (IOException e) { throw new RuntimeException(e); } return slideMetadata; } private static String collectText(List<List<HSLFTextParagraph>> paragraphs, String title) { return paragraphs.stream() .flatMap(Collection::stream) .flatMap(paragraph -> paragraph.getTextRuns().stream()) .map(HSLFTextRun::getRawText) .filter(rawText -> !title.equals(rawText)) .collect(Collectors.joining()); } }