/** * * Copyright 2009-2013 The MITRE Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * ************************************************************************** * NOTICE * This software was produced for the U. S. Government under Contract No. * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer * Software and Noncommercial Computer Software Documentation Clause * 252.227-7014 (JUN 1995) * * (c) 2012 The MITRE Corporation. All Rights Reserved. * ************************************************************************** */ package org.opensextant.xtext.converters; import java.io.IOException; import java.io.InputStream; import org.apache.poi.hwpf.extractor.WordExtractor; import org.opensextant.xtext.ConvertedDocument; /** * @author T. Allison, MITRE * @author Marc C. Ubaldino, MITRE, ubaldino at mitre dot org */ public class MSDocConverter extends ConverterAdapter { /** */ @Override protected ConvertedDocument conversionImplementation(InputStream input, java.io.File doc) throws IOException { org.apache.poi.hwpf.extractor.WordExtractor ex = new WordExtractor(input); String[] ps = ex.getParagraphText(); input.close(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < ps.length; i++) { sb.append(WordExtractor.stripFields(ps[i]).trim()); sb.append('\n'); } ConvertedDocument textdoc = new ConvertedDocument(doc); textdoc.setText(sb.toString()); ex.close(); return textdoc; } }