/*******************************************************************************
* Copyright (c) 2012, Directors of the Tyndale STEP Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* Neither the name of the Tyndale House, Cambridge (www.TyndaleHouse.com)
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package com.tyndalehouse.step.tools.versions;
import static com.tyndalehouse.step.core.utils.StringUtils.isNotBlank;
import java.io.FileInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Command-line tool that walks every character run of a Word ({@code .doc}) document and prints
 * a flat, tab-separated dump of the annotations it finds to standard output.
 * <p>
 * Visible text is accumulated until a hidden ("vanished") run is met. At that point a record
 * header is printed:
 * <ul>
 * <li>{@code @Reference} - the leading reference found on the last visible line (or the previous
 * reference when that line does not start with one),</li>
 * <li>{@code @FullText} - the last visible line, with the leading reference removed,</li>
 * <li>{@code @MatchingText} - the underlined portion of the visible text.</li>
 * </ul>
 * The hidden runs that follow are then split into numbered {@code @OptionsType},
 * {@code @OptionsAlternative} and {@code @OptionsQualifier} lines, driven by whether a run is
 * bold and by the line breaks contained in the hidden text.
 */
public class ParseVersionFile {
    /** Document that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_DOCUMENT =
            "C:\\Users\\Chris\\Downloads\\Gen 1-10.prepared for reviewer.doc";

    /** Splits text on single line-break characters. */
    private static final Pattern LINES = Pattern.compile("[\r\n]");

    /** Matches a reference at the very start of a line: optional digit, letters, number, optional ":number". */
    private static final Pattern REF = Pattern.compile("^\\d?\\s*[a-zA-Z]+\\s*\\d+(:\\d+)?");

    /** Characters stripped from values before they are printed. */
    private static final Pattern CLEAN_UP = Pattern.compile("[\\r\\n<>]+");

    /**
     * Parses the document and prints the extracted records to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the {@code .doc} file to parse. When
     *            absent, the built-in default path is used.
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(final String[] args) throws Exception {
        final String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DOCUMENT;

        final HWPFDocument doc;
        final FileInputStream in = new FileInputStream(path);
        try {
            doc = new HWPFDocument(new POIFSFileSystem(in));
        } finally {
            // NOTE(review): POI is expected to have consumed (and possibly already closed) the
            // stream by now; closing a FileInputStream a second time has no effect.
            in.close();
        }

        final Range range = doc.getRange();

        boolean isHidden = false;
        boolean prefix = false;
        boolean mainText = false;
        String currentRef = null;
        final StringBuilder text = new StringBuilder(256);
        final StringBuilder partialLine = new StringBuilder(256);
        int count = 0;

        for (int k = 0; k < range.numParagraphs(); k++) {
            final org.apache.poi.hwpf.usermodel.Paragraph paragraph = range.getParagraph(k);
            for (int j = 0; j < paragraph.numCharacterRuns(); j++) {
                final org.apache.poi.hwpf.usermodel.CharacterRun cr = paragraph.getCharacterRun(j);
                String docText = cr.text();

                if (!cr.isVanished()) {
                    // visible text: the first visible run after hidden text starts a fresh buffer
                    if (isHidden) {
                        text.setLength(0);
                        prefix = false;
                        mainText = false;
                        isHidden = false;
                    }

                    // underlined visible text is what gets reported as the matching text
                    if (cr.getUnderlineCode() != 0) {
                        partialLine.append(docText);
                    }
                    text.append(docText);
                    continue;
                }

                if (!isHidden) {
                    // first hidden run after visible text: emit the record header, using only
                    // the last line of the visible text gathered so far
                    String lastLine = lastLineOf(text.toString());
                    final Matcher matcher = REF.matcher(lastLine);
                    if (matcher.find()) {
                        currentRef = matcher.group();
                        // REF is anchored at the start, so strip just that leading match rather
                        // than every occurrence of the reference within the line
                        lastLine = lastLine.substring(matcher.end()).trim();
                    }

                    System.out.println("===============================");
                    System.out.println("@Reference=\t" + currentRef);
                    System.out.println("@FullText=\t" + lastLine);
                    System.out.println("@MatchingText=\t" + partialLine.toString());

                    count = 0;
                    text.setLength(0);
                    partialLine.setLength(0);
                    isHidden = true;
                }

                if (cr.isBold()) {
                    // the first bold run of an entry closes the prefix gathered before it
                    if (!prefix) {
                        System.out.println(String.format("@OptionsType%d=\t%s", count,
                                clean(text.toString())));
                        prefix = true;
                        text.setLength(0);
                    }
                } else {
                    if (prefix && !mainText) {
                        // first non-bold run after the bold text: the bold text is complete
                        mainText = true;
                        System.out.println(String.format("@OptionsAlternative%d=\t%s", count,
                                clean(text.toString())));
                        text.setLength(0);
                    }

                    // a line break inside a non-bold run ends the current entry; whatever comes
                    // before the break is the qualifier, the remainder starts the next entry
                    final int splitChar = hasCarriageReturn(docText);
                    if (splitChar != -1) {
                        final String postfix = docText.substring(0, splitChar);
                        text.append(postfix);
                        if (isNotBlank(postfix)) {
                            final String clean = clean(text.toString());
                            if (isNotBlank(clean)) {
                                System.out.println(String.format("@OptionsQualifier%d=\t%s",
                                        count, clean));
                            }
                        }

                        prefix = false;
                        mainText = false;
                        count++;
                        text.setLength(0);
                        docText = docText.substring(splitChar);
                    }
                }

                text.append(docText);
            }
        }
    }

    /**
     * Returns the last line of the given text.
     *
     * @param fullText the text to split on line breaks
     * @return the last element produced by splitting on line breaks, or the empty string when
     *         the split yields nothing (text made up solely of line breaks)
     */
    private static String lastLineOf(final String fullText) {
        final String[] lines = LINES.split(fullText);
        return lines.length == 0 ? "" : lines[lines.length - 1];
    }

    /**
     * Locates a line break in the given text.
     *
     * @param docText the text to inspect
     * @return the greater of the first index of {@code '\n'} and the first index of
     *         {@code '\r'}, or {@code -1} when the text contains neither
     */
    private static int hasCarriageReturn(final String docText) {
        return Math.max(docText.indexOf('\n'), docText.indexOf('\r'));
    }

    /**
     * Strips line breaks and angle brackets from a value.
     *
     * @param prefix the raw value
     * @return the value with every run of {@code \r}, {@code \n}, {@code <} and {@code >} removed
     */
    private static String clean(final String prefix) {
        return CLEAN_UP.matcher(prefix).replaceAll("");
    }
}