/*******************************************************************************
* Copyright (c) 2009, Adobe Systems Incorporated
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* · Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* · Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* · Neither the name of Adobe Systems Incorporated nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
package com.adobe.dp.office.rtf;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Hashtable;
import java.util.Stack;
import java.util.Vector;
/**
 * Byte-oriented parser that turns an RTF stream into an {@link RTFDocument}.
 * <p>
 * The input is read through a fixed-size window ({@link #buffer}); groups
 * (<code>{ ... }</code>) become {@link RTFGroup} objects, control words and
 * symbols become {@link RTFControl} objects, and plain text runs become
 * <code>String</code> items. Control types get a chance to act at parse time
 * through <code>parseTimeExec</code> / <code>parseTimeGroupExec</code>; when
 * those return <code>true</code> the control (or group) is not added to the
 * resulting tree.
 * <p>
 * The parser keeps mutable state in its fields and is not written for
 * concurrent use.
 */
public class RTFDocumentParser {

    /**
     * Encoding assumed for text bytes until the document selects another one;
     * also used as the fallback when a requested encoding is unusable.
     */
    private static final String DEFAULT_ENCODING = "Cp1252";

    InputStream in;

    /** True when this parser opened {@link #in} itself and therefore has to close it. */
    private final boolean ownsStream;

    RTFDocument doc;

    /** Index of the next unread byte in {@link #buffer}. */
    int start;

    /** Index one past the last valid byte in {@link #buffer}. */
    int stop;

    /** Set once the underlying stream reported end of input. */
    boolean eof;

    byte[] buffer = new byte[4096];

    /** Number of bytes the main loop asks to have buffered before parsing each token. */
    final static int MAX_WORD_LEN = 128;

    /** State of the group currently being parsed. */
    Level curr;

    /** Enclosing group states, innermost on top. */
    Stack levels;

    /** Decoded text collected for the current text run. */
    StringBuffer sbuf;

    /** Raw text bytes not yet decoded with the current encoding. */
    ByteArrayOutputStream bbuf;

    Hashtable fonts;

    Hashtable paragraphStyles;

    Hashtable characterStyles;

    /**
     * Per-group parser state. A nested group starts with a copy of its
     * parent's encoding and skip count.
     */
    static class Level {

        Level() {
            this.encoding = DEFAULT_ENCODING;
        }

        Level(Level prev) {
            this.encoding = prev.encoding;
            this.skipCount = prev.skipCount;
        }

        /** Items (text strings, controls, nested groups) collected for this group. */
        Vector list = new Vector();

        /** Charset name used to decode raw text bytes in this group. */
        String encoding;

        /**
         * Number of upcoming controls / text bytes to discard (see
         * {@link RTFDocumentParser#setSkipCount(int)}). Presumably the
         * fallback characters following a Unicode escape - confirm against
         * the control-type handlers.
         */
        int skipCount = 0;
    }

    /**
     * Creates a parser reading from a file. The file is opened here and
     * closed by {@link #parse()}.
     *
     * @param docFile RTF file to read
     * @throws IOException if the file cannot be opened
     */
    public RTFDocumentParser(File docFile) throws IOException {
        this.in = new FileInputStream(docFile);
        this.ownsStream = true;
    }

    /**
     * Creates a parser reading from a caller-supplied stream. The stream is
     * not closed by this class.
     *
     * @param in stream positioned at the start of the RTF data
     * @throws IOException declared for compatibility; not thrown by this constructor
     */
    public RTFDocumentParser(InputStream in) throws IOException {
        this.in = in;
        this.ownsStream = false;
    }

    /**
     * Parses the whole input and returns the resulting document.
     *
     * @return the populated document; its root stays unset when the input
     *         contains no items at the top level
     * @throws IOException on read failure or structurally broken input
     */
    public RTFDocument parse() throws IOException {
        doc = new RTFDocument();
        try {
            parseInternal();
        } finally {
            if (ownsStream) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Closing an input stream we only read from: a failure
                    // here cannot affect the parsed result.
                }
            }
        }
        return doc;
    }

    /**
     * Tries to make at least <code>count</code> unread bytes available in
     * {@link #buffer}, compacting the buffer first when the tail is too short.
     * Sets {@link #eof} and returns early when the stream ends.
     *
     * @param count number of bytes wanted; must not exceed the buffer size
     * @throws IOException if reading from the stream fails
     */
    void fillAtLeast(int count) throws IOException {
        if (buffer.length - start < count) {
            // Not enough room behind 'start': move unread bytes to the front.
            stop -= start;
            System.arraycopy(buffer, start, buffer, 0, stop);
            start = 0;
        }
        while (stop - start < count) {
            int r = in.read(buffer, stop, buffer.length - stop);
            if (r <= 0) {
                eof = true;
                return;
            }
            stop += r;
        }
    }

    /**
     * Decodes the pending raw bytes with the current group's encoding and
     * appends the result to the text buffer.
     */
    void flushText0() {
        if (bbuf.size() > 0) {
            String text;
            try {
                text = bbuf.toString(curr.encoding);
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
                text = "???";
            }
            sbuf.append(text);
            bbuf.reset();
        }
    }

    /**
     * Consumes two hexadecimal digits at the read position and injects the
     * byte they denote. A truncated escape (fewer than two bytes left) or one
     * containing non-hex characters is consumed and dropped instead of
     * reading stale buffer content or throwing.
     */
    void processHexChar() {
        if (stop - start < 2) {
            // Input ended in the middle of the escape.
            start = stop;
            return;
        }
        int hi = Character.digit((char) (buffer[start] & 0xFF), 16);
        int lo = Character.digit((char) (buffer[start + 1] & 0xFF), 16);
        start += 2;
        if (hi < 0 || lo < 0) {
            return;
        }
        injectByte((byte) ((hi << 4) | lo));
    }

    /**
     * Appends one raw text byte; it is decoded later with the encoding in
     * effect when the pending bytes are flushed.
     */
    public void injectByte(byte b) {
        bbuf.write(b);
    }

    /**
     * Appends already-decoded text to the current text run, after decoding
     * any pending raw bytes so that ordering is preserved.
     */
    public void injectText(String text) {
        flushText0();
        sbuf.append(text);
    }

    /**
     * Ends the current text run: decodes pending bytes and, if any text was
     * collected, adds it as a String item to the current group.
     */
    private void flushText() {
        flushText0();
        if (sbuf.length() > 0) {
            curr.list.add(sbuf.toString());
            sbuf.setLength(0);
        }
    }

    /**
     * Adds a control to the current group, closing the preceding text run first.
     */
    public void injectControl(RTFControl control) {
        flushText();
        curr.list.add(control);
    }

    /**
     * Selects the charset used to decode subsequent raw text bytes in the
     * current group. A null, syntactically illegal or unsupported name falls
     * back to the same default encoding a fresh {@link Level} starts with.
     *
     * @param encoding charset name
     */
    public void setEncoding(String encoding) {
        boolean usable;
        try {
            usable = encoding != null && Charset.isSupported(encoding);
        } catch (IllegalArgumentException e) {
            // Charset.isSupported rejects malformed names with
            // IllegalCharsetNameException; treat them like unsupported ones.
            usable = false;
        }
        if (!usable) {
            encoding = DEFAULT_ENCODING;
        }
        if (!curr.encoding.equals(encoding)) {
            // Bytes collected so far belong to the previous encoding.
            flushText0();
            curr.encoding = encoding;
        }
    }

    /** Sets the skip count of the current group. */
    public void setSkipCount(int n) {
        curr.skipCount = n;
    }

    /** Registers a font under the given index. */
    public void addFont(int index, RTFFont font) {
        fonts.put(new Integer(index), font);
    }

    /** Returns the font registered under the given index, or null. */
    public RTFFont getFont(int index) {
        return (RTFFont) fonts.get(new Integer(index));
    }

    /** Stores the color table on the document being built. */
    public void setColorTable(RTFColor[] colors) {
        doc.colorTable = colors;
    }

    /** Registers a paragraph style under the given index. */
    public void addParagraphStyle(int index, RTFStyle style) {
        paragraphStyles.put(new Integer(index), style);
    }

    /** Returns the paragraph style registered under the given index, or null. */
    public RTFStyle getParagraphStyle(int index) {
        return (RTFStyle) paragraphStyles.get(new Integer(index));
    }

    /** Registers a character style under the given index. */
    public void addCharacterStyle(int index, RTFStyle style) {
        characterStyles.put(new Integer(index), style);
    }

    /** Returns the character style registered under the given index, or null. */
    public RTFStyle getCharacterStyle(int index) {
        return (RTFStyle) characterStyles.get(new Integer(index));
    }

    /**
     * Resets the parser state, tokenizes the whole input and stores the first
     * top-level item as the document root.
     *
     * @throws IOException on read failure, on a closing brace without a
     *         matching opening brace, or when the first top-level item is
     *         not a group
     */
    void parseInternal() throws IOException {
        start = 0;
        stop = 0;
        eof = false;
        levels = new Stack();
        curr = new Level();
        bbuf = new ByteArrayOutputStream();
        sbuf = new StringBuffer();
        fonts = new Hashtable();
        doc.fonts = fonts;
        paragraphStyles = new Hashtable();
        doc.paragraphStyles = paragraphStyles;
        characterStyles = new Hashtable();
        doc.characterStyles = characterStyles;

        while (true) {
            fillAtLeast(MAX_WORD_LEN);
            if (start == stop && eof)
                break;
            byte c = buffer[start];
            if (c == '\n' || c == '\r') {
                // Line breaks in the source carry no content.
                start++;
            } else if (c == '{') {
                openGroup();
            } else if (c == '}') {
                closeGroup();
            } else if (c == '\\') {
                readControl();
            } else {
                readText();
            }
        }

        // Only a non-empty top level has a root; an empty one leaves it unset.
        if (!curr.list.isEmpty()) {
            Object first = curr.list.get(0);
            if (!(first instanceof RTFGroup)) {
                throw new IOException("RTF content is not enclosed in a group");
            }
            doc.root = (RTFGroup) first;
        }
    }

    /** Handles '{': saves the current level and starts a nested one inheriting its state. */
    private void openGroup() {
        flushText();
        levels.push(curr);
        curr = new Level(curr);
        start++;
    }

    /**
     * Handles '}': wraps the collected items into a group, restores the
     * enclosing level and adds the group to it unless the head control's type
     * consumed it at parse time.
     */
    private void closeGroup() throws IOException {
        flushText();
        if (levels.isEmpty()) {
            throw new IOException("Unbalanced '}' in RTF input");
        }
        Object[] items = new Object[curr.list.size()];
        curr.list.copyInto(items);
        curr = (Level) levels.pop();
        RTFGroup group = new RTFGroup(items);
        RTFControl head = group.getHead();
        if (head == null || !head.getType().parseTimeGroupExec(group, this)) {
            curr.list.add(group);
        }
        start++;
    }

    /**
     * Handles a backslash: parses a control word (with optional numeric
     * parameter) or a single-character control symbol, optionally preceded by
     * the <code>\*</code> marker, and either lets it execute at parse time or
     * adds it to the current group. An escape cut off by the end of input is
     * dropped.
     */
    private void readControl() {
        if (curr.skipCount > 0)
            curr.skipCount--;
        boolean optional = false;
        start++; // consume the backslash
        if (start >= stop) {
            return; // lone backslash at end of input
        }
        byte c = buffer[start];
        if (c == '*') {
            optional = true;
            start++;
            if (start >= stop || buffer[start] != '\\') {
                return;
            }
            start++;
            if (start >= stop) {
                return;
            }
            c = buffer[start];
        }

        RTFControl ctrl;
        if (isAsciiLetter(c)) {
            // control word: a run of ASCII letters
            StringBuffer name = new StringBuffer();
            name.append((char) c);
            start++;
            while (start < stop) {
                c = buffer[start];
                if (!isAsciiLetter(c)) {
                    break;
                }
                name.append((char) c);
                start++;
            }
            RTFControlType type = RTFControlType.getControlTypeByName(name.toString());
            if (c == ' ') {
                // no parameter; the delimiting space belongs to the control
                start++;
                ctrl = new RTFControl(type, optional);
            } else if (c == '-' || ('0' <= c && c <= '9')) {
                // decimal parameter, optionally negative
                int param = 0;
                int sign = 1;
                if (c == '-') {
                    start++;
                    sign = -1;
                }
                while (start < stop) {
                    c = buffer[start];
                    if (c < '0' || c > '9') {
                        if (c == ' ')
                            start++;
                        break;
                    }
                    start++;
                    param = param * 10 + (c - '0');
                }
                ctrl = new RTFControlWithParam(type, optional, sign * param);
            } else {
                // no parameter; the delimiter is left for the main loop
                ctrl = new RTFControl(type, optional);
            }
        } else {
            // control symbol: a single non-letter character
            RTFControlType type = RTFControlType.getControlTypeByName(Character.toString((char) c));
            start++;
            ctrl = new RTFControl(type, optional);
        }

        if (!ctrl.parseTimeExec(this)) {
            injectControl(ctrl);
        }
    }

    /**
     * Consumes a run of plain text bytes up to the next structural character
     * or line break and appends it to the pending raw bytes, discarding as
     * many leading bytes as the current skip count demands.
     */
    private void readText() {
        int from = start;
        start++;
        while (start < stop) {
            byte c = buffer[start];
            if (c == '\\' || c == '{' || c == '}' || c == '\r' || c == '\n')
                break;
            start++;
        }
        int count = start - from;
        if (curr.skipCount < count) {
            from += curr.skipCount;
            count -= curr.skipCount;
            curr.skipCount = 0;
            bbuf.write(buffer, from, count);
        } else {
            curr.skipCount -= count;
        }
    }

    /** Tells whether the byte is an ASCII letter (a-z or A-Z). */
    private static boolean isAsciiLetter(byte c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }
}