/*
* file: RTFEmbeddedObject.java
* author: Jon Iles
* copyright: (c) Packwood Software 2005
* date: Jun 28, 2005
*/
/*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*/
package net.sf.mpxj.mpp;
import java.io.ByteArrayOutputStream;
import java.io.PrintWriter;
import java.util.LinkedList;
import java.util.List;
/**
 * This class represents embedded object data contained within an RTF
 * document. According to the RTF specification, this data has been written using
 * the OLESaveToStream, although I have been unable to locate any existing
 * Java implementations of the equivalent OLELoadFromStream in order to
 * read this data, hence the current implementation.
 *
 * To use this class with note fields in MPXJ, call
 * MPPFile.setPreserveNoteFormatting(true) to allow retrieval of the raw RTF
 * document text from the note fields. You can use the RTFUtility.strip()
 * method to extract plain text from the document for display. If you want
 * to extract any embedded objects from the document, call the
 * RTFEmbeddedObject.getEmbeddedObjects() method, passing in the raw RTF
 * document.
 *
 * The structure of data embedded in a notes field is beyond the scope
 * of the MPXJ documentation. However, generally speaking, you will find that
 * each item of embedded data will be made up of two RTFEmbeddedObject instances,
 * the first is a header usually containing string data, the second is the
 * actual payload data, which will typically be binary. You can retrieve the
 * String data using the RTFEmbeddedObject.getDataString() method, and the
 * binary data using the RTFEmbeddedObject.getData() method.
 *
 * For each embedded item in the document you will typically find two
 * groups of these objects. The first group of two RTFEmbeddedObject instances
 * (one header object and one data object) represent either the location of a
 * linked document, or the binary data for the document itself. The second
 * group of two RTFEmbeddedObject instances contain a METAFILEPICT, which
 * either contains the icon image used as a placeholder for the embedded
 * document, or it contains an image of the document contents, again used
 * as a placeholder.
 *
 * Warning: this functionality is experimental, please submit bugs for any
 * example files containing embedded objects which fail to parse when using this
 * class.
 */
public final class RTFEmbeddedObject
{
   /**
    * Constructor. Consumes blocks from the head of the supplied list
    * according to the expected block type: types 2 and 5 read two type
    * flags, a length and the data; type 1 reads a length and the data.
    * Any other type consumes nothing and leaves the fields at their defaults.
    *
    * @param blocks list of data blocks
    * @param type expected type of next block.
    */
   private RTFEmbeddedObject(List<byte[]> blocks, int type)
   {
      switch (type)
      {
         case 2:
         case 5:
         {
            m_typeFlag1 = getInt(blocks);
            m_typeFlag2 = getInt(blocks);
            int length = getInt(blocks);
            m_data = getData(blocks, length);
            break;
         }

         case 1:
         {
            int length = getInt(blocks);
            m_data = getData(blocks, length);
            break;
         }

         default:
         {
            // Unrecognised type: no blocks are consumed here.
            break;
         }
      }
   }

   /**
    * Retrieve type flag 1.
    *
    * @return type flag 1
    */
   public int getTypeFlag1()
   {
      return (m_typeFlag1);
   }

   /**
    * Retrieve type flag 2.
    *
    * @return type flag 2
    */
   public int getTypeFlag2()
   {
      return (m_typeFlag2);
   }

   /**
    * Retrieve the data associated with this block as a byte array.
    * The internal array is returned directly, it is not copied.
    *
    * @return byte array of data, may be null if no data was read
    */
   public byte[] getData()
   {
      return (m_data);
   }

   /**
    * Retrieve the data associated with this block as a string.
    * The bytes are decoded using the platform default character set.
    *
    * @return string data, or an empty string if there is no data
    */
   public String getDataString()
   {
      return (m_data == null ? "" : new String(m_data));
   }

   /**
    * This method generates a list of lists. Each list represents the data
    * for an embedded object, and contains the set of RTFEmbeddedObject instances
    * that make up the embedded object. This method will return null
    * if there are no embedded objects in the RTF document, or if the
    * supplied document is null.
    *
    * @param text RTF document
    * @return list of lists of RTFEmbeddedObject instances
    */
   public static List<List<RTFEmbeddedObject>> getEmbeddedObjects(String text)
   {
      if (text == null)
      {
         return (null);
      }

      List<List<RTFEmbeddedObject>> objects = null;
      int offset = text.indexOf(OBJDATA);

      while (offset != -1)
      {
         if (objects == null)
         {
            objects = new LinkedList<List<RTFEmbeddedObject>>();
         }

         List<RTFEmbeddedObject> objectData = new LinkedList<RTFEmbeddedObject>();
         objects.add(objectData);

         // readObjectData always returns an offset beyond the marker it was
         // given, so each search starts strictly after the previous match.
         offset = readObjectData(offset, text, objectData);
         offset = text.indexOf(OBJDATA, offset);
      }

      return (objects);
   }

   /**
    * Internal method used to retrieve a integer from an
    * embedded data block. The first block is removed from the list
    * and decoded; zero is returned if the list is empty.
    *
    * @param blocks list of data blocks
    * @return int value
    */
   private static int getInt(List<byte[]> blocks)
   {
      if (blocks.isEmpty())
      {
         return (0);
      }

      byte[] data = blocks.remove(0);
      return (MPPUtility.getInt(data, 0));
   }

   /**
    * Internal method used to retrieve a byte array from one
    * or more embedded data blocks. Consecutive data blocks may
    * need to be concatenated by this method in order to retrieve
    * the complete set of data.
    *
    * A requested length of less than four is treated as four. If the
    * requested length exceeds the number of bytes remaining in the list,
    * it is reduced to the number of bytes available. Bytes in the final
    * consumed block which lie beyond the requested length are discarded.
    *
    * @param blocks list of data blocks
    * @param length expected length of the data
    * @return byte array, or null if there are no blocks available
    */
   private static byte[] getData(List<byte[]> blocks, int length)
   {
      if (blocks.isEmpty())
      {
         return (null);
      }

      if (length < 4)
      {
         length = 4;
      }

      // Never allocate more than the data can supply: a corrupt length
      // field would otherwise trigger a huge allocation or run the list dry.
      long available = 0;
      for (byte[] block : blocks)
      {
         available += block.length;
      }
      if (length > available)
      {
         length = (int) available;
      }

      byte[] result = new byte[length];
      int offset = 0;
      while (offset < length && !blocks.isEmpty())
      {
         byte[] data = blocks.remove(0);
         // Clamp the copy so an over-long block cannot overflow the result.
         int count = Math.min(data.length, length - offset);
         System.arraycopy(data, 0, result, offset, count);
         offset += count;
      }

      return (result);
   }

   /**
    * This method extracts byte arrays from the embedded object data
    * and converts them into RTFEmbeddedObject instances, which
    * it then adds to the supplied list.
    *
    * @param offset offset into the RTF document
    * @param text RTF document
    * @param objects destination for RTFEmbeddedObject instances
    * @return new offset into the RTF document, always beyond the
    * object data marker found at the supplied offset
    */
   private static int readObjectData(int offset, String text, List<RTFEmbeddedObject> objects)
   {
      LinkedList<byte[]> blocks = new LinkedList<byte[]>();

      offset += (OBJDATA.length());

      // Record the position before skipping: skipEndOfLine returns -1 when
      // the object is empty, and returning -1 from here would make the
      // caller's indexOf search restart from the beginning of the document.
      int lastOffset = offset;
      offset = skipEndOfLine(text, offset);

      while (offset != -1)
      {
         int length = getBlockLength(text, offset);
         lastOffset = readDataBlock(text, offset, length, blocks);
         offset = skipEndOfLine(text, lastOffset);
      }

      while (blocks.isEmpty() == false)
      {
         RTFEmbeddedObject headerObject = new RTFEmbeddedObject(blocks, 2);
         objects.add(headerObject);

         if (blocks.isEmpty() == false)
         {
            // The header's second type flag selects the layout of the data object.
            RTFEmbeddedObject dataObject = new RTFEmbeddedObject(blocks, headerObject.getTypeFlag2());
            objects.add(dataObject);
         }
      }

      return (lastOffset);
   }

   /**
    * This method skips the end-of-line markers in the RTF document.
    * It also indicates if the end of the embedded object has been reached:
    * -1 is returned when a closing brace or the end of the text is found.
    *
    * @param text RTF document text
    * @param offset offset into the RTF document
    * @return new offset, or -1 if the end of the object has been reached
    */
   private static int skipEndOfLine(String text, int offset)
   {
      int limit = text.length();

      while (offset < limit)
      {
         char c = text.charAt(offset);
         switch (c)
         {
            case ' ': // found that OBJDATA could be followed by a space then EOL
            case '\r':
            case '\n':
            {
               ++offset;
               break;
            }

            case '}':
            {
               return (-1);
            }

            default:
            {
               return (offset);
            }
         }
      }

      // Ran off the end of a truncated document: treat as end of object.
      return (-1);
   }

   /**
    * Calculates the length of the next block of RTF data. The block ends
    * at the next carriage return, line feed, closing brace or at the end
    * of the text, whichever comes first.
    *
    * @param text RTF data
    * @param offset current offset into this data
    * @return block length
    */
   private static int getBlockLength(String text, int offset)
   {
      int limit = text.length();
      int end = offset;

      while (end < limit)
      {
         char c = text.charAt(end);
         if (c == '\r' || c == '\n' || c == '}')
         {
            break;
         }
         ++end;
      }

      return (end - offset);
   }

   /**
    * Reads a data block and adds it to the list of blocks. Each pair of
    * characters is parsed as one hexadecimal byte. A trailing unpaired
    * character is skipped, and a block yielding no bytes is not added
    * to the list.
    *
    * @param text RTF data
    * @param offset current offset
    * @param length next block length
    * @param blocks list of blocks
    * @return next offset, which is the supplied offset plus the block length
    * @throws NumberFormatException if a character pair is not valid hexadecimal
    */
   private static int readDataBlock(String text, int offset, int length, List<byte[]> blocks)
   {
      int bytes = length / 2;

      if (bytes > 0)
      {
         byte[] data = new byte[bytes];
         int position = offset;
         for (int index = 0; index < bytes; index++)
         {
            data[index] = (byte) Integer.parseInt(text.substring(position, position + 2), 16);
            position += 2;
         }
         blocks.add(data);
      }

      // Consume the whole block, including any unpaired trailing character,
      // so the caller always makes progress through the document.
      return (offset + length);
   }

   /**
    * {@inheritDoc}
    */
   @Override public String toString()
   {
      ByteArrayOutputStream os = new ByteArrayOutputStream();
      PrintWriter pw = new PrintWriter(os);

      pw.println("[RTFObject");
      pw.println(" Flag1=" + m_typeFlag1);
      pw.println(" Flag2=" + m_typeFlag2);
      pw.println(" Data=");
      pw.println(MPPUtility.hexdump(m_data, true, 16, " "));
      pw.println("]");
      pw.flush();

      return (os.toString());
   }

   private int m_typeFlag1;
   private int m_typeFlag2;
   private byte[] m_data;

   private static final String OBJDATA = "\\objdata";
}