/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.cos;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.persistence.util.COSHEXTable;
/**
* This represents a string object in a PDF document.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.30 $
*/
public class COSString extends COSBase
{
    /**
     * One of the open string tokens.
     */
    public static final byte[] STRING_OPEN = new byte[]{ 40 }; //"(".getBytes();
    /**
     * One of the close string tokens.
     */
    public static final byte[] STRING_CLOSE = new byte[]{ 41 }; //")".getBytes( "ISO-8859-1" );
    /**
     * One of the open string tokens.
     */
    public static final byte[] HEX_STRING_OPEN = new byte[]{ 60 }; //"<".getBytes( "ISO-8859-1" );
    /**
     * One of the close string tokens.
     */
    public static final byte[] HEX_STRING_CLOSE = new byte[]{ 62 }; //">".getBytes( "ISO-8859-1" );
    /**
     * the escape character in strings.
     */
    public static final byte[] ESCAPE = new byte[]{ 92 }; //"\\".getBytes( "ISO-8859-1" );
    /**
     * CR escape characters.
     */
    public static final byte[] CR_ESCAPE = new byte[]{ 92, 114 }; //"\\r".getBytes( "ISO-8859-1" );
    /**
     * LF escape characters.
     */
    public static final byte[] LF_ESCAPE = new byte[]{ 92, 110 }; //"\\n".getBytes( "ISO-8859-1" );
    /**
     * HT escape characters.
     */
    public static final byte[] HT_ESCAPE = new byte[]{ 92, 116 }; //"\\t".getBytes( "ISO-8859-1" );
    /**
     * BS escape characters.
     */
    public static final byte[] BS_ESCAPE = new byte[]{ 92, 98 }; //"\\b".getBytes( "ISO-8859-1" );
    /**
     * FF escape characters.
     */
    public static final byte[] FF_ESCAPE = new byte[]{ 92, 102 }; //"\\f".getBytes( "ISO-8859-1" );

    /**
     * The raw bytes of the string, including a leading byte-order mark when the
     * content is UTF-16 encoded.
     */
    private final ByteArrayOutputStream out;

    /**
     * Cached result of {@link #getString()}; cleared whenever the buffer is modified.
     */
    private String str = null;

    /**
     * Forces the string to be serialized in hex form but not literal form, the default is to stream
     * in literal form.
     */
    private boolean forceHexForm = false;

    /**
     * Constructor. Creates an empty string.
     */
    public COSString()
    {
        out = new ByteArrayOutputStream();
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     *
     * If every character of the value is at most 255 the value is stored as
     * ISO-8859-1 bytes, otherwise it is stored as UTF-16BE bytes preceded by
     * the byte-order mark 0xFE 0xFF.
     *
     * @param value The string value of the object.
     */
    public COSString( String value )
    {
        boolean unicode16 = false;
        int length = value.length();
        for( int i = 0; i < length; i++ )
        {
            if( value.charAt( i ) > 255 )
            {
                unicode16 = true;
                break;
            }
        }
        if( unicode16 )
        {
            byte[] data = encode( value, "UTF-16BE" );
            out = new ByteArrayOutputStream( data.length + 2 );
            out.write( 0xFE );
            out.write( 0xFF );
            // write(byte[], int, int) does not declare IOException, unlike write(byte[])
            out.write( data, 0, data.length );
        }
        else
        {
            byte[] data = encode( value, "ISO-8859-1" );
            out = new ByteArrayOutputStream( data.length );
            out.write( data, 0, data.length );
        }
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     *
     * @param value The bytes of the string; they are copied into this object.
     */
    public COSString( byte[] value )
    {
        out = new ByteArrayOutputStream( value.length );
        // write(byte[], int, int) does not declare IOException, unlike write(byte[])
        out.write( value, 0, value.length );
    }

    /**
     * Encodes a string with the named charset.
     *
     * @param value The string to encode.
     * @param charsetName The name of the charset to use.
     * @return The encoded bytes.
     * @throws IllegalStateException If the charset is not available.
     */
    private static byte[] encode( String value, String charsetName )
    {
        try
        {
            return value.getBytes( charsetName );
        }
        catch( UnsupportedEncodingException e )
        {
            // Only ISO-8859-1 and UTF-16BE are requested here; failing fast is
            // preferable to leaving the object without a buffer.
            throw new IllegalStateException( "Charset not available: " + charsetName, e );
        }
    }

    /**
     * Forces the string to be written in literal form instead of hexadecimal form.
     *
     * @param v if v is true the string will be written in literal form, otherwise it will
     * be written in hexa if necessary.
     */
    public void setForceLiteralForm(boolean v)
    {
        forceHexForm = !v;
    }

    /**
     * Forces the string to be written in hexadecimal form instead of literal form.
     *
     * @param v if v is true the string will be written in hexadecimal form otherwise it will be written in literal if
     * necessary.
     */
    public void setForceHexForm(boolean v)
    {
        forceHexForm = v;
    }

    /**
     * This will create a COS string from a string of hex characters.
     *
     * @param hex A hex string.
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException If there is an error with the hex string.
     */
    public static COSString createFromHexString(String hex)
            throws IOException
    {
        return createFromHexString(hex, false);
    }

    /**
     * Creates a COS string from a string of hex characters, optionally
     * ignoring malformed input.
     *
     * Leading and trailing whitespace is trimmed. If the number of remaining
     * characters is odd, a trailing '0' is assumed. Each pair of characters
     * must consist of two hex digits; a pair that does not is either replaced
     * by a '?' byte (when force is true) or reported with an exception.
     *
     * @param hex A hex string.
     * @param force flag to ignore malformed input
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException If there is an error with the hex string.
     */
    public static COSString createFromHexString(String hex, boolean force)
            throws IOException
    {
        COSString retval = new COSString();
        StringBuilder hexBuffer = new StringBuilder( hex.trim() );
        //if odd number then the last hex digit is assumed to be 0
        if( hexBuffer.length() % 2 != 0 )
        {
            hexBuffer.append( '0' );
        }
        int length = hexBuffer.length();
        for( int i = 0; i < length; i += 2 )
        {
            // Validate each character individually: Integer.parseInt would
            // accept a sign character and silently produce a wrong byte.
            int high = Character.digit( hexBuffer.charAt( i ), 16 );
            int low = Character.digit( hexBuffer.charAt( i + 1 ), 16 );
            if( high >= 0 && low >= 0 )
            {
                retval.append( (high << 4) | low );
            }
            else if( force )
            {
                retval.append( '?' );
            }
            else
            {
                throw new IOException( "Invalid hex string: " + hex );
            }
        }
        return retval;
    }

    /**
     * This will take this string and create a hex representation of the bytes that make the string.
     *
     * @return A hex string representing the bytes in this string.
     */
    public String getHexString()
    {
        byte[] data = getBytes();
        StringBuilder retval = new StringBuilder( data.length * 2 );
        for( int i = 0; i < data.length; i++ )
        {
            // mask to obtain the unsigned value of the byte as table index
            retval.append( COSHEXTable.HEX_TABLE[ data[i] & 0xFF ] );
        }
        return retval.toString();
    }

    /**
     * This will get the string that this object wraps.
     *
     * The bytes are decoded as UTF-16LE when they start with 0xFF 0xFE, as
     * UTF-16BE when they start with 0xFE 0xFF (the byte-order mark is not part
     * of the result), and as ISO-8859-1 otherwise. The result is cached until
     * the buffer is modified.
     *
     * @return The wrapped string.
     */
    public String getString()
    {
        if( this.str != null )
        {
            return this.str;
        }
        // getBytes() copies the buffer, so take a single copy and reuse it
        byte[] data = getBytes();
        String encoding = "ISO-8859-1";
        int start = 0;
        // ">= 2" so that a string consisting of only a byte-order mark
        // decodes to the empty string
        if( data.length >= 2 )
        {
            if( data[0] == (byte)0xFF && data[1] == (byte)0xFE )
            {
                encoding = "UTF-16LE";
                start = 2;
            }
            else if( data[0] == (byte)0xFE && data[1] == (byte)0xFF )
            {
                encoding = "UTF-16BE";
                start = 2;
            }
        }
        String retval;
        try
        {
            retval = new String( data, start, data.length - start, encoding );
        }
        catch( UnsupportedEncodingException e )
        {
            // best effort: fall back to the platform default charset
            retval = new String( data );
        }
        this.str = retval;
        return retval;
    }

    /**
     * This will append a byte[] to the string.
     *
     * @param data The byte[] to add to this string.
     *
     * @throws IOException If an IO error occurs while writing the byte.
     */
    public void append( byte[] data ) throws IOException
    {
        out.write( data );
        // invalidate the cached decoded string
        this.str = null;
    }

    /**
     * This will append a byte to the string.
     *
     * @param in The byte to add to this string.
     *
     * @throws IOException If an IO error occurs while writing the byte.
     */
    public void append( int in ) throws IOException
    {
        out.write( in );
        // invalidate the cached decoded string
        this.str = null;
    }

    /**
     * This will reset the internal buffer.
     */
    public void reset()
    {
        out.reset();
        this.str = null;
    }

    /**
     * This will get the bytes of the string.
     *
     * @return A newly allocated byte array that represents the string.
     */
    public byte[] getBytes()
    {
        return out.toByteArray();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString()
    {
        return "COSString{" + this.getString() + "}";
    }

    /**
     * This will output this string as a PDF object.
     *
     * The string is written in literal form, with the necessary escapes,
     * unless it contains a byte outside the ASCII range or hex form has been
     * forced; in those cases it is written in hexadecimal form.
     *
     * @param output The stream to write to.
     * @throws IOException If there is an error writing to the stream.
     */
    public void writePDF( OutputStream output ) throws IOException
    {
        byte[] bytes = getBytes();
        int length = bytes.length;
        //Lets first check if we need to escape this string.
        boolean outsideASCII = false;
        for( int i = 0; i < length && !outsideASCII; i++ )
        {
            //if the byte is negative then it is an eight bit byte and is
            //outside the ASCII range.
            outsideASCII = bytes[i] < 0;
        }
        if( !outsideASCII && !forceHexForm )
        {
            output.write( STRING_OPEN );
            for( int i = 0; i < length; i++ )
            {
                // every byte is non-negative in this branch
                int b = bytes[i];
                switch( b )
                {
                    case '(':
                    case ')':
                    case '\\':
                    {
                        output.write( ESCAPE );
                        output.write( b );
                        break;
                    }
                    case 10: //LF
                    {
                        output.write( LF_ESCAPE );
                        break;
                    }
                    case 13: // CR
                    {
                        output.write( CR_ESCAPE );
                        break;
                    }
                    case '\t':
                    {
                        output.write( HT_ESCAPE );
                        break;
                    }
                    case '\b':
                    {
                        output.write( BS_ESCAPE );
                        break;
                    }
                    case '\f':
                    {
                        output.write( FF_ESCAPE );
                        break;
                    }
                    default:
                    {
                        output.write( b );
                    }
                }
            }
            output.write( STRING_CLOSE );
        }
        else
        {
            output.write( HEX_STRING_OPEN );
            for( int i = 0; i < length; i++ )
            {
                // mask to obtain the unsigned value of the byte as table index
                output.write( COSHEXTable.TABLE[ bytes[i] & 0xFF ] );
            }
            output.write( HEX_STRING_CLOSE );
        }
    }

    /**
     * visitor pattern double dispatch method.
     *
     * @param visitor The object to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException If an error occurs while visiting this object.
     */
    @Override
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        return visitor.visitFromString( this );
    }

    /**
     * {@inheritDoc}
     *
     * Two strings are equal when their decoded values are equal and they have
     * the same hex form setting.
     */
    @Override
    public boolean equals(Object obj)
    {
        if( obj instanceof COSString )
        {
            COSString strObj = (COSString) obj;
            return this.getString().equals( strObj.getString() )
                    && this.forceHexForm == strObj.forceHexForm;
        }
        return false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int hashCode()
    {
        return getString().hashCode() + ( forceHexForm ? 17 : 0 );
    }
}