package com.tom_roush.pdfbox.pdmodel.font;
import com.tom_roush.pdfbox.util.Charsets;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* Writes ToUnicode Mapping Files.
*
* @author John Hewson
*/
final class ToUnicodeWriter
{
private final Map<Integer, String> cidToUnicode = new TreeMap<Integer, String>();
private int wMode;
/**
* Creates a new ToUnicode CMap writer.
*/
ToUnicodeWriter()
{
this.wMode = 0;
}
/**
* Sets the WMode (writing mode) of this CMap.
*
* @param wMode 1 for vertical, 0 for horizontal (default)
*/
public void setWMode(int wMode)
{
this.wMode = wMode;
}
/**
* Adds the given CID to Unicode mapping.
*
* @param cid CID
* @param text Unicode text, up to 512 bytes.
*/
public void add(int cid, String text)
{
if (cid < 0 || cid > 0xFFFF)
{
throw new IllegalArgumentException("CID is not valid");
}
if (text == null || text.isEmpty())
{
throw new IllegalArgumentException("Text is null or empty");
}
cidToUnicode.put(cid, text);
}
/**
* Writes the CMap as ASCII to the given output stream.
*
* @param out ASCII output stream
* @throws IOException if the stream could not be written
*/
public void writeTo(OutputStream out) throws IOException
{
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, Charsets.US_ASCII));
writeLine(writer, "/CIDInit /ProcSet findresource begin");
writeLine(writer, "12 dict begin\n");
writeLine(writer, "begincmap");
writeLine(writer, "/CIDSystemInfo");
writeLine(writer, "<< /Registry ()");
writeLine(writer, "/Ordering ()");
writeLine(writer, "/Supplement ");
writeLine(writer, ">> def\n");
writeLine(writer, "/CMapName /Adobe-Identity-UCS" + " def");
writeLine(writer, "/CMapType 2 def\n"); // 2 = ToUnicode
if (wMode != 0)
{
writeLine(writer, "/WMode /" + wMode + " def");
}
// ToUnicode always uses 16-bit CIDs
writeLine(writer, "1 begincodespacerange");
writeLine(writer, "<0000> <FFFF>");
writeLine(writer, "endcodespacerange\n");
// CID -> Unicode mappings, we use ranges to generate a smaller CMap
List<Integer> srcFrom = new ArrayList<Integer>();
List<Integer> srcTo = new ArrayList<Integer>();
List<String> dstString = new ArrayList<String>();
int srcPrev = -1;
String dstPrev = null;
int srcCode1 = -1;
for (Map.Entry<Integer, String> entry : cidToUnicode.entrySet())
{
int cid = entry.getKey();
String text = entry.getValue();
if (cid == srcPrev + 1 && // CID must be last CID + 1
dstPrev.codePointCount(0, dstPrev.length()) == 1 && // no UTF-16 surrogates
text.codePointAt(0) == dstPrev.codePointAt(0) + 1 && // dstString must be prev + 1
dstPrev.codePointAt(0) + 1 <= 255 - (cid - srcCode1)) // increment last byte only
{
// extend range
srcTo.set(srcTo.size() - 1, cid);
}
else
{
// begin range
srcCode1 = cid;
srcFrom.add(cid);
srcTo.add(cid);
dstString.add(text);
}
srcPrev = cid;
dstPrev = text;
}
// limit of 100 entries per operator
int batchCount = (int)Math.ceil(srcFrom.size() / 100.0);
for (int batch = 0; batch < batchCount; batch++)
{
int count = batch == batchCount - 1 ? srcFrom.size() % 100 : 100;
writer.write(count + " beginbfrange\n");
for (int j = 0; j < count; j++)
{
int index = batch * 100 + j;
writer.write('<');
writer.write(toHex(srcFrom.get(index)));
writer.write("> ");
writer.write('<');
writer.write(toHex(srcTo.get(index)));
writer.write("> ");
writer.write("<");
writer.write(stringToHex(dstString.get(index)));
writer.write(">\n");
}
writeLine(writer, "endbfrange\n");
}
// footer
writeLine(writer, "endcmap");
writeLine(writer, "CMapName currentdict /CMap defineresource pop");
writeLine(writer, "end");
writeLine(writer, "end");
writer.flush();
}
private void writeLine(BufferedWriter writer, String text) throws IOException
{
writer.write(text);
writer.write('\n');
}
private String toHex(int num)
{
return String.format("%04X", num);
}
private String stringToHex(String text)
{
// use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2
StringBuilder sb = new StringBuilder();
for (byte b : text.getBytes(Charsets.UTF_16BE))
{
sb.append(String.format("%02X", b));
}
return sb.toString();
}
}