package com.sun.pdfview.font.cid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.sun.pdfview.PDFObject;
/*****************************************************************************
* Parses a CMAP and builds a lookup table to map CMAP based codes to unicode.
* This is not a fully functional CMAP parser but a stripped down parser
* that should be able to parse some limited variants of CMAPs that are
* used for the ToUnicode mapping found for some Type0 fonts.
*
* @author Bernd Rosstauscher
* @since 03.08.2011
****************************************************************************/
public class ToUnicodeMap extends PDFCMap {

    /*****************************************************************************
    * Small helper class to define a code range, i.e. one entry of a
    * begincodespacerange section. Both bounds are inclusive.
    ****************************************************************************/
    private static class CodeRangeMapping {
        private final char srcStart;
        private final char srcEnd;

        CodeRangeMapping(char srcStart, char srcEnd) {
            this.srcStart = srcStart;
            this.srcEnd = srcEnd;
        }

        /** @return true if c lies within [srcStart, srcEnd]. */
        boolean contains(char c) {
            return this.srcStart <= c
                    && c <= this.srcEnd;
        }
    }

    /*****************************************************************************
    * Small helper class to define a char range, i.e. one entry of a
    * beginbfrange section of the form "srcStart srcEnd destStart".
    * Both source bounds are inclusive.
    ****************************************************************************/
    private static class CharRangeMapping {
        private final char srcStart;
        private final char srcEnd;
        private final char destStart;

        CharRangeMapping(char srcStart, char srcEnd, char destStart) {
            this.srcStart = srcStart;
            this.srcEnd = srcEnd;
            this.destStart = destStart;
        }

        /** @return true if c lies within [srcStart, srcEnd]. */
        boolean contains(char c) {
            return this.srcStart <= c
                    && c <= this.srcEnd;
        }

        /**
         * Maps a source code to its destination by applying the offset of
         * src from srcStart to destStart. The caller is expected to have
         * checked {@link #contains(char)} first.
         */
        char map(char src) {
            return (char) (this.destStart + (src - this.srcStart));
        }
    }

    /** Mappings from bfchar sections and from array-style bfrange entries. */
    private final Map<Character, Character> singleCharMappings;
    /** Mappings from "srcStart srcEnd destStart" bfrange entries. */
    private final List<CharRangeMapping> charRangeMappings;
    /** Code ranges declared in codespacerange sections. */
    private final List<CodeRangeMapping> codeRangeMappings;

    /*************************************************************************
    * Constructor. Reads the stream of the given object and builds the
    * lookup tables from it.
    * @param map the object whose stream holds the CMAP text.
    * @throws IOException on read error.
    ************************************************************************/
    public ToUnicodeMap(PDFObject map) throws IOException {
        super();
        this.singleCharMappings = new HashMap<Character, Character>();
        this.charRangeMappings = new ArrayList<CharRangeMapping>();
        this.codeRangeMappings = new ArrayList<CodeRangeMapping>();
        parseMappings(map);
    }

    /*************************************************************************
    * Scans the CMAP text line by line and hands every bfchar, bfrange and
    * codespacerange section over to the matching section parser.
    * The line holding the "begin..." operator itself is not parsed for
    * entries; entries are expected on the following lines.
    * @param map the object whose stream holds the CMAP text.
    * @throws IOException on read error.
    ************************************************************************/
    private void parseMappings(PDFObject map) throws IOException {
        BufferedReader bf;
        try {
            bf = new BufferedReader(
                    new StringReader(new String(map.getStream(), "ASCII")));
        } catch (UnsupportedEncodingException e) {
            throw new IOException(e);
        }
        try {
            String line = bf.readLine();
            while (line != null) {
                if (line.contains("beginbfchar")) {
                    parseSingleCharMappingSection(bf);
                }
                if (line.contains("beginbfrange")) {
                    parseCharRangeMappingSection(bf);
                }
                if (line.contains("begincodespacerange")) {
                    parseCodeRangeMappingSection(bf);
                }
                line = bf.readLine();
            }
        } finally {
            bf.close();
        }
    }

    /*************************************************************************
    * Consumes lines up to (and including) the line containing "endbfrange"
    * and parses each of them as a range entry.
    * @param bf reader positioned just after the "beginbfrange" line.
    * @throws IOException on read error.
    ************************************************************************/
    private void parseCharRangeMappingSection(BufferedReader bf) throws IOException {
        String line = bf.readLine();
        while (line != null) {
            if (line.contains("endbfrange")) {
                break;
            }
            parseRangeLine(line);
            line = bf.readLine();
        }
    }

    /*************************************************************************
    * Consumes lines up to (and including) the line containing
    * "endcodespacerange" and parses each of them as a code range entry.
    * @param bf reader positioned just after the "begincodespacerange" line.
    * @throws IOException on read error.
    ************************************************************************/
    private void parseCodeRangeMappingSection(BufferedReader bf) throws IOException {
        String line = bf.readLine();
        while (line != null) {
            if (line.contains("endcodespacerange")) {
                break;
            }
            parseCodeRangeLine(line);
            line = bf.readLine();
        }
    }

    /*************************************************************************
    * Parses one bfrange entry. Two forms are understood:
    * <pre>
    *   &lt;srcStart&gt; &lt;srcEnd&gt; &lt;destStart&gt;
    *   &lt;srcStart&gt; &lt;srcEnd&gt; [&lt;dest0&gt; &lt;dest1&gt; ...]
    * </pre>
    * The array form is stored as individual single char mappings for
    * srcStart, srcStart+1, ...; array elements beyond srcEnd are ignored.
    * The array has to be on the same line as the source codes.
    * Lines that do not match either form are ignored.
    * @param line one line from inside a bfrange section.
    ************************************************************************/
    private void parseRangeLine(String line) {
        String[] tokens = tokenize(line);
        if (tokens.length < 3
                || !isHexToken(tokens[0])
                || !isHexToken(tokens[1])) {
            return;
        }
        char srcStart = parseChar(tokens[0]);
        char srcEnd = parseChar(tokens[1]);
        if (tokens.length == 3 && isHexToken(tokens[2])) {
            char destStart = parseChar(tokens[2]);
            this.charRangeMappings.add(new CharRangeMapping(srcStart, srcEnd, destStart));
        } else if ("[".equals(tokens[2])) {
            // int counter so that srcEnd == 0xFFFF cannot wrap around
            int code = srcStart;
            for (int i = 3; i < tokens.length && code <= srcEnd; i++, code++) {
                if (!isHexToken(tokens[i])) {
                    break; // closing "]" or something unexpected
                }
                this.singleCharMappings.put((char) code, parseChar(tokens[i]));
            }
        }
    }

    /*************************************************************************
    * Parses one codespacerange entry of the form
    * <code>&lt;srcStart&gt; &lt;srcEnd&gt;</code>.
    * Lines that do not match this form are ignored.
    * @param line one line from inside a codespacerange section.
    ************************************************************************/
    private void parseCodeRangeLine(String line) {
        String[] tokens = tokenize(line);
        if (tokens.length == 2
                && isHexToken(tokens[0])
                && isHexToken(tokens[1])) {
            char srcStart = parseChar(tokens[0]);
            char srcEnd = parseChar(tokens[1]);
            this.codeRangeMappings.add(new CodeRangeMapping(srcStart, srcEnd));
        }
    }

    /*************************************************************************
    * Consumes lines up to (and including) the line containing "endbfchar"
    * and parses each of them as a single char mapping.
    * @param bf reader positioned just after the "beginbfchar" line.
    * @throws IOException on read error.
    ************************************************************************/
    private void parseSingleCharMappingSection(BufferedReader bf) throws IOException {
        String line = bf.readLine();
        while (line != null) {
            if (line.contains("endbfchar")) {
                break;
            }
            parseSingleCharMappingLine(line);
            line = bf.readLine();
        }
    }

    /*************************************************************************
    * Parses one bfchar entry of the form
    * <code>&lt;src&gt; &lt;dest&gt;</code>.
    * Lines that do not match this form are ignored.
    * @param line one line from inside a bfchar section.
    ************************************************************************/
    private void parseSingleCharMappingLine(String line) {
        String[] tokens = tokenize(line);
        if (tokens.length == 2
                && isHexToken(tokens[0])
                && isHexToken(tokens[1])) {
            this.singleCharMappings.put(parseChar(tokens[0]), parseChar(tokens[1]));
        }
    }

    /*************************************************************************
    * Splits a line into tokens. Tokens are separated by any run of
    * whitespace; in addition a token boundary is forced before every "&lt;",
    * after every "&gt;" and around "[" and "]", so that entries written
    * without separating blanks (e.g. &lt;0000&gt;&lt;FFFF&gt;) are split as well.
    * @param line the line to split.
    * @return the tokens; an empty array for a blank line.
    ************************************************************************/
    private static String[] tokenize(String line) {
        String spaced = line.replace("<", " <")
                .replace(">", "> ")
                .replace("[", " [ ")
                .replace("]", " ] ")
                .trim();
        if (spaced.length() == 0) {
            return new String[0];
        }
        return spaced.split("\\s+");
    }

    /*************************************************************************
    * @param token a token as returned by tokenize.
    * @return true if the token starts like a hex string (&lt;...).
    ************************************************************************/
    private static boolean isHexToken(String token) {
        return token.startsWith("<");
    }

    /*************************************************************************
    * Parse a string of the format <0F3A> to a char.
    * The surrounding angle brackets are optional. Only the low 16 bits of
    * the hex value are kept, so longer values (e.g. multi character
    * destinations) are truncated. A value that can not be parsed as an
    * int yields a blank.
    * @param charDef the hex string.
    * @return the parsed char, never null.
    ************************************************************************/
    private Character parseChar(String charDef) {
        if (charDef.startsWith("<")) {
            charDef = charDef.substring(1);
        }
        if (charDef.endsWith(">")) {
            charDef = charDef.substring(0, charDef.length() - 1);
        }
        try {
            int result = Integer.decode("0x" + charDef);
            return (char) result;
        } catch (NumberFormatException e) {
            return (char) ' ';
        }
    }

    /*************************************************************************
    * Maps a code to unicode. Codes outside of every declared code range
    * and codes without a mapping yield 0. Single char mappings take
    * precedence over range mappings.
    * @see com.sun.pdfview.font.cid.PDFCMap#map(char)
    ************************************************************************/
    @Override
    public char map(char src) {
        if (isInCodeSpace(src)) {
            Character mappedChar = this.singleCharMappings.get(src);
            if (mappedChar == null) {
                mappedChar = lookupInRanges(src);
            }
            if (mappedChar != null) {
                return mappedChar.charValue();
            }
        }
        // TODO XOND 27.03.2012: PDF Spec. "9.7.6.3Handling Undefined Characters"
        return (char) 0;
    }

    /*************************************************************************
    * @param src the code to test.
    * @return true if any declared code range contains the code.
    ************************************************************************/
    private boolean isInCodeSpace(char src) {
        for (CodeRangeMapping codeRange : this.codeRangeMappings) {
            if (codeRange.contains(src)) {
                return true;
            }
        }
        return false;
    }

    /*************************************************************************
    * Looks the code up in the range mappings; the first range containing
    * the code wins.
    * @param src the code to map.
    * @return the mapped char or null if no range contains the code.
    ************************************************************************/
    private Character lookupInRanges(char src) {
        for (CharRangeMapping rangeMapping : this.charRangeMappings) {
            if (rangeMapping.contains(src)) {
                return rangeMapping.map(src);
            }
        }
        return null;
    }
}