CmapSubtable.java example

Explorer
PdfBox-Android-master
- library
  - src
- sample
  - src
    - main
      - java
        com
        tom_roush
        pdfbox
        sample
        MainActivity.java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.tom_roush.fontbox.ttf;

import android.util.Log;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * A "cmap" subtable: one mapping between character codes and glyph ids of a TrueType font.
 *
 * <p>An instance is populated in two steps: {@link #initData(TTFDataStream)} reads the
 * platform id, platform encoding id and the offset of the subtable, and
 * {@link #initSubtable(CmapTable, int, TTFDataStream)} then parses the mapping itself.
 * Afterwards the mapping can be queried in both directions with {@link #getGlyphId(int)}
 * and {@link #getCharacterCode(int)}.
 *
 * @author Ben Litchfield
 */
public class CmapSubtable
{
    /** Tag used for every warning this class writes to the Android log. */
    private static final String LOG_TAG = "PdfBox-Android";

    // Constants of the surrogate arithmetic used by the format 8 parser,
    // see http://www.unicode.org/faq//utf_bom.html#utf16-4
    private static final long LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
    private static final long SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;

    private int platformId;
    private int platformEncodingId;
    private long subTableOffset;

    /**
     * Character code per glyph id, -1 where a glyph id has no character code. Starts out
     * empty so that lookups are safe even when a subtable format builds no reverse mapping.
     */
    private int[] glyphIdToCharacterCode = new int[0];
    private final Map<Integer, Integer> characterCodeToGlyphId = new HashMap<Integer, Integer>();

    /**
     * This will read the required data from the stream.
     *
     * @param data The stream to read the data from.
     * @throws IOException If there is an error reading the data.
     */
    public void initData(TTFDataStream data) throws IOException
    {
        platformId = data.readUnsignedShort();
        platformEncodingId = data.readUnsignedShort();
        subTableOffset = data.readUnsignedInt();
    }

    /**
     * Seeks to this subtable inside the given cmap table, reads its header and dispatches
     * to the parser matching the subtable format.
     *
     * @param cmap the CMAP this encoding belongs to.
     * @param numGlyphs number of glyphs.
     * @param data The stream to read the data from.
     * @throws IOException If there is an error reading the data or the format is unknown.
     */
    public void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) throws IOException
    {
        data.seek(cmap.getOffset() + subTableOffset);
        int subtableFormat = data.readUnsignedShort();

        // The length and version fields are not used by any parser, but they have to be
        // consumed so that the stream is positioned on the format specific data.
        if (subtableFormat < 8)
        {
            data.readUnsignedShort(); // length
            data.readUnsignedShort(); // version
        }
        else
        {
            // read an other UnsignedShort to read a Fixed32
            data.readUnsignedShort();
            data.readUnsignedInt(); // length
            data.readUnsignedInt(); // version
        }

        switch (subtableFormat)
        {
        case 0:
            processSubtype0(data);
            break;
        case 2:
            processSubtype2(data, numGlyphs);
            break;
        case 4:
            processSubtype4(data, numGlyphs);
            break;
        case 6:
            processSubtype6(data, numGlyphs);
            break;
        case 8:
            processSubtype8(data, numGlyphs);
            break;
        case 10:
            processSubtype10(data, numGlyphs);
            break;
        case 12:
            processSubtype12(data, numGlyphs);
            break;
        case 13:
            processSubtype13(data, numGlyphs);
            break;
        case 14:
            processSubtype14(data, numGlyphs);
            break;
        default:
            throw new IOException("Unknown cmap format:" + subtableFormat);
        }
    }

    /**
     * Reads a format 8 subtable.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOException
    {
        // --- is32 is a 65536 BITS array ( = 8192 BYTES)
        int[] is32 = data.readUnsignedByteArray(8192);
        long nbGroups = data.readUnsignedInt();

        // --- nbGroups shouldn't be greater than 65536
        if (nbGroups > 65536)
        {
            throw new IOException("CMap ( Subtype8 ) is invalid");
        }

        glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs);
        for (long i = 0; i < nbGroups; ++i)
        {
            long firstCode = data.readUnsignedInt();
            long endCode = data.readUnsignedInt();
            long startGlyph = data.readUnsignedInt();

            // -- process simple validation
            if (firstCode > endCode || 0 > firstCode)
            {
                throw new IOException("Range invalid");
            }

            for (long j = firstCode; j <= endCode; ++j)
            {
                // The code must fit into an int and must have a bit in is32; without the
                // second test a large code would run past the end of the bit array.
                if (j > Integer.MAX_VALUE || j / 8 >= is32.length)
                {
                    throw new IOException("[Sub Format 8] Invalid Character code");
                }

                int currentCharCode;
                if ((is32[(int) j / 8] & (1 << ((int) j % 8))) == 0)
                {
                    currentCharCode = (int) j;
                }
                else
                {
                    // the character code uses a 32bits format
                    // convert it in decimal : see http://www.unicode.org/faq//utf_bom.html#utf16-4
                    long lead = LEAD_OFFSET + (j >> 10);
                    long trail = 0xDC00 + (j & 0x3FF);

                    long codepoint = (lead << 10) + trail + SURROGATE_OFFSET;
                    if (codepoint > Integer.MAX_VALUE)
                    {
                        throw new IOException("[Sub Format 8] Invalid Character code");
                    }
                    currentCharCode = (int) codepoint;
                }

                // valid glyph ids are 0 .. numGlyphs - 1, the size of glyphIdToCharacterCode
                long glyphIndex = startGlyph + (j - firstCode);
                if (glyphIndex >= numGlyphs)
                {
                    throw new IOException("CMap contains an invalid glyph index");
                }

                glyphIdToCharacterCode[(int) glyphIndex] = currentCharCode;
                characterCodeToGlyphId.put(currentCharCode, (int) glyphIndex);
            }
        }
    }

    /**
     * Reads a format 10 subtable.
     *
     * <p>Only the start code and the number of characters are read and validated; no
     * mapping is built from this format.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype10(TTFDataStream data, int numGlyphs) throws IOException
    {
        long startCode = data.readUnsignedInt();
        long numChars = data.readUnsignedInt();
        if (numChars > Integer.MAX_VALUE)
        {
            throw new IOException("Invalid number of Characters");
        }

        long lastCode = startCode + numChars;
        if (startCode < 0 || startCode > 0x0010FFFF || lastCode > 0x0010FFFF
                || (lastCode >= 0x0000D800 && lastCode <= 0x0000DFFF))
        {
            throw new IOException("Invalid Characters codes");
        }
    }

    /**
     * Reads a format 12 subtable.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype12(TTFDataStream data, int numGlyphs) throws IOException
    {
        long nbGroups = data.readUnsignedInt();
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs);
        for (long i = 0; i < nbGroups; ++i)
        {
            long firstCode = data.readUnsignedInt();
            long endCode = data.readUnsignedInt();
            long startGlyph = data.readUnsignedInt();

            // guarantees that every code of the group is at most 0x10FFFF
            checkCodeRange(firstCode, endCode, "Invalid characters codes");

            for (long j = 0; j <= endCode - firstCode; ++j)
            {
                long glyphIndex = startGlyph + j;
                if (glyphIndex >= numGlyphs)
                {
                    throw new IOException("CMap contains an invalid glyph index");
                }

                int charCode = (int) (firstCode + j);
                glyphIdToCharacterCode[(int) glyphIndex] = charCode;
                characterCodeToGlyphId.put(charCode, (int) glyphIndex);
            }
        }
    }

    /**
     * Reads a format 13 subtable, in which every code of a group maps to the same glyph.
     *
     * <p>Parsing stops with a warning at the first group whose glyph id is out of range;
     * the groups read before it are kept.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype13(TTFDataStream data, int numGlyphs) throws IOException
    {
        long nbGroups = data.readUnsignedInt();
        // the reverse table has to exist before the first group is stored into it
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs);
        for (long i = 0; i < nbGroups; ++i)
        {
            long firstCode = data.readUnsignedInt();
            long endCode = data.readUnsignedInt();
            long glyphId = data.readUnsignedInt();

            // valid glyph ids are 0 .. numGlyphs - 1
            if (glyphId >= numGlyphs)
            {
                Log.w(LOG_TAG, "Format 13 cmap contains an invalid glyph index");
                break;
            }

            // guarantees that every code of the group is at most 0x10FFFF
            checkCodeRange(firstCode, endCode, "Invalid Characters codes");

            for (long j = 0; j <= endCode - firstCode; ++j)
            {
                int charCode = (int) (firstCode + j);
                glyphIdToCharacterCode[(int) glyphId] = charCode;
                characterCodeToGlyphId.put(charCode, (int) glyphId);
            }
        }
    }

    /**
     * Reads a format 14 subtable.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype14(TTFDataStream data, int numGlyphs) throws IOException
    {
        // Unicode Variation Sequences (UVS)
        // see http://blogs.adobe.com/CCJKType/2013/05/opentype-cmap-table-ramblings.html
        Log.w(LOG_TAG, "Format 14 cmap table is not supported and will be ignored");
    }

    /**
     * Reads a format 6 subtable: a run of glyph ids for consecutive character codes.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype6(TTFDataStream data, int numGlyphs) throws IOException
    {
        int firstCode = data.readUnsignedShort();
        int entryCount = data.readUnsignedShort();
        if (entryCount == 0)
        {
            // nothing to map, and there is no largest glyph id to size the table with
            Log.w(LOG_TAG, "cmap format 6 subtable is empty");
            return;
        }

        int[] glyphIdArray = data.readUnsignedShortArray(entryCount);
        int maxGlyphId = 0;
        for (int i = 0; i < entryCount; i++)
        {
            maxGlyphId = Math.max(maxGlyphId, glyphIdArray[i]);
            characterCodeToGlyphId.put(firstCode + i, glyphIdArray[i]);
        }

        // link the glyphId with the right character code; when a glyph id occurs more
        // than once the last character code wins
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(maxGlyphId + 1);
        for (int i = 0; i < entryCount; i++)
        {
            glyphIdToCharacterCode[glyphIdArray[i]] = firstCode + i;
        }
    }

    /**
     * Reads a format 4 subtable.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOException
    {
        int segCountX2 = data.readUnsignedShort();
        int segCount = segCountX2 / 2;
        // the next three fields are not used, they are only read to advance the stream
        data.readUnsignedShort(); // searchRange
        data.readUnsignedShort(); // entrySelector
        data.readUnsignedShort(); // rangeShift
        int[] endCount = data.readUnsignedShortArray(segCount);
        data.readUnsignedShort(); // reservedPad
        int[] startCount = data.readUnsignedShortArray(segCount);
        int[] idDelta = data.readUnsignedShortArray(segCount);
        int[] idRangeOffset = data.readUnsignedShortArray(segCount);

        Map<Integer, Integer> tmpGlyphToChar = new HashMap<Integer, Integer>();

        // position right behind the idRangeOffset array, base of the glyph offsets below
        long currentPosition = data.getCurrentPosition();

        for (int i = 0; i < segCount; i++)
        {
            int start = startCount[i];
            int end = endCount[i];
            int delta = idDelta[i];
            int rangeOffset = idRangeOffset[i];
            if (start == 65535 || end == 65535)
            {
                continue;
            }

            for (int j = start; j <= end; j++)
            {
                if (rangeOffset == 0)
                {
                    int glyphid = (j + delta) % 65536;
                    tmpGlyphToChar.put(glyphid, j);
                    characterCodeToGlyphId.put(j, glyphid);
                    continue;
                }

                long glyphOffset = currentPosition + ((rangeOffset / 2) +
                        (j - start) +
                        (i - segCount)) * 2;
                data.seek(glyphOffset);
                int glyphIndex = data.readUnsignedShort();
                if (glyphIndex != 0)
                {
                    glyphIndex += delta;
                    glyphIndex %= 65536;
                    // the first character code found for a glyph id is kept
                    if (!tmpGlyphToChar.containsKey(glyphIndex))
                    {
                        tmpGlyphToChar.put(glyphIndex, j);
                        characterCodeToGlyphId.put(j, glyphIndex);
                    }
                }
            }
        }

        /*
         * this is the final result key=glyphId, value is character codes Create an array that contains MAX(GlyphIds)
         * element, or -1
         */
        if (tmpGlyphToChar.isEmpty())
        {
            Log.w(LOG_TAG, "cmap format 4 subtable is empty");
            return;
        }
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(Collections.max(tmpGlyphToChar.keySet()) + 1);
        for (Entry<Integer, Integer> entry : tmpGlyphToChar.entrySet())
        {
            // link the glyphId with the right character code
            glyphIdToCharacterCode[entry.getKey()] = entry.getValue();
        }
    }

    /**
     * Read a format 2 subtable.
     *
     * <p>Entries whose computed glyph id is not smaller than {@code numGlyphs} are
     * skipped with a warning.
     *
     * @param data the data stream of the to be parsed ttf font
     * @param numGlyphs number of glyphs to be read
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOException
    {
        int[] subHeaderKeys = new int[256];
        // ---- keep the Max Index of the SubHeader array to know its length
        int maxSubHeaderIndex = 0;
        for (int i = 0; i < 256; i++)
        {
            subHeaderKeys[i] = data.readUnsignedShort();
            maxSubHeaderIndex = Math.max(maxSubHeaderIndex, subHeaderKeys[i] / 8);
        }

        // ---- Read all SubHeaders to avoid useless seek on DataSource
        SubHeader[] subHeaders = new SubHeader[maxSubHeaderIndex + 1];
        for (int i = 0; i <= maxSubHeaderIndex; ++i)
        {
            int firstCode = data.readUnsignedShort();
            int entryCount = data.readUnsignedShort();
            short idDelta = data.readSignedShort();
            // rebase the offset so that it counts from the position behind the last sub
            // header (8 bytes per remaining sub header, 2 bytes for this field)
            int idRangeOffset = data.readUnsignedShort() - (maxSubHeaderIndex - i) * 8 - 2;
            subHeaders[i] = new SubHeader(firstCode, entryCount, idDelta, idRangeOffset);
        }

        long startGlyphIndexOffset = data.getCurrentPosition();
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs);
        for (int i = 0; i <= maxSubHeaderIndex; ++i)
        {
            SubHeader sh = subHeaders[i];
            int firstCode = sh.getFirstCode();
            int idDelta = sh.getIdDelta();
            int entryCount = sh.getEntryCount();
            data.seek(startGlyphIndexOffset + sh.getIdRangeOffset());
            for (int j = 0; j < entryCount; ++j)
            {
                // ---- compute the Character Code
                int charCode = (i << 8) + (firstCode + j);

                // ---- the entries of a sub header are consecutive unsigned shorts
                int p = data.readUnsignedShort();
                // ---- compute the glyphIndex
                if (p > 0)
                {
                    // idDelta is signed and % keeps the sign of the dividend, so a
                    // negative sum has to be wrapped back into 0..65535
                    p = (p + idDelta) % 65536;
                    if (p < 0)
                    {
                        p += 65536;
                    }
                }
                if (p >= numGlyphs)
                {
                    Log.w(LOG_TAG, "glyphId " + p + " for charcode " + charCode
                            + " ignored, numGlyphs is " + numGlyphs);
                    continue;
                }
                glyphIdToCharacterCode[p] = charCode;
                characterCodeToGlyphId.put(charCode, p);
            }
        }
    }

    /**
     * Initialize the CMapEntry when it is a subtype 0.
     *
     * @param data the data stream of the to be parsed ttf font
     * @throws IOException If there is an error parsing the true type font.
     */
    protected void processSubtype0(TTFDataStream data) throws IOException
    {
        byte[] glyphMapping = data.read(256);
        glyphIdToCharacterCode = newGlyphIdToCharacterCode(256);
        for (int i = 0; i < glyphMapping.length; i++)
        {
            // the mapping consists of unsigned bytes
            int glyphIndex = glyphMapping[i] & 0xFF;
            glyphIdToCharacterCode[glyphIndex] = i;
            characterCodeToGlyphId.put(i, glyphIndex);
        }
    }

    /**
     * Tells whether the given value is outside 0..0x10FFFF or inside the surrogate range
     * 0xD800..0xDFFF.
     */
    private static boolean isInvalidCharacterCode(long code)
    {
        return code < 0 || code > 0x0010FFFF || (code >= 0x0000D800 && code <= 0x0000DFFF);
    }

    /**
     * Validates the first and last character code of a format 12 or format 13 group.
     *
     * @param firstCode first character code of the group
     * @param endCode last character code of the group
     * @param message message of the exception thrown for an invalid group
     * @throws IOException If one of the codes is invalid, or if a non-zero end code is
     * smaller than the first code.
     */
    private static void checkCodeRange(long firstCode, long endCode, String message)
            throws IOException
    {
        if (isInvalidCharacterCode(firstCode))
        {
            throw new IOException(message);
        }
        if ((endCode > 0 && endCode < firstCode) || isInvalidCharacterCode(endCode))
        {
            throw new IOException(message);
        }
    }

    /**
     * Workaround for the fact that glyphIdToCharacterCode doesn't distinguish between
     * missing character codes and code 0.
     */
    private int[] newGlyphIdToCharacterCode(int size)
    {
        int[] gidToCode = new int[size];
        Arrays.fill(gidToCode, -1);
        return gidToCode;
    }

    /**
     * @return Returns the platformEncodingId.
     */
    public int getPlatformEncodingId()
    {
        return platformEncodingId;
    }

    /**
     * @param platformEncodingIdValue The platformEncodingId to set.
     */
    public void setPlatformEncodingId(int platformEncodingIdValue)
    {
        platformEncodingId = platformEncodingIdValue;
    }

    /**
     * @return Returns the platformId.
     */
    public int getPlatformId()
    {
        return platformId;
    }

    /**
     * @param platformIdValue The platformId to set.
     */
    public void setPlatformId(int platformIdValue)
    {
        platformId = platformIdValue;
    }

    /**
     * Returns the GlyphId linked with the given character code.
     *
     * @param characterCode the given character code to be mapped
     * @return glyphId the corresponding glyph id for the given character code, or 0 if
     * the character code is not mapped
     */
    public int getGlyphId(int characterCode)
    {
        Integer glyphId = characterCodeToGlyphId.get(characterCode);
        return glyphId == null ? 0 : glyphId;
    }

    /**
     * Returns the character code for the given GID, or null if there is none.
     *
     * @param gid glyph id
     * @return character code, or null if the glyph id is out of range or unmapped
     */
    public Integer getCharacterCode(int gid)
    {
        if (gid < 0 || gid >= glyphIdToCharacterCode.length)
        {
            return null;
        }
        // workaround for the fact that glyphIdToCharacterCode doesn't distinguish between
        // missing character codes and code 0.
        int code = glyphIdToCharacterCode[gid];
        if (code == -1)
        {
            return null;
        }
        return code;
    }

    @Override
    public String toString()
    {
        return "{" + getPlatformId() + " " + getPlatformEncodingId() + "}";
    }

    /**
     * Class used to manage CMap - Format 2.
     *
     * <p>Static because it never touches the enclosing subtable.
     */
    private static final class SubHeader
    {
        private final int firstCode;
        private final int entryCount;
        /**
         * used to compute the GlyphIndex : P = glyphIndexArray.SubArray[pos] GlyphIndex = P + idDelta % 65536.
         */
        private final short idDelta;
        /**
         * Number of bytes to skip to reach the firstCode in the glyphIndexArray.
         */
        private final int idRangeOffset;

        private SubHeader(int firstCodeValue, int entryCountValue, short idDeltaValue, int idRangeOffsetValue)
        {
            firstCode = firstCodeValue;
            entryCount = entryCountValue;
            idDelta = idDeltaValue;
            idRangeOffset = idRangeOffsetValue;
        }

        /**
         * @return the firstCode
         */
        private int getFirstCode()
        {
            return firstCode;
        }

        /**
         * @return the entryCount
         */
        private int getEntryCount()
        {
            return entryCount;
        }

        /**
         * @return the idDelta
         */
        private short getIdDelta()
        {
            return idDelta;
        }

        /**
         * @return the idRangeOffset
         */
        private int getIdRangeOffset()
        {
            return idRangeOffset;
        }
    }
}