/* * * This file is part of the iText (R) project. Copyright (c) 1998-2017 iText Group NV * Authors: Bruno Lowagie, Paulo Soares, et al. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License version 3 * as published by the Free Software Foundation with the addition of the * following permission added to Section 15 as permitted in Section 7(a): * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY * ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT * OF THIRD PARTY RIGHTS * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Affero General Public License for more details. * You should have received a copy of the GNU Affero General Public License * along with this program; if not, see http://www.gnu.org/licenses or write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA, 02110-1301 USA, or download the license from the following URL: * http://itextpdf.com/terms-of-use/ * * The interactive user interfaces in modified source and object code versions * of this program must display Appropriate Legal Notices, as required under * Section 5 of the GNU Affero General Public License. * * In accordance with Section 7(b) of the GNU Affero General Public License, * a covered work must retain the producer line in every PDF that is created * or manipulated using iText. * * You can be released from the requirements of the license by purchasing * a commercial license. Buying such a license is mandatory as soon as you * develop commercial activities involving the iText software without * disclosing the source code of your own applications. * These activities include: offering paid services to customers as an ASP, * serving PDFs on the fly in a web application, shipping iText with a closed * source product. * * For more information, please contact iText Software Corp. at this * address: sales@itextpdf.com */ package com.itextpdf.text.pdf.codec; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import com.itextpdf.text.error_messages.MessageLocalization; import com.itextpdf.text.pdf.RandomAccessFileOrArray; /** * Class to read a JBIG2 file at a basic level: understand all the segments, * understand what segments belong to which pages, how many pages there are, * what the width and height of each page is, and global segments if there * are any. Or: the minimum required to be able to take a normal sequential * or random-access organized file, and be able to embed JBIG2 pages as images * in a PDF. * * TODO: the indeterminate-segment-size value of dataLength, else? * * @since 2.1.5 */ public class JBIG2SegmentReader { public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2. public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3. public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3. public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3. public static final int PATTERN_DICTIONARY = 16; //see 7.4.4. public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5. public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5. public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5. public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6. public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6. public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6. public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7. public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7. public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7. public static final int PAGE_INFORMATION = 48; //see 7.4.8. public static final int END_OF_PAGE = 49; //see 7.4.9. public static final int END_OF_STRIPE = 50; //see 7.4.10. public static final int END_OF_FILE = 51; //see 7.4.11. public static final int PROFILES = 52; //see 7.4.12. public static final int TABLES = 53; //see 7.4.13. public static final int EXTENSION = 62; //see 7.4.14. private final SortedMap<Integer, JBIG2Segment> segments = new TreeMap<Integer, JBIG2Segment>(); private final SortedMap<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>(); private final SortedSet<JBIG2Segment> globals = new TreeSet<JBIG2Segment>(); private RandomAccessFileOrArray ra; private boolean sequential; private boolean number_of_pages_known; private int number_of_pages = -1; private boolean read = false; /** * Inner class that holds information about a JBIG2 segment. * @since 2.1.5 */ public static class JBIG2Segment implements Comparable<JBIG2Segment> { public final int segmentNumber; public long dataLength = -1; public int page = -1; public int[] referredToSegmentNumbers = null; public boolean[] segmentRetentionFlags = null; public int type = -1; public boolean deferredNonRetain = false; public int countOfReferredToSegments = -1; public byte[] data = null; public byte[] headerData = null; public boolean page_association_size = false; public int page_association_offset = -1; public JBIG2Segment(int segment_number) { this.segmentNumber = segment_number; } public int compareTo(JBIG2Segment s) { return this.segmentNumber - s.segmentNumber; } } /** * Inner class that holds information about a JBIG2 page. * @since 2.1.5 */ public static class JBIG2Page { public final int page; private final JBIG2SegmentReader sr; private final SortedMap<Integer, JBIG2Segment> segs = new TreeMap<Integer, JBIG2Segment>(); public int pageBitmapWidth = -1; public int pageBitmapHeight = -1; public JBIG2Page(int page, JBIG2SegmentReader sr) { this.page = page; this.sr = sr; } /** * return as a single byte array the header-data for each segment in segment number * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization. * if for_embedding, skip the segment types that are known to be not for acrobat. * @param for_embedding * @return a byte array * @throws IOException */ public byte[] getData(boolean for_embedding) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); for (Integer sn : segs.keySet()) { JBIG2Segment s = segs.get(sn); // pdf reference 1.4, section 3.3.6 JBIG2Decode Filter // D.3 Embedded organisation if ( for_embedding && ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { continue; } if ( for_embedding ) { // change the page association to page 1 byte[] headerData_emb = copyByteArray(s.headerData); if ( s.page_association_size ) { headerData_emb[s.page_association_offset] = 0x0; headerData_emb[s.page_association_offset+1] = 0x0; headerData_emb[s.page_association_offset+2] = 0x0; headerData_emb[s.page_association_offset+3] = 0x1; } else { headerData_emb[s.page_association_offset] = 0x1; } os.write(headerData_emb); } else { os.write(s.headerData); } os.write(s.data); } os.close(); return os.toByteArray(); } public void addSegment(JBIG2Segment s) { segs.put(Integer.valueOf(s.segmentNumber), s); } } public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException { this.ra = ra; } public static byte[] copyByteArray(byte[] b) { byte[] bc = new byte[b.length]; System.arraycopy(b, 0, bc, 0, b.length); return bc; } public void read() throws IOException { if ( this.read ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("already.attempted.a.read.on.this.jbig2.file")); } this.read = true; readFileHeader(); // Annex D if ( this.sequential ) { // D.1 do { JBIG2Segment tmp = readHeader(); readSegment(tmp); segments.put(Integer.valueOf(tmp.segmentNumber), tmp); } while ( this.ra.getFilePointer() < this.ra.length() ); } else { // D.2 JBIG2Segment tmp; do { tmp = readHeader(); segments.put(Integer.valueOf(tmp.segmentNumber), tmp); } while ( tmp.type != END_OF_FILE ); Iterator<Integer> segs = segments.keySet().iterator(); while ( segs.hasNext() ) { readSegment(segments.get(segs.next())); } } } void readSegment(JBIG2Segment s) throws IOException { int ptr = (int)ra.getFilePointer(); if ( s.dataLength == 0xffffffffl ) { // TODO figure this bit out, 7.2.7 return; } byte[] data = new byte[(int)s.dataLength]; ra.read(data); s.data = data; if ( s.type == PAGE_INFORMATION ) { int last = (int)ra.getFilePointer(); ra.seek(ptr); int page_bitmap_width = ra.readInt(); int page_bitmap_height = ra.readInt(); ra.seek(last); JBIG2Page p = pages.get(Integer.valueOf(s.page)); if ( p == null ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("referring.to.widht.height.of.page.we.havent.seen.yet.1", s.page)); } p.pageBitmapWidth = page_bitmap_width; p.pageBitmapHeight = page_bitmap_height; } } JBIG2Segment readHeader() throws IOException { int ptr = (int)ra.getFilePointer(); // 7.2.1 int segment_number = ra.readInt(); JBIG2Segment s = new JBIG2Segment(segment_number); // 7.2.3 int segment_header_flags = ra.read(); boolean deferred_non_retain = ( segment_header_flags & 0x80 ) == 0x80; s.deferredNonRetain = deferred_non_retain; boolean page_association_size = ( segment_header_flags & 0x40 ) == 0x40; int segment_type = segment_header_flags & 0x3f; s.type = segment_type; //7.2.4 int referred_to_byte0 = ra.read(); int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5; int[] referred_to_segment_numbers = null; boolean[] segment_retention_flags = null; if ( count_of_referred_to_segments == 7 ) { // at least five bytes ra.seek(ra.getFilePointer() - 1); count_of_referred_to_segments = ra.readInt() & 0x1fffffff; segment_retention_flags = new boolean[count_of_referred_to_segments+1]; int i = 0; int referred_to_current_byte = 0; do { int j = i % 8; if ( j == 0) { referred_to_current_byte = ra.read(); } segment_retention_flags[i] = (0x1 << j & referred_to_current_byte) >> j == 0x1; i++; } while ( i <= count_of_referred_to_segments ); } else if ( count_of_referred_to_segments <= 4 ) { // only one byte segment_retention_flags = new boolean[count_of_referred_to_segments+1]; referred_to_byte0 &= 0x1f; for ( int i = 0; i <= count_of_referred_to_segments; i++ ) { segment_retention_flags[i] = (0x1 << i & referred_to_byte0) >> i == 0x1; } } else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("count.of.referred.to.segments.had.bad.value.in.header.for.segment.1.starting.at.2", String.valueOf(segment_number), String.valueOf(ptr))); } s.segmentRetentionFlags = segment_retention_flags; s.countOfReferredToSegments = count_of_referred_to_segments; // 7.2.5 referred_to_segment_numbers = new int[count_of_referred_to_segments+1]; for ( int i = 1; i <= count_of_referred_to_segments; i++ ) { if ( segment_number <= 256 ) { referred_to_segment_numbers[i] = ra.read(); } else if ( segment_number <= 65536 ) { referred_to_segment_numbers[i] = ra.readUnsignedShort(); } else { referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack } } s.referredToSegmentNumbers = referred_to_segment_numbers; // 7.2.6 int segment_page_association; int page_association_offset = (int)ra.getFilePointer() - ptr; if ( page_association_size ) { segment_page_association = ra.readInt(); } else { segment_page_association = ra.read(); } if ( segment_page_association < 0 ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("page.1.invalid.for.segment.2.starting.at.3", String.valueOf(segment_page_association), String.valueOf(segment_number), String.valueOf(ptr))); } s.page = segment_page_association; // so we can change the page association at embedding time. s.page_association_size = page_association_size; s.page_association_offset = page_association_offset; if ( segment_page_association > 0 && ! pages.containsKey(Integer.valueOf(segment_page_association)) ) { pages.put(Integer.valueOf(segment_page_association), new JBIG2Page(segment_page_association, this)); } if ( segment_page_association > 0 ) { pages.get(Integer.valueOf(segment_page_association)).addSegment(s); } else { globals.add(s); } // 7.2.7 long segment_data_length = ra.readUnsignedInt(); // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments s.dataLength = segment_data_length; int end_ptr = (int)ra.getFilePointer(); ra.seek(ptr); byte[] header_data = new byte[end_ptr - ptr]; ra.read(header_data); s.headerData = header_data; return s; } void readFileHeader() throws IOException { ra.seek(0); byte[] idstring = new byte[8]; ra.read(idstring); byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A}; for ( int i = 0; i < idstring.length; i++ ) { if ( idstring[i] != refidstring[i] ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.idstring.not.good.at.byte.1", i)); } } int fileheaderflags = ra.read(); this.sequential = ( fileheaderflags & 0x1 ) == 0x1; this.number_of_pages_known = ( fileheaderflags & 0x2) == 0x0; if ( (fileheaderflags & 0xfc) != 0x0 ) { throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.flags.bits.2.7.not.0")); } if ( this.number_of_pages_known ) { this.number_of_pages = ra.readInt(); } } public int numberOfPages() { return pages.size(); } public int getPageHeight(int i) { return pages.get(Integer.valueOf(i)).pageBitmapHeight; } public int getPageWidth(int i) { return pages.get(Integer.valueOf(i)).pageBitmapWidth; } public JBIG2Page getPage(int page) { return pages.get(Integer.valueOf(page)); } public byte[] getGlobal(boolean for_embedding) { ByteArrayOutputStream os = new ByteArrayOutputStream(); try { for (Object element : globals) { JBIG2Segment s = (JBIG2Segment)element; if ( for_embedding && ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { continue; } os.write(s.headerData); os.write(s.data); } os.close(); } catch (IOException e) { e.printStackTrace(); } if ( os.size() <= 0 ) { return null; } return os.toByteArray(); } @Override public String toString() { if ( this.read ) { return "Jbig2SegmentReader: number of pages: " + this.numberOfPages(); } else { return "Jbig2SegmentReader in indeterminate state."; } } }