/* Copyright 2004 Ryan Ackley
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.parse.msword.chp;
import java.util.List;
import java.util.ArrayList;
import java.io.OutputStream;
import java.io.IOException;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.io.*;
import org.apache.poi.hwpf.model.*;
/**
 * Collects the character formatting runs (CHPX entries) of a Word 6.0/95
 * document by walking the Chp bin table and reading every formatted disk
 * page it references.
 *
 * @author Ryan Ackley
 */
public class Word6CHPBinTable
{
  /** Size in bytes of one bin table entry (a page number). */
  private static final int BIN_TABLE_ENTRY_SIZE = 2;

  /** Mask used to interpret a 2-byte page number as an unsigned value. */
  private static final int UNSIGNED_SHORT_MASK = 0xFFFF;

  /** List of character properties, in the order they were read. */
  ArrayList _textRuns = new ArrayList();

  /**
   * Constructor used to read a binTable in from a Word document.
   *
   * @param documentStream The POIFS "WordDocument" stream from a Word document
   * @param offset The offset of the Chp bin table in the main stream.
   * @param size The size of the Chp bin table in the main stream.
   * @param fcMin The start of text in the main stream.
   * @param tpt The text piece table; handed unchanged to every
   *        CHPFormattedDiskPage that is read.
   */
  public Word6CHPBinTable(byte[] documentStream, int offset,
                          int size, int fcMin, TextPieceTable tpt)
  {
    PlexOfCps binTable =
      new PlexOfCps(documentStream, offset, size, BIN_TABLE_ENTRY_SIZE);

    int length = binTable.length();
    for (int x = 0; x < length; x++)
    {
      GenericPropertyNode node = binTable.getProperty(x);

      // LittleEndian.getShort yields a signed value. Mask it so that page
      // numbers of 0x8000 and above stay positive instead of producing a
      // negative page offset.
      int pageNum = LittleEndian.getShort((byte[])node.getBytes())
                    & UNSIGNED_SHORT_MASK;

      // Page numbers count blocks of BIG_BLOCK_SIZE bytes from the start of
      // the document stream.
      int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;

      CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
        pageOffset, fcMin, tpt);

      int fkpSize = cfkp.size();
      for (int y = 0; y < fkpSize; y++)
      {
        _textRuns.add(cfkp.getCHPX(y));
      }
    }
  }

  /**
   * Returns the character runs gathered by the constructor.
   *
   * @return the internal list itself (not a copy); its elements are the
   *         values returned by CHPFormattedDiskPage.getCHPX.
   */
  public List getTextRuns()
  {
    return _textRuns;
  }
}