package edu.isi.bmkeg.lapdf.text;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;
import edu.isi.bmkeg.lapdf.features.ChunkFeatures;
import edu.isi.bmkeg.lapdf.model.ChunkBlock;
import edu.isi.bmkeg.lapdf.model.LapdfDocument;
import edu.isi.bmkeg.lapdf.model.PageBlock;
import edu.isi.bmkeg.lapdf.model.WordBlock;
import edu.isi.bmkeg.lapdf.model.RTree.RTModelFactory;
import edu.isi.bmkeg.lapdf.model.ordering.SpatialOrdering;
import edu.isi.bmkeg.lapdf.model.spatial.SpatialEntity;
import edu.isi.bmkeg.utils.ReadWriteTextFileWithEncoding;
/**
 * {@link TextWriter} that dumps a plain-text report of the layout features
 * of every chunk block in a document, page by page.
 *
 * <p>For each chunk the report lists its most popular font, style and word
 * height, its number of lines and its alignment. Chunks that consist of a
 * single line, or that contain a single word block, are additionally flagged
 * as possible section headings together with their text.
 *
 * <p>The generator keeps no state between calls: each invocation of
 * {@link #write(LapdfDocument, String)} builds its report from scratch, so
 * one instance can be reused for several documents.
 */
public class SpatialLayoutFeaturesReportGenerator implements TextWriter
{
    /**
     * Creates a report generator.
     *
     * @throws IOException never thrown by this constructor; the clause is
     *         kept so that existing callers which catch it still compile
     */
    public SpatialLayoutFeaturesReportGenerator() throws IOException
    {
        // Nothing to initialise: the report buffer is local to write().
    }

    /**
     * Appends the feature summary of one chunk to the report.
     *
     * @param report buffer the summary is appended to
     * @param chunk  chunk whose features are reported
     * @param page   page that is queried for the word blocks inside
     *               {@code chunk}
     */
    private void writeFeatures(StringBuilder report, ChunkBlock chunk, PageBlock page){
        List<SpatialEntity> words = page.containsByType(chunk,
                SpatialOrdering.MIXED_MODE, WordBlock.class);

        // Heuristic: a one-line chunk, or a chunk holding exactly one word
        // block, is possibly a section heading line.
        if(chunk.readNumberOfLine()==1||words.size()==1){
            report.append("\n-POSSIBLE SECTION HEADING-\n");
            report.append("\nChunk text: "+chunk.readChunkText());
        }

        report.append("\nMost popular font "+chunk.getMostPopularWordFont());
        // NOTE(review): the label says "font size" but the value comes from
        // getMostPopularWordStyle() - confirm which of the two is intended.
        report.append("\nMost popular font size "+chunk.getMostPopularWordStyle());
        report.append("\nMost popular word height "+chunk.getMostPopularWordHeight());
        report.append("\nNumber of Lines "+chunk.readNumberOfLine());
        report.append("\nAlignment "+chunk.readLeftRightMedLine());
    }

    /**
     * Builds the layout feature report for {@code doc} and writes it to
     * {@code outputFilename} using the encoding named by
     * {@link TextWriter#UTF_8}.
     *
     * <p>Pages are visited from 1 to {@code doc.getTotalNumberOfPages()}
     * inclusive; within a page the chunks are taken in
     * {@link SpatialOrdering#COLUMN_AWARE_MIXED_MODE} order and numbered
     * starting at 1.
     *
     * @param doc            document to report on
     * @param outputFilename path of the report file to write
     * @throws IOException           if the report cannot be written
     * @throws FileNotFoundException if the output file cannot be opened
     */
    @Override
    public void write(LapdfDocument doc, String outputFilename) throws IOException,FileNotFoundException
    {
        // A fresh buffer per call: accumulating in an instance field made a
        // second write() on the same generator re-emit the previous report.
        StringBuilder report = new StringBuilder();

        int totalNumberOfPages = doc.getTotalNumberOfPages();
        for (int i = 1; i <= totalNumberOfPages; i++) {
            report.append("\n\n--------------------------------------------------------------------------");
            report.append("--------------------PAGE: "+i+"------------------------\n\n");

            PageBlock page = doc.getPage(i);
            List<ChunkBlock> chunks = page.getAllChunkBlocks(SpatialOrdering.COLUMN_AWARE_MIXED_MODE);
            report.append("\nNumber of Blocks="+chunks.size());

            int chunkCounter = 1;
            for(ChunkBlock chunk : chunks){
                report.append("\n--------------------TEXT BLOCK:"+chunkCounter+"------------------------");
                writeFeatures(report,chunk,page);
                chunkCounter++;
            }
        }

        ReadWriteTextFileWithEncoding.write(outputFilename, TextWriter.UTF_8, report.toString());
    }
}