package edu.isi.bmkeg.lapdf.text; import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; import edu.isi.bmkeg.lapdf.model.ChunkBlock; import edu.isi.bmkeg.lapdf.model.LapdfDocument; import edu.isi.bmkeg.lapdf.model.PageBlock; import edu.isi.bmkeg.lapdf.model.ordering.SpatialOrdering; import edu.isi.bmkeg.utils.ReadWriteTextFileWithEncoding; public class SpatiallyOrderedChunkTextWriter implements TextWriter { private StringBuilder text; public SpatiallyOrderedChunkTextWriter() { } @Override public void write(LapdfDocument document, String outputFilename) throws IOException,FileNotFoundException { text = new StringBuilder(); int totalNumberOfPages = document.getTotalNumberOfPages(); PageBlock page; for (int i = 1; i <= totalNumberOfPages; i++) { page = document.getPage(i); List<ChunkBlock> chunksPerPage = page.getAllChunkBlocks(SpatialOrdering.PAGE_COLUMN_AWARE_MIXED_MODE); for(ChunkBlock chunkBlock:chunksPerPage){ if(!chunkBlock.getType().equals(ChunkBlock.TYPE_FOOTER)&&!chunkBlock.getType().equals(ChunkBlock.TYPE_HEADER)){ text.append(chunkBlock.readChunkText() + "\n"); } } } ReadWriteTextFileWithEncoding.write(outputFilename, TextWriter.UTF_8, text.toString()); } }