package net.bitpot.railways.parser.route; import org.jetbrains.annotations.NotNull; import java.util.ArrayList; import java.util.Collection; import java.util.List; /** * @author Basil Gren * on 09.12.2014. */ public abstract class TextChunkHighlighter { public List<TextChunk> highlight(List<TextChunk> textChunks, String highlightSubstr) { highlightSubstr = highlightSubstr.trim(); ArrayList<TextChunk> result = new ArrayList<TextChunk>(); StringBuilder sb = new StringBuilder(); for(TextChunk t: textChunks) sb.append(t.getText()); // First, find all substring regions to be highlighted. List<TextRegion> regions = findSubstringRegions(sb.toString(), highlightSubstr); if (regions == null) return textChunks; // Now go through every TextChunk and break it down if it intersects // with any region. Token type is preserved. for(TextChunk chunk: textChunks) highlightChunk(chunk, regions, result); return result; } /** * Finds all regions of provided string which contain provided substring. * Returns array of arrays. Each item is an array that contains begin and * end offsets of a substring: * * @param s String which will be searched for substring. * @param subStr Substring to find. * @return Array of substring regions (begin and end offsets) or null if * specified substring is empty. */ private List<TextRegion> findSubstringRegions(String s, String subStr) { // Prevent infinite loop if (subStr.equals("")) return null; int startOffset = 0, endOffset; ArrayList<TextRegion> regions = new ArrayList<TextRegion>(); while(startOffset != -1) { startOffset = s.indexOf(subStr, startOffset); if (startOffset != -1) { endOffset = startOffset + subStr.length(); regions.add(new TextRegion(startOffset, endOffset)); startOffset = endOffset; } } return regions; } /** * Creates a set of chunks from the passed one. New chunks are determined * by intersection with passed regions, so that every new chunk that is * inside a region is marked as highlighted. * * @param chunk Text chunk. * @param highlightedRegions A list of regions in original string, which * should be highlighted. * @param chunkList Target chunk collection, that will receive new chunks. */ private void highlightChunk(TextChunk chunk, List<TextRegion> highlightedRegions, Collection<TextChunk> chunkList) { int newChunkSize; int offsRel = 0; // Offset relative to current text chunk // We assume that regions are sorted. for(TextRegion region: highlightedRegions) { // Absolute offset - offset in original string, which text chunk belongs to. int offsAbs = chunk.getBeginOffset() + offsRel; // Skip to the next region if current does not intersect with chunk if (region.endOffset <= offsAbs || chunk.getEndOffset() < region.startOffset) continue; // Get intersection of chunk and region int intersectionBegin = Math.max(offsAbs, region.startOffset); int intersectionEnd = Math.min(chunk.getEndOffset(), region.endOffset); // Now breakdown chunk into parts. // 1st part - between chunk begin and intersection begin newChunkSize = intersectionBegin - offsAbs; if (newChunkSize > 0) { chunkList.add(createChunk( chunk.getText().substring(offsRel, offsRel + newChunkSize), chunk.getType(), offsAbs)); offsRel += newChunkSize; } // 2nd part - intersection itself (highlighted part). newChunkSize = intersectionEnd - intersectionBegin; if (newChunkSize > 0) { TextChunk hlToken = createChunk( chunk.getText().substring(offsRel, offsRel + newChunkSize), chunk.getType(), intersectionBegin); hlToken.setHighlighted(true); chunkList.add(hlToken); offsRel += newChunkSize; } } // the last part - between intersection and chunk end, if it's necessary newChunkSize = chunk.getText().length() - offsRel; if (newChunkSize > 0) { chunkList.add(createChunk( chunk.getText().substring(offsRel, offsRel + newChunkSize), chunk.getType(), chunk.getBeginOffset() + offsRel)); } } @NotNull protected abstract TextChunk createChunk(@NotNull String text, int chunkType, int offsetAbs); private static class TextRegion { public int startOffset; public int endOffset; public TextRegion(int startOffset, int endOffset) { this.startOffset = startOffset; this.endOffset = endOffset; } } }