package io.lumify.core;

import io.lumify.core.ingest.video.VideoFrameInfo;
import io.lumify.core.ingest.video.VideoPropertyHelper;
import io.lumify.core.ingest.video.VideoTranscript;
import io.lumify.core.model.textHighlighting.OffsetItem;
import io.lumify.core.model.textHighlighting.VertexOffsetItem;
import io.lumify.web.clientapi.model.SandboxStatus;
import io.lumify.core.util.GraphUtil;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.securegraph.Authorizations;
import org.securegraph.Vertex;

import java.util.*;

/**
 * Renders plain text (or the entries of a video transcript) as HTML in which
 * every highlightable {@link OffsetItem} is wrapped in a {@code <span>} that
 * carries the item's CSS classes, optional title and JSON info.
 */
public class EntityHighlighter {

    /**
     * Highlights {@code text} using the given term mention vertices.
     *
     * @param text           the raw text to render
     * @param termMentions   term mention vertices, each converted to a {@link VertexOffsetItem}
     * @param workspaceId    workspace used to derive each mention's sandbox status
     * @param authorizations authorizations handed to each created {@link VertexOffsetItem}
     * @return the HTML-escaped text with highlight spans inserted
     */
    public String getHighlightedText(String text, Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        List<OffsetItem> offsetItems = convertTermMentionsToOffsetItems(termMentions, workspaceId, authorizations);
        return getHighlightedText(text, offsetItems);
    }

    // TODO: change to use an InputStream?

    /**
     * Highlights {@code text} using the given offset items.
     *
     * <p>Items are processed in their natural ({@link Comparable}) order. A
     * {@link VertexOffsetItem} that overlaps an earlier retained
     * {@link VertexOffsetItem} is dropped. Items reporting a negative start or
     * end offset, and items whose {@code shouldHighlight()} is false, produce
     * no span. All text and the span attributes are HTML-escaped.
     *
     * @param text        the raw text to render
     * @param offsetItems the items to highlight; the list is read but never modified
     * @return the HTML-escaped text with highlight spans inserted
     * @throws JSONException declared for callers; thrown only if building an item's JSON fails
     */
    public static String getHighlightedText(String text, List<OffsetItem> offsetItems) throws JSONException {
        // Sort a private copy: the caller's list must not be reordered or pruned,
        // and may be unmodifiable.
        List<OffsetItem> sortedItems = new ArrayList<>(offsetItems);
        Collections.sort(sortedItems);

        StringBuilder result = new StringBuilder();
        // End offsets of the spans that are currently open, smallest first.
        PriorityQueue<Integer> endOffsets = new PriorityQueue<>();
        // Index in text up to which output has already been emitted.
        int lastStart = 0;
        // Most recently retained vertex item. A vertex item is only retained when
        // its end lies beyond the end of every earlier retained vertex item, so
        // this item always has the largest end and is the only one that needs to
        // be compared against.
        OffsetItem lastVertexItem = null;

        for (OffsetItem offsetItem : sortedItems) {
            if (offsetItem instanceof VertexOffsetItem) {
                if (lastVertexItem != null && overlapsEarlierItem(lastVertexItem, offsetItem)) {
                    continue;
                }
                // Retained even if it is skipped below, so it still suppresses
                // later vertex items that overlap it.
                lastVertexItem = offsetItem;
            }

            if (OffsetItem.getOffset(offsetItem.getStart()) < 0 || OffsetItem.getOffset(offsetItem.getEnd()) < 0) {
                continue;
            }

            if (!offsetItem.shouldHighlight()) {
                continue;
            }

            // Close every open span that ends at or before this item's start.
            while (!endOffsets.isEmpty() && endOffsets.peek() <= OffsetItem.getOffset(offsetItem.getStart())) {
                lastStart = closeSpan(result, text, lastStart, endOffsets.poll());
            }

            int startOffset = (int) OffsetItem.getOffset(offsetItem.getStart());
            result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart, startOffset)));

            JSONObject infoJson = offsetItem.getInfoJson();

            result.append("<span");
            result.append(" class=\"");
            // Escaped like the other attributes so a class value cannot break out of the attribute.
            result.append(StringEscapeUtils.escapeHtml(StringUtils.join(offsetItem.getCssClasses(), " ")));
            result.append("\"");
            if (offsetItem.getTitle() != null) {
                result.append(" title=\"");
                result.append(StringEscapeUtils.escapeHtml(offsetItem.getTitle()));
                result.append("\"");
            }
            result.append(" data-info=\"");
            result.append(StringEscapeUtils.escapeHtml(infoJson.toString()));
            result.append("\"");
            result.append(">");

            endOffsets.add((int) OffsetItem.getOffset(offsetItem.getEnd()));
            // Never move the cursor backwards, otherwise already emitted text would be emitted again.
            lastStart = Math.max(lastStart, startOffset);
        }

        // Close whatever is still open, innermost (smallest end) first.
        while (!endOffsets.isEmpty()) {
            lastStart = closeSpan(result, text, lastStart, endOffsets.poll());
        }
        result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart)));

        // NOTE(review): as written this replacement looks like a no-op on the escaped
        // output; it may originally have targeted an HTML entity - confirm against history.
        return result.toString().replaceAll(" ", " ");
    }

    /**
     * Tells whether {@code candidate} overlaps {@code earlier}: the earlier item
     * ends at or after the candidate's end, or after the candidate's start.
     */
    private static boolean overlapsEarlierItem(OffsetItem earlier, OffsetItem candidate) {
        return OffsetItem.getOffset(earlier.getEnd()) >= OffsetItem.getOffset(candidate.getEnd())
                || OffsetItem.getOffset(earlier.getEnd()) > OffsetItem.getOffset(candidate.getStart());
    }

    /**
     * Emits the escaped text between {@code lastStart} and {@code end} followed by
     * a closing span tag.
     *
     * @return the new emit position, which is never smaller than {@code lastStart}
     */
    private static int closeSpan(StringBuilder result, String text, int lastStart, int end) {
        result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart, end)));
        result.append("</span>");
        return Math.max(lastStart, end);
    }

    /**
     * Highlights every entry of a video transcript using the given term mention vertices.
     *
     * @param videoTranscript the transcript whose entries are rendered
     * @param termMentions    term mention vertices, each converted to a {@link VertexOffsetItem}
     * @param workspaceId     workspace used to derive each mention's sandbox status
     * @param authorizations  authorizations handed to each created {@link VertexOffsetItem}
     * @return a new transcript with the same entry times and highlighted text
     */
    public VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        List<OffsetItem> offsetItems = convertTermMentionsToOffsetItems(termMentions, workspaceId, authorizations);
        return getHighlightedVideoTranscript(videoTranscript, offsetItems);
    }

    /**
     * Groups the offset items by transcript entry and highlights each entry.
     */
    private VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, List<OffsetItem> offsetItems) {
        Map<Integer, List<OffsetItem>> videoTranscriptOffsetItems = convertOffsetItemsToVideoTranscriptOffsetItems(videoTranscript, offsetItems);
        return getHighlightedVideoTranscript(videoTranscript, videoTranscriptOffsetItems);
    }

    /**
     * Builds a new transcript in which each entry's text is highlighted with the
     * offset items mapped to that entry's index.
     */
    private VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, Map<Integer, List<OffsetItem>> videoTranscriptOffsetItems) {
        VideoTranscript result = new VideoTranscript();
        int entryIndex = 0;
        for (VideoTranscript.TimedText entry : videoTranscript.getEntries()) {
            List<OffsetItem> offsetItems = videoTranscriptOffsetItems.get(entryIndex);
            String highlightedText;
            if (offsetItems == null) {
                // NOTE(review): entries without offset items are passed through as-is, i.e.
                // not HTML-escaped, unlike highlighted entries - confirm this is intended.
                highlightedText = entry.getText();
            } else {
                highlightedText = getHighlightedText(entry.getText(), offsetItems);
            }
            result.add(entry.getTime(), highlightedText);
            entryIndex++;
        }
        return result;
    }

    /**
     * Buckets offset items by the index of the transcript entry they belong to.
     *
     * @return map from transcript entry index to the items of that entry, in input order
     */
    private Map<Integer, List<OffsetItem>> convertOffsetItemsToVideoTranscriptOffsetItems(VideoTranscript videoTranscript, List<OffsetItem> offsetItems) {
        Map<Integer, List<OffsetItem>> results = new HashMap<>();
        for (OffsetItem offsetItem : offsetItems) {
            Integer videoTranscriptEntryIndex = getVideoTranscriptEntryIndex(videoTranscript, offsetItem);
            List<OffsetItem> currentList = results.get(videoTranscriptEntryIndex);
            if (currentList == null) {
                currentList = new ArrayList<>();
                results.put(videoTranscriptEntryIndex, currentList);
            }
            currentList.add(offsetItem);
        }
        return results;
    }

    /**
     * Resolves the transcript entry index of an offset item: first through the video
     * frame info derived from the item's id, otherwise through the index the item
     * itself reports.
     */
    private static int getVideoTranscriptEntryIndex(VideoTranscript videoTranscript, OffsetItem offsetItem) {
        Integer videoTranscriptEntryIndex = null;
        VideoFrameInfo videoFrameInfo = VideoPropertyHelper.getVideoFrameInfo(offsetItem.getId());
        if (videoFrameInfo != null) {
            videoTranscriptEntryIndex = videoTranscript.findEntryIndexFromStartTime(videoFrameInfo.getFrameStartTime());
        }
        if (videoTranscriptEntryIndex == null) {
            // NOTE(review): if this can yield null, the unboxing on return throws - confirm.
            videoTranscriptEntryIndex = offsetItem.getVideoTranscriptEntryIndex();
        }
        return videoTranscriptEntryIndex;
    }

    /**
     * Returns the tail of {@code text} starting at {@code beginIndex}, with the
     * index clamped into {@code [0, text.length()]} so the call never throws.
     */
    private static String safeSubstring(String text, int beginIndex) {
        int begin = Math.max(0, Math.min(beginIndex, text.length()));
        return text.substring(begin);
    }

    /**
     * Returns {@code text[beginIndex, endIndex)} with both indexes clamped into
     * {@code [0, text.length()]}; an inverted range yields the empty string
     * instead of an exception.
     */
    private static String safeSubstring(String text, int beginIndex, int endIndex) {
        int end = Math.max(0, Math.min(endIndex, text.length()));
        int begin = Math.max(0, Math.min(beginIndex, end));
        return text.substring(begin, end);
    }

    /**
     * Wraps each term mention vertex in a {@link VertexOffsetItem}.
     *
     * @param termMentions   the term mention vertices
     * @param workspaceId    workspace used, together with each vertex's visibility string,
     *                       to derive its sandbox status
     * @param authorizations authorizations handed to each created item
     * @return a new mutable list with one item per vertex, in iteration order
     */
    public List<OffsetItem> convertTermMentionsToOffsetItems(Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        List<OffsetItem> termMetadataOffsetItems = new ArrayList<>();
        for (Vertex termMention : termMentions) {
            String visibility = termMention.getVisibility().getVisibilityString();
            SandboxStatus sandboxStatus = GraphUtil.getSandboxStatusFromVisibilityString(visibility, workspaceId);
            termMetadataOffsetItems.add(new VertexOffsetItem(termMention, sandboxStatus, authorizations));
        }
        return termMetadataOffsetItems;
    }
}