package org.juxtasoftware.util; import org.apache.commons.lang.StringEscapeUtils; import eu.interedition.text.Range; public final class FragmentFormatter { private FragmentFormatter(){} public static String format(final String srcFrag, Range origRange, Range contextRange, long maxLen) { // NOTES: // There are cases (like huck fin example) where text like </image blah> is inline // in the plain text witness. The fragment needs to be escaped so this shows, // but the tags for the change itself must NOT be escaped. So... stick in a // odd char string (that wont be esscaped) to represent tag locations in original fragment, then escape it. // This ensures the tags are placed correctly. Next, replace the special strings with // actual tags. Kinda ugly, but it works (until the doc has the odd char sequences in its fragment) // slap in some tags to show where the change is final String omit = " <span class='del'>✖</span>"; final String omitMarker = "|{x}|"; final String change = "<span class='change'>"; final String startMarker = "|{s}|"; final String endTag = "</span>"; final String endMarker = "|{e}|"; long startOffset = origRange.getStart() - contextRange.getStart(); long endOffset = startOffset+origRange.length(); StringBuilder frag = new StringBuilder(srcFrag); if ( origRange.length() == 0 ) { frag.insert((int) startOffset, omitMarker); } else { // find the first whitespace on after the end offset. long orig = endOffset; while ( true ) { // if this is the last pos in the fragment, we're done if ( (int)endOffset == frag.length() ) { break; } else if ( Character.isWhitespace( frag.charAt((int)endOffset)) == false ) { endOffset++; if ( endOffset >= frag.length()) { endOffset = orig; break; } } else { break; } } frag.insert((int) endOffset, endMarker); // find first whitespace on or before start orig = startOffset; long prior = -1; while ( true ) { if ( Character.isWhitespace( frag.charAt((int)startOffset)) == false ) { prior = startOffset; startOffset--; if ( startOffset <= 0) { startOffset = orig; break; } } else { if ( prior != -1) { startOffset = prior; } break; } } frag.insert((int) startOffset, startMarker); } // convert it to a string so the extra spaces can be stripped // Do this AFTER above so positions will be correct, but // before word broundary stuff so the bondaries are correct String out = frag.toString(); out = out.trim(); out = out.replaceAll("\\n ", "\n"); out = out.replaceAll("\\n+", "\n"); out = out.replaceAll("\\n", " / "); out = StringEscapeUtils.escapeHtml(out); out = out.replace(startMarker, change); out = out.replace(endMarker, endTag); out = out.replace(omitMarker, omit); // trim frag so it starts/ends on word boundaries int lastTagPos = out.lastIndexOf('>')+1; if ( lastTagPos < out.length() ) { int endPos = out.lastIndexOf(' '); if ( endPos != -1 ) { out = out.substring(0, (int)Math.max(endPos, lastTagPos)); } } int firstTagPos = out.indexOf('<'); if ( firstTagPos > 0 ) { int startPos = out.indexOf(' '); if ( startPos != -1) { out = out.substring(Math.min(startPos, firstTagPos)); } } // append lead/trail ellipses as needed if (contextRange.getStart() > 0) { out = "..." + out; } if ( contextRange.getEnd() < maxLen) { out = out + "..."; } return out; } }