package org.nines.cleaner; import org.nines.ICustomCleaner; public class NcawCleaner implements ICustomCleaner { public String clean(String archiveName, String content) { if ( archiveName.equals("ncaw") == false) { return content; } // take file line by line. Keep text bound by // <td class="main_text" // or // <td class="notes_text" // ending with </td> // String starters[] = {"<td class=\"main_text\"", "<td class=\"notes_text\""}; String ender = "</td>"; String[] lines = content.split("\n"); StringBuffer finalContent = new StringBuffer(); boolean skip = true; boolean lineHandled = false; for ( int i=0; i<lines.length; i++) { String line = lines[i].trim(); lineHandled = false; // look for </td> when in midst of acceptinging multiline content if ( skip == false && line.contains(ender) ) { int p0 = line.indexOf(ender); line = line.substring(0,p0).trim(); if ( line.length() > 0 ) { finalContent.append(line).append("\n"); } skip = !skip; lineHandled = true; continue; } // look for any of the starters in this line... for ( int s=0; s<starters.length; s++) { String starter = starters[s]; if ( line.contains(starter) ) { int p0 = line.indexOf(starter); int p1 = line.indexOf(">", p0); line = line.substring(p1+1); int p2 = line.indexOf(ender); if ( p2 > -1) { line = line.substring(0,p2).trim(); if ( line.length() > 0) { finalContent.append(line).append("\n"); } } else { line = line.trim(); if (line.length() > 0) { finalContent.append(line).append("\n"); } skip = !skip; } lineHandled = true; break; } } // if not handled yet, append text if we are not skipping if ( lineHandled == false ) { if ( skip == false ) { finalContent.append(line).append("\n"); } } } System.out.println(finalContent.toString()); return finalContent.toString().trim(); } }