package org.archive.wayback.replay.html.transformer; import java.util.List; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.replay.html.ReplayParseContext; import org.archive.wayback.replay.html.StringTransformer; public class ExtractInsertTransformer implements StringTransformer { public static class Rule { String urlkeyContains; String startAfter; String fromPrefix; String fromBeforeMatch; String untilAny; String insert; boolean insertAtEnd; public String getUrlkeyContains() { return urlkeyContains; } public String getStartAfter() { return startAfter; } public String getFromPrefix() { return fromPrefix; } public String getFromBeforeMatch() { return fromBeforeMatch; } public String getUntilAny() { return untilAny; } public String getInsert() { return insert; } public boolean isInsertAtEnd() { return insertAtEnd; } public void setUrlkeyContains(String urlkeyContains) { this.urlkeyContains = urlkeyContains; } public void setStartAfter(String startAfter) { this.startAfter = startAfter; } public void setFromPrefix(String fromPrefix) { this.fromPrefix = fromPrefix; } public void setFromBeforeMatch(String fromBeforeMatch) { this.fromBeforeMatch = fromBeforeMatch; } public void setUntilAny(String untilAny) { this.untilAny = untilAny; } public void setInsert(String insert) { this.insert = insert; } public void setInsertAtEnd(boolean insertAtEnd) { this.insertAtEnd = insertAtEnd; } } protected List<Rule> rules; protected boolean matchOnce = false; public final static String EXTRACT_INSERT_MATCHED = "_extractormatched"; @Override public String transform(ReplayParseContext context, String input) { if (context.getData(EXTRACT_INSERT_MATCHED) != null) { return input; } for (Rule rule : rules) { // Check urlScope if (rule.urlkeyContains != null) { CaptureSearchResult result = context.getCaptureSearchResult(); if ((result != null) && !result.getUrlKey().contains(rule.urlkeyContains)) { continue; } } int index = input.indexOf(rule.startAfter); if (index < 0) { continue; } index += rule.startAfter.length(); String insertion; if (rule.fromPrefix != null) { boolean matching = true; boolean skipRule = false; while (matching) { index = input.indexOf(rule.fromPrefix, index); if (index < 0) { skipRule = true; break; } if (rule.fromBeforeMatch == null || (isAny(input.charAt(index - 1), rule.fromBeforeMatch))) { matching = false; } index += rule.fromPrefix.length(); } if (skipRule) { continue; } int endIndex = index; while ((endIndex < input.length()) && !isAny(input.charAt(endIndex), rule.untilAny)) { endIndex++; } if (endIndex == index) { continue; } String extract = input.substring(index, endIndex); insertion = String.format(rule.insert, extract); } else { insertion = rule.insert; } if (rule.insertAtEnd) { input = input + insertion; } else { input = insertion + input; } if (matchOnce) { context.putData(EXTRACT_INSERT_MATCHED, EXTRACT_INSERT_MATCHED); break; } } return input; } public boolean isAny(char c, String s) { for (int i = 0; i < s.length(); i++) { if (s.charAt(i) == c) { return true; } } return false; } public List<Rule> getRules() { return rules; } public void setRules(List<Rule> rules) { this.rules = rules; } public boolean isMatchOnce() { return matchOnce; } public void setMatchOnce(boolean matchOnce) { this.matchOnce = matchOnce; } }