package com.yahoo.dtf.actions.file; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map.Entry; import com.yahoo.dtf.actions.Action; import com.yahoo.dtf.exception.DTFException; import com.yahoo.dtf.recorder.Event; import com.yahoo.dtf.recorder.Recorder; import com.yahoo.dtf.storage.StorageFactory; /** * @dtf.tag diff * * @dtf.since 1.0 * @dtf.author Rodney Gomes * * @dtf.tag.desc This tag is used for calculating the differences between two * or more files. The tag will throw events of the type diffs.xxx * that will contain the various differences between the files and * will throw one final diff.xxx event that contains the * summarized information for all of the differences between those * files. See the Event information below for more detalis. * * @dtf.event diff * @dtf.event.attr difflines * @dtf.event.attr.desc The total number of lines that are different between * the files being diffed. * * @dtf.event diff * @dtf.event.attr totallines * @dtf.event.attr.desc The total number of lines in that were compared during * the diff calculation. * * @dtf.event diff * @dtf.event.attr totalfiles * @dtf.event.attr.desc The total number of files that were compared during the * diff calculation. * * @dtf.event diff * @dtf.event.attr uri[0-totalfiles].path * @dtf.event.attr.desc The full path for each of of the files that were * compared during the diff calculation. * * @dtf.event diffs * @dtf.event.attr uris * @dtf.event.attr.desc the ids of the URI's that differed (0-totafiles). * * @dtf.event diffs * @dtf.event.attr line[0-totalfiles] * @dtf.event.attr.desc There will exist two of these per diffs event that is * thrown and they will be numbered based on the uris * attribute so that you can easily identify which file * contained which line. * * @dtf.event diffs * @dtf.event.attr linenumber * @dtf.event.attr.desc The line number of where the two files identified by * the uris attribute differ. * * @dtf.tag.example * <diff> * <input uri="storage://OUTPUT/file1"/> * <input uri="storage://OUTPUT/file2"/> * <input uri="storage://OUTPUT/file3"/> * </diff> * * @dtf.tag.example * <diff> * <input uri="storage://OUTPUT/basedata"/> * <input uri="storage://OUTPUT/currentdata"/> * </diff> * * @dtf.tag.example * <sequence> * <record uri="storage://OUTPUT/diffs"> * <diff> * <input uri="storage://OUTPUT/file1"/> * <input uri="storage://OUTPUT/file2"/> * <input uri="storage://OUTPUT/file3"/> * </diff> * </record> * * <log>number of lines that differ: ${diff.difflines}</log> * * <query uri="storage://OUTPUT/diffs" * cursor="differences" * event="diffs"/> * * <iterate cursor="differences"> * <log>files differ at line: ${differences.linenumber}</log> * <for property="id" range="${differences.uris}"> * <log>file ${diff.uri${id}.path} has ${differences.line${id}}</log> * </for> * </iterate> * </sequence> */ public class Diff extends Action { public void execute() throws DTFException { ArrayList<Input> inputs = findActions(Input.class); BufferedReader[] brs = new BufferedReader[inputs.size()]; StorageFactory sf = getStorageFactory(); for (int i = 0; i < inputs.size(); i++) { URI uri = inputs.get(i).getUri(); if ( uri != null ) { brs[i] = sf.getBufferedReader(uri); } else { String data = inputs.get(i).getCDATA(); if ( data != null ) { ByteArrayInputStream bais = new ByteArrayInputStream(data.getBytes()); InputStreamReader reader = new InputStreamReader(bais); brs[i] = new BufferedReader(reader); } } } long MAX = 100; long count = 0; long different = 0; try { try { Event event = new Event("diff"); event.start(); HashMap<Integer,ArrayList<String>> lines = new HashMap<Integer, ArrayList<String>>(); for (int i = 0; i < brs.length; i++) lines.put(i, new ArrayList<String>()); boolean haslines = true; while ( haslines ) { haslines = false; Iterator<Entry<Integer, ArrayList<String>>> entries = lines.entrySet().iterator(); while ( entries.hasNext() ) { Entry<Integer, ArrayList<String>> flines = entries.next(); Integer index = flines.getKey(); ArrayList<String> clines = flines.getValue(); if ( clines.size() < MAX ) { String line = null; while ((line = brs[index].readLine()) != null && clines.size() < MAX ) { haslines = true; clines.add(line); } if ( line != null ) clines.add(line); } else { haslines = true; } } if ( !haslines ) break; ArrayList<Event> diffs = new ArrayList<Event>(); ArrayList<Event> diffs1 = new ArrayList<Event>(); ArrayList<Event> diffs2 = new ArrayList<Event>(); ArrayList<String> blines = lines.get(0); if ( blines.size() == 0 ) { for (int i = 1; i < lines.size(); i++) { ArrayList<String> alines = lines.get(i); while ( alines.size() > 0 ) { Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", "<" + alines.get(0)); diffs1.add(aux); diffs2.add(aux); if ( getLogger().isDebugEnabled() ) getLogger().debug("< " + alines.get(0)); alines.remove(0); } } } for (int i = 1; i < lines.size(); i++) { int a,b; ArrayList<String> alines = lines.get(i); // calculate the longest common string int[][] opt = new int[alines.size()+1][blines.size()+1]; for (a = alines.size()-1; a >= 0; a--) { for (b = blines.size()-1; b >= 0; b--) { if (!blines.get(b).equals(alines.get(a))) { opt[a][b] = opt[a+1][b+1] + 1; } else { opt[a][b] = Math.max(opt[a+1][b], opt[a][b+1]); } } } a = 0; b = 0; while (a < alines.size() && b < blines.size()) { count++; if ( alines.get(a).equals(blines.get(b)) ) { a++; b++; } else if ( opt[a+1][b] >= opt[a][b+1] ) { if ( getLogger().isDebugEnabled() ) getLogger().debug("< " + alines.get(a)); Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", "<" + alines.get(a)); diffs1.add(aux); a++; } else { if ( getLogger().isDebugEnabled() ) getLogger().debug("> " + blines.get(b)); Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", ">" + blines.get(b)); diffs1.add(aux); b++; } } while (a < alines.size() || b < blines.size()) { count++; if (a == alines.size()) { Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", ">" + blines.get(b)); diffs1.add(aux); if ( getLogger().isDebugEnabled() ) getLogger().debug("> " + blines.get(b)); b++; } else if (b == blines.size()) { Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", "<" + alines.get(a)); diffs1.add(aux); if ( getLogger().isDebugEnabled() ) getLogger().debug("< " + alines.get(a)); a++; } } alines = lines.get(i); ArrayList<String> tlines = null; tlines = blines; blines = alines; alines = tlines; opt = new int[alines.size()+1][blines.size()+1]; // calculate the longest common string for (a = alines.size()-1; a >= 0; a--) { for (b = blines.size()-1; b >= 0; b--) { if (!blines.get(b).equals(alines.get(a))) { opt[a][b] = opt[a+1][b+1] + 1; } else { opt[a][b] = Math.max(opt[a+1][b], opt[a][b+1]); } } } a = 0; b = 0; while (a < alines.size() && b < blines.size()) { if ( alines.get(a).equals(blines.get(b)) ) { a++; b++; } else if ( opt[a+1][b] >= opt[a][b+1] ) { if ( getLogger().isDebugEnabled() ) getLogger().debug("< " + alines.get(a)); Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", "<" + alines.get(a)); diffs2.add(aux); a++; } else { if ( getLogger().isDebugEnabled() ) getLogger().debug("> " + blines.get(b)); Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", ">" + blines.get(b)); diffs2.add(aux); b++; } } while (a < alines.size() || b < blines.size()) { if (a == alines.size()) { Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", ">" + blines.get(b)); diffs2.add(aux); if ( getLogger().isDebugEnabled() ) getLogger().debug("> " + blines.get(b)); b++; } else if (b == blines.size()) { Event aux = new Event("diffs"); aux.start(); aux.stop(); aux.addAttribute("uris","0," + i); aux.addAttribute("diff", "<" + alines.get(a)); diffs2.add(aux); if ( getLogger().isDebugEnabled() ) getLogger().debug("< " + alines.get(a)); a++; } } diffs = (diffs1.size() > diffs2.size() ? diffs2 : diffs1); for (int x = 0; x < a; x++) alines.remove(0); } for (int x = 0; x < diffs.size(); x++) { different++; Recorder recorder = getRecorder(); recorder.record(diffs.get(x)); } blines.clear(); } event.stop(); for (int i = 0; i < brs.length; i++) { URI uri = inputs.get(i).getUri(); if ( uri != null ) { String path = sf.getPath(uri); event.addAttribute("uri" + i + ".path", path); } else { event.addAttribute("uri" + i + ".path", inputs.get(i).getXMLLocation()); } } event.addAttribute("totalfiles", brs.length); event.addAttribute("totallines", count); event.addAttribute("difflines", different); getRecorder().record(event); } finally { for (int i = 0; i < inputs.size(); i++) { if ( brs[i] != null ) brs[i].close(); } } } catch (IOException e) { throw new DTFException("Error reading files.",e); } } }