package com.yahoo.dtf.actions.event; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import com.yahoo.dtf.actions.Action; import com.yahoo.dtf.actions.event.Attribute.FieldType; import com.yahoo.dtf.exception.DTFException; import com.yahoo.dtf.exception.ParseException; import com.yahoo.dtf.query.QueryFactory; import com.yahoo.dtf.query.QueryIntf; import com.yahoo.dtf.query.TxtQuery; import com.yahoo.dtf.recorder.RecorderBase; import com.yahoo.dtf.recorder.RecorderFactory; import com.yahoo.dtf.recorder.TextRecorder; import com.yahoo.dtf.storage.StorageIntf; import com.yahoo.dtf.util.CLIUtil; import com.yahoo.dtf.util.StringUtil; /** * @dtf.tag sort * * @dtf.since 1.0 * @dtf.author Rodney Gomes * * @dtf.tag.desc This tag will sort an already existing recording of events * by the field specified and will rewrite the file using the * same previously recorded format. In the process of sorting * you can also filter out the fields that no longer interest you * and save time in the whole sorting process. * * @dtf.tag.example * */ public class Sort extends Action { /** * @dtf.attr input * @dtf.attr.desc the uri of the input of events to be read from. */ private String input = null; /** * @dtf.attr output * @dtf.attr.desc the uri of the output of events to be written to. */ private String output = null; /** * @dtf.attr type * @dtf.attr.desc the type of events that were recorded. */ private String type = null; /** * @dtf.attr event * @dtf.attr.desc the exact event name if you wish to order (and filter) on * a specific set of events. */ private String event = null; /** * @dtf.attr encoding * @dtf.attr.desc identify any specific character encoding used by this * event file. */ private String encoding = null; public Sort() { } public void execute() throws DTFException { int maxevents = 10000; QueryIntf query = QueryFactory.getQuery(getType()); final ArrayList<Field> fields = findActions(Field.class); Select select = (Select)findFirstAction(Select.class); String storage = getInput().getHost(); query.open(getInput(), (select == null ? null : select.findActions(Field.class)), null, null, getEvent(), "UTF-8"); int count = 0, parts = 0; HashMap<String,String> map = null; final ArrayList<String> rFields = new ArrayList<String>(); final ArrayList<FieldType> rTypes = new ArrayList<FieldType>(); for (Field field : fields) { FieldType type = field.getType(); rTypes.add(type); } /* * Comparator function to compare event data */ Comparator<HashMap<String, String>> cmp = new Comparator<HashMap<String,String>>() { public int compare(HashMap<String,String> o1, HashMap<String,String> o2) { int cnt = 0; for (String fname : rFields) { String value1 = o1.get(fname); String value2 = o2.get(fname); FieldType type = rTypes.get(cnt); int cmp = 0; if ( type == null ) { cmp = StringUtil.naturalCompare(value1, value2); } else if ( type == FieldType.STRING ) { cmp = value1.compareTo(value2); } else if ( type == FieldType.INTEGER ) { cmp = Integer.parseInt(value1) - Integer.parseInt(value2); } if ( cmp != 0 ) return cmp; cnt++; } return 0; } }; List<HashMap<String, String>> events = new ArrayList<HashMap<String,String>>(); final String eventname = getEvent(); String filename = getInput().getPath(); for (Field field : fields) rFields.add(eventname + "." + field.getName()); while ( (map = query.next(false)) != null ) { int where = Collections.binarySearch(events, map, cmp); if ( where < 0 ) events.add((-where)-1, map); else events.add(where, map); if ( count > maxevents ) { // enough events to create a secondary file String path = "storage://" + storage + "/" + filename + ".tmp-" + parts; record(path, events); events = new ArrayList<HashMap<String,String>>(); count = 0; parts++; } count++; } // events left to record if ( count > 0 ) { String path = "storage://" + storage + "/" + filename + ".tmp-" + parts; record(path,events); events = new ArrayList<HashMap<String,String>>(); parts++; } // now the file is partly ordered within the other files we must read // back from each of those files while recreating the newly sorted file URI uri; try { uri = new URI("storage://" + storage + "/" + filename + ".sort_tmp"); } catch (URISyntaxException e) { throw new DTFException("URI parsing issue.",e); } String sorted_filename = uri.getPath(); RecorderBase output = RecorderFactory.getRecorder(getType(), uri, false, getEncoding()); TxtQuery[] inputs = new TxtQuery[parts]; for (int i = 0; i < parts; i++) { inputs[i] = new TxtQuery(); try { uri = new URI("storage://" + storage + "/" + filename + ".tmp-" + i); } catch (URISyntaxException e) { throw new DTFException("URI parsing issue.",e); } inputs[i].open(uri, null, null, null, null); } boolean available = true; HashMap<String, String>[] heads = new HashMap[parts]; long eventcount = 0; output.start(); while ( available ) { available = false; HashMap<String, String> min = null; int index = -1; for (int i = 0; i < parts; i++) { if (inputs[i] != null) { if ( heads[i] == null ) { heads[i] = inputs[i].next(false); if ( heads[i] == null ) { inputs[i].close(); inputs[i] = null; } } if ( heads[i] != null ) { available = true; if ( min == null || cmp.compare(heads[i], min) < 0 ) { min = heads[i]; index = i; } } } } if ( index != -1 ) { heads[index] = null; // we have our smallest line lets output it com.yahoo.dtf.recorder.Event event = CLIUtil.hashMapToEvent(min); output.record(event); eventcount++; } } output.stop(); StorageIntf store = getStorageFactory().getStorage(storage); for (int i = 0; i < parts; i++) { store.delete(filename + ".tmp-" + i); } // last step move the newly sorted file over the old one. store.move(sorted_filename, getOutput().getPath()); } private void record(String uripath, List<HashMap<String, String>> events) throws DTFException { URI uri; try { uri = new URI(uripath); } catch (URISyntaxException e) { throw new DTFException("URI parsing issue.",e); } TextRecorder recorder = new TextRecorder(uri, false); recorder.start(); for (HashMap<String, String> aux : events) { com.yahoo.dtf.recorder.Event event = CLIUtil.hashMapToEvent(aux); recorder.record(event); } recorder.stop(); } public URI getOutput() throws ParseException { return parseURI(output); } public void setOutput(String output) { this.output = output; } public URI getInput() throws ParseException { return parseURI(input); } public void setInput(String input) { this.input = input; } public String getType() throws ParseException { return replaceProperties(type); } public void setType(String type) { this.type = type; } public String getEvent() { return event; } public void setEvent(String event) { this.event = event; } public String getEncoding() { return encoding; } public void setEncoding(String encoding) { this.encoding = encoding; } }