package i5.las2peer.services.ocd.adapters.graphInput; import java.io.IOException; import java.io.Reader; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import i5.las2peer.services.ocd.adapters.AdapterException; import i5.las2peer.services.ocd.adapters.Adapters; import i5.las2peer.services.ocd.graphs.CustomGraph; import i5.las2peer.services.ocd.preprocessing.TextProcessor; import i5.las2peer.services.ocd.utils.DocIndexer; import y.base.Edge; import y.base.Node; import y.base.NodeCursor; /** * A graph input adapter for a node list which includes a content attribute for each node and edges in form of a * threadid or a specified sender and receiver. * Each line must contain either a author attribute (e.g. a user name we can use as node name), a content attribute and a threadid attribute, * or a sender attribute used as the user name, a receiver attribute to compute the edges and a content attribute. * There can be several lines for one user, so that the content will simply be attached. * In the first line the attribute names have to be specified. * @author Sabrina */ public class NodeContentEdgeListGraphInputAdapter extends AbstractGraphInputAdapter{ ///////////////// ////Variables//// ///////////////// /** * Variable for the beginning of the date interval, the posts have to be issued in */ private Date startDate = null; /** * Variable for the beginning of the date interval, the posts have to be issued in */ private Date endDate = null; private String path = "C:\\indexes\\pgsql"; public NodeContentEdgeListGraphInputAdapter(){ } public NodeContentEdgeListGraphInputAdapter(Reader reader){ this.reader = reader; } public void setParameter(Map<String,String> param) throws IllegalArgumentException, ParseException{ SimpleDateFormat df = new SimpleDateFormat ("yyyy-MM-dd"); if(param.containsKey("startDate")){ startDate = df.parse(param.get("startDate")); } if(param.containsKey("endDate")){ endDate = df.parse(param.get("endDate")); } if(param.containsKey("path")){ path = param.get("path"); } } @Override public CustomGraph readGraph() throws AdapterException { CustomGraph graph = new CustomGraph(); try{ List<String> line = Adapters.readLine(reader); int nameIndex = -1; int contentIndex = -1; int senderIndex = -1; int receiverIndex = -1; int threadIndex = -1; int dateIndex = -1; int index = 0; if(line.size() <= 0){ throw new AdapterException("Input format invalid"); } for(Iterator<String> it = line.iterator(); it.hasNext();){ String curr = it.next(); switch(curr.toUpperCase()){ case "AUTHOR": nameIndex = index; break; case "CONTENT": contentIndex = index; break; case "SENT_BY": senderIndex = index; break; case "REPLIES_TO": receiverIndex = index; break; case "THREAD_ID": threadIndex = index; break; case "DATE": dateIndex = index; break; } index++; } if(nameIndex == -1 && senderIndex == -1){ throw new AdapterException("No name attribute"); } if(contentIndex == -1){ throw new AdapterException("No content attribute"); } if(receiverIndex == -1){ if(threadIndex == -1){ throw new AdapterException("No attribute to generate links"); }else{ graph = readThreadGraph(nameIndex, contentIndex, dateIndex, threadIndex); } }else{ graph = readSenderReceiverGraph(senderIndex, receiverIndex, contentIndex, dateIndex, line.size()); } }catch(Exception e){ throw new AdapterException(e); } finally { try { reader.close(); } catch (Exception e) { } } return graph; } private CustomGraph readSenderReceiverGraph(int senderIndex, int receiverIndex, int contentIndex, int dateIndex, int lineLength) throws IOException, AdapterException { TextProcessor textProc = new TextProcessor(); Map<String, Node> nodeNames = new HashMap<String, Node>(); Map<String, String> nodeContents = new HashMap<String, String>(); CustomGraph graph = new CustomGraph(); Map<Node,HashMap<String,Integer>> links = new HashMap<Node, HashMap<String,Integer>>(); SimpleDateFormat df = new SimpleDateFormat ("yyyy-MM-dd"); try{ // read first content line List<String> line = Adapters.readLineTabIgnoreLineBreak(reader,lineLength); graph.setPath(path); // create nodes while(line.size() > 0){ Date d = df.parse(line.get(dateIndex)); /*if(startDate != null || endDate != null){ if(d.after(endDate)){ //assuming that we have a dataset sorted according to date break; } }*/ if(!((startDate != null && d.before(startDate)) || (endDate != null && d.after(endDate)))){ Node node; String customNodeName = line.get(senderIndex); String customNodeContent = textProc.preprocText(line.get(contentIndex)); String customNodeReceiver = line.get(receiverIndex); // node does not yet exist if(!nodeNames.containsKey(customNodeName)){ node = graph.createNode(); //create new node and add attributes graph.setNodeName(node , customNodeName); nodeContents.put(customNodeName, customNodeContent); //graph.setNodeContent(node, customNodeContent); HashMap<String,Integer> temp = new HashMap<String,Integer>(); temp.put(customNodeReceiver,1); // initialize structural weights (number of connections between two nodes) links.put(node, temp); // temporarly save nodes connections to other nodes nodeNames.put(customNodeName, node); // node is already create, so content has to be added }else{ node = nodeNames.get(customNodeName); // get respective node //customNodeContent = customNodeContent + " " + graph.getNodeContent(node); //add further content to the nodes attribute nodeContents.merge(customNodeName, " " + customNodeContent, String::concat); //graph.setNodeContent(node, customNodeContent); HashMap<String,Integer> temp = links.get(node); // get connections of the node if (temp.containsKey(customNodeReceiver)) { int r = temp.get(customNodeReceiver); // increase weight if link already exists r++; temp.put(customNodeReceiver,r); }else{ temp.put(customNodeReceiver, 1); // add new link and initialize weight } links.put(node, temp); } } //read next content line line = Adapters.readLineTabIgnoreLineBreak(reader,lineLength); } DocIndexer di = new DocIndexer(graph.getPath()); //create lucene index for content for(Entry<String,String> e : nodeContents.entrySet()){ di.indexDocPerField(e.getKey(), e.getValue()); } //create edges for each entry in the temporary edge list for(Entry<Node, HashMap<String,Integer>> entry : links.entrySet()){ Node curr = entry.getKey(); HashMap<String,Integer> list = entry.getValue(); for(Entry<String,Integer> e : list.entrySet()){ if(nodeNames.containsKey(e.getKey())){ Edge edge = graph.createEdge(curr, nodeNames.get(e.getKey())); graph.setEdgeWeight(edge, e.getValue()); } } } }catch(Exception e){ throw new AdapterException(e); } finally { try { reader.close(); } catch (Exception e) { } } return graph; } private CustomGraph readThreadGraph(int nameIndex, int contentIndex, int dateIndex, int threadIndex) throws IOException, AdapterException { TextProcessor textProc = new TextProcessor(); Map<String, Node> nodeNames = new HashMap<String, Node>(); Map<String,String> nodeContents = new HashMap<String,String>(); Map<Node, LinkedList<String>> nodeThreads = new HashMap<Node, LinkedList<String>>(); CustomGraph graph = new CustomGraph(); SimpleDateFormat df = new SimpleDateFormat ("yyyy-MM-dd"); try{ List<String> line = Adapters.readLineTab(reader); graph.setPath(path); // create nodes while(line.size() > 0){ Date d = df.parse(line.get(dateIndex)); if(!((startDate != null && d.before(startDate)) || (endDate != null && d.after(endDate)))){ Node node; String customNodeName = textProc.deletWhiteSpace(line.get(nameIndex)); String customNodeContent = textProc.preprocText(line.get(contentIndex)); String customNodeThread = line.get(threadIndex); if(!nodeNames.containsKey(customNodeName)){ node = graph.createNode(); graph.setNodeName(node , customNodeName); nodeContents.put(customNodeName, customNodeContent); //graph.setNodeContent(node, customNodeContent); nodeNames.put(customNodeName, node); LinkedList<String> th = new LinkedList<String>(); th.add(customNodeThread); nodeThreads.put(node, th); }else{ node = nodeNames.get(customNodeName); //customNodeContent = customNodeContent + " " + graph.getNodeContent(node); //graph.setNodeContent(node, customNodeContent); nodeContents.merge(customNodeName, " " + customNodeContent, String::concat); LinkedList<String> thr = nodeThreads.get(node); thr.add(customNodeThread); nodeThreads.put(node, thr); } } line = Adapters.readLineTab(reader); } DocIndexer di = new DocIndexer(graph.getPath()); //create lucene index for content for(Entry<String,String> e : nodeContents.entrySet()){ di.indexDocPerField(e.getKey(), e.getValue()); } //create edges for each entry in the temporary edge list for(Entry<Node, LinkedList<String>> entry : nodeThreads.entrySet()){ Node curr = entry.getKey(); LinkedList<String> list = entry.getValue(); for(String str:list){ for(Entry<Node, LinkedList<String>> reciever : nodeThreads.entrySet()){ if(curr != reciever.getKey() && reciever.getValue().contains(str)){ graph.createEdge(curr, reciever.getKey()); //graph.setEdgeWeight(edge, reciever.getValue()); } } } } }catch(Exception e){ throw new AdapterException(e); } finally { try { reader.close(); } catch (Exception e) { } } return graph; } }