package i5.las2peer.services.ocd.adapters.graphInput; import java.io.File; import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import i5.las2peer.services.ocd.adapters.AdapterException; import i5.las2peer.services.ocd.graphs.CustomGraph; import i5.las2peer.services.ocd.preprocessing.TextProcessor; import i5.las2peer.services.ocd.utils.DocIndexer; import y.base.Edge; import y.base.Node; public class XMLGraphInputAdapter extends AbstractGraphInputAdapter{ public XMLGraphInputAdapter(){ } ///////////////// ////Variables//// ///////////////// /** * Variable for the beginning of the date interval, the posts have to be issued in */ private Date startDate = null; /** * Variable for the beginning of the date interval, the posts have to be issued in */ private Date endDate = null; private String filePath = "ocd/test/input/stackexAcademia.xml"; private String indexPath = null; public void setParameter(Map<String,String> param) throws IllegalArgumentException, ParseException{ SimpleDateFormat df = new SimpleDateFormat ("yyyy-MM-dd"); if(param.containsKey("startDate")){ startDate = df.parse(param.get("startDate")); } if(param.containsKey("endDate")){ endDate = df.parse(param.get("endDate")); } if(param.containsKey("filePath")){ filePath = param.get("filePath"); } if(param.containsKey("indexPath")){ indexPath = param.get("indexPath"); } } public CustomGraph readGraph() throws AdapterException{ CustomGraph graph = new CustomGraph(); TextProcessor textProc = new TextProcessor(); Map<String, Node> nodeNames = new HashMap<String, Node>(); Map<String, Node> nodeIds = new HashMap<String, Node>(); Map<String, String> nodeContents = new HashMap<String, String>(); Map<Node,HashMap<String,Integer>> links = new HashMap<Node, HashMap<String,Integer>>(); SimpleDateFormat df = new SimpleDateFormat ("yyyy-MM-dd"); try { if(indexPath == null){ throw new AdapterException("No path for saving index"); } graph.setPath(indexPath); File file = new File(filePath); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder; docBuilder = factory.newDocumentBuilder(); Document doc = docBuilder.parse(file); Element docElement = doc.getDocumentElement(); NodeList nodeList = docElement.getElementsByTagName("row"); int length = nodeList.getLength(); for(int i = 0; i < length; i++){ Element e = (Element) nodeList.item(i); Date d = df.parse(e.getAttribute("CreationDate")); if(!((startDate != null && d.before(startDate)) || (endDate != null && d.after(endDate)))){ Node node; String customNodeName = e.getAttribute("OwnerUserId"); if(customNodeName == ""){ customNodeName = textProc.deletWhiteSpace(e.getAttribute("OwnerDisplayName")); } String customNodeContent = textProc.preprocText(e.getAttribute("Body")); String customNodeId = e.getAttribute("Id"); String customNodeParent = e.getAttribute("ParentId"); // node does not yet exist if(!nodeNames.containsKey(customNodeName)){ node = graph.createNode(); //create new node and add attributes graph.setNodeName(node , customNodeName); nodeIds.put(customNodeId, node); nodeContents.put(customNodeName, customNodeContent); //graph.setNodeContent(node, customNodeContent); if(customNodeParent != ""){ HashMap<String,Integer> temp = new HashMap<String,Integer>(); temp.put(customNodeParent,1); // initialize structural weights (number of connections between two nodes) links.put(node, temp); } // temporarly save nodes connections to other nodes nodeNames.put(customNodeName, node); // node is already create, so content has to be added }else{ node = nodeNames.get(customNodeName); // get respective node //customNodeContent = customNodeContent + " " + graph.getNodeContent(node); //add further content to the nodes attribute nodeContents.merge(customNodeName, " " + customNodeContent, String::concat); //graph.setNodeContent(node, customNodeContent); if(!nodeIds.containsKey(customNodeId)){ nodeIds.put(customNodeId, node); } if(customNodeParent != ""){ HashMap<String,Integer> temp; if(links.get(node) == null){ temp = new HashMap<String,Integer>(); }else{ temp = links.get(node); // get connections of the node } if (temp.containsKey(customNodeParent)) { int r = temp.get(customNodeParent); // increase weight if link already exists r++; temp.put(customNodeParent,r); }else{ temp.put(customNodeParent, 1); // add new link and initialize weight } links.put(node, temp); } } } } DocIndexer di = new DocIndexer(graph.getPath()); //create lucene index for content for(Entry<String,String> e : nodeContents.entrySet()){ di.indexDocPerField(e.getKey(), e.getValue()); } //create edges for each entry in the temporary edge list for(Entry<Node, HashMap<String,Integer>> entry : links.entrySet()){ Node curr = entry.getKey(); HashMap<String,Integer> list = entry.getValue(); for(Entry<String,Integer> e : list.entrySet()){ if(nodeIds.containsKey(e.getKey())){ Edge edge = graph.createEdge(curr, nodeIds.get(e.getKey())); graph.setEdgeWeight(edge, e.getValue()); } } } } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (DOMException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } return graph; } }