/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.alignment.framework.graph; import java.io.BufferedReader; import java.io.FileOutputStream; import java.io.FileReader; import java.io.PrintStream; import java.util.HashMap; import java.util.HashSet; import java.util.TreeSet; import de.tudarmstadt.ukp.alignment.framework.Global; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; public class CreateAlignmentFromGraphOutput { /** *This method creates an alignment from the distances output by the Dijkstra-WSA implementation and outputs them in the desired format */ public static void main(String[] args) { /* GLOBAL SETTINGS */ Global.init(); final String language = ELanguageIdentifier.ENGLISH; try { /*RESOURCE 1*/ boolean synset1 = true; boolean usePos1 = true; final int monoLinkThreshold1 = 1000; final int prefix1 = Global.WN_Synset_prefix; OneResourceBuilder bg_1 = new OneResourceBuilder("uby_release_1_0","root","fortuna", prefix1,language,synset1,usePos1); /*RESOURCE 2*/ boolean synset2 = false; boolean usePos2 = true; final int monoLinkThreshold2 = 2000; final int prefix2 = Global.WKT_EN_prefix; OneResourceBuilder bg_2 = new OneResourceBuilder("uby_release_1_0","root","fortuna", prefix2,language,synset2,usePos2); /*Alignment parameters*/ int depth = 5; // Manually set; exhaustive search can be triggered by depth >20 boolean allowMultiple = true; //allow 1:n alignments boolean alignSingle = false; //allow instant alignment in case of only ine candidate boolean backoff=false; //use a similarity-based backoff file in case no alignment can be found String backoff_file = "target/"+"WN_OW_en_alignment_similarity_Pos_tfidf_nonZero.txt"; createAlignment(bg_1,bg_2,monoLinkThreshold1,monoLinkThreshold2, depth, allowMultiple,alignSingle, backoff, backoff_file); boolean extRef = false; //Use either UBY-Ids or the original IDs for the final alignment file Global.mapAlignmentToUby(bg_1,bg_2,"target/"+bg_1.prefix_string+"_"+bg_2.prefix_string+"_alignment_dwsa_"+(bg_2.pos ? "Pos": "noPos")+"_"+depth+"_"+(allowMultiple? "1toN" :"1to1")+(alignSingle ? "_alignSingle":"")+(backoff ? "_backoff":"")+".txt", extRef); } catch(Exception e) { e.printStackTrace(); } } /** * This method creates an alignment from the distances output by the Dijkstra-WSA implementation */ @Deprecated public static void createAlignmentOldGraphFormat(OneResourceBuilder gb1,OneResourceBuilder gb2, int monoLinkThreshold1, int monoLinkThreshold2, int depth, boolean allowMultiple,boolean alignSingle, boolean backoff, String backoff_file, boolean all_distances, String candidate_file) { HashMap<String,TreeSet<NodeWithDistance> > alignment_results = new HashMap<String, TreeSet<NodeWithDistance>>(); HashMap<String,HashSet<String> > candidates = new HashMap<String, HashSet<String>>(); //Read the candidates and distance files try { FileReader in = new FileReader(candidate_file); BufferedReader input = new BufferedReader(in); FileReader in2 = new FileReader("target/"+gb1.prefix_string+"_"+(gb1.synset?"synset":"sense")+"_"+(gb1.pos ? "Pos":"noPos")+"_relationMLgraph"+"_"+monoLinkThreshold1 +"_MERGED_"+ gb2.prefix_string+"_"+(gb2.synset?"synset":"sense")+"_"+(gb2.pos ? "Pos":"noPos")+"_relationMLgraph"+"_"+monoLinkThreshold2+ "_trivial_result.txt"); BufferedReader input2 = new BufferedReader(in2); FileOutputStream outstream; PrintStream p = null; if(all_distances) { outstream = new FileOutputStream("target/"+gb1.prefix_string+"_"+gb2.prefix_string+"_distances_dwsa_"+(gb2.pos ? "Pos": "noPos")+".txt"); p = new PrintStream( outstream ); p.println("f\t"+gb1.prefix_string+"_"+gb2.prefix_string+"_candidates_"+(gb2.pos ? "Pos": "noPos")+".txt"+"\t"+"DWSA distances"); } String current_id1 =""; String current_id2 =""; String distance =""; String line = ""; String line2 = ""; int i =0; while((line = input.readLine())!=null && (line2 = input2.readLine())!=null) { if(line.startsWith("p") || line.startsWith("f")) { continue; } if(line.startsWith("q") && line2.startsWith("d")) { System.out.println("Source Nodes parsed "+i++); // current_id1 = line.split(" ")[1]; // current_id2 = line.split(" ")[2]; current_id1 = line.split("\t")[0]; current_id2 = line.split("\t")[1]; distance = line2.split(" ")[1]; if(distance.length()>3) { distance = "1000"; } if(all_distances) { p.println(current_id1+"\t"+current_id2+"\t"+distance); continue; } if(alignment_results.get(current_id1)==null) { alignment_results.put(current_id1, new TreeSet<NodeWithDistance>()); } if(distance.length()>3) { NodeWithDistance nwd = new NodeWithDistance(Integer.parseInt(current_id2),1000); alignment_results.get(current_id1).add(nwd); } else { NodeWithDistance nwd = new NodeWithDistance(Integer.parseInt(current_id2), Integer.parseInt(distance)); alignment_results.get(current_id1).add(nwd); } } } input.close(); input2.close(); in.close(); in2.close(); /*HERE THE ACTUAL ANALYISIS BEGINS*/ if(!all_distances) { candidates = new HashMap<String, HashSet<String>>(); outstream = new FileOutputStream("target/"+gb1.prefix_string+"_"+gb2.prefix_string+"_alignment_dwsa_"+(gb2.pos ? "Pos": "noPos")+"_"+depth+"_"+(allowMultiple? "1toN" :"1to1")+(alignSingle ? "_alignSingle":"")+(backoff ? "_backoff":"")+".txt"); p = new PrintStream( outstream ); for(String s : alignment_results.keySet()) { TreeSet<NodeWithDistance> cands = alignment_results.get(s); TreeSet<NodeWithDistance> polled_out = new TreeSet<NodeWithDistance>(); HashSet<NodeWithDistance> targets = new HashSet<NodeWithDistance>(); int observed_d = 0; while(observed_d <=depth) { NodeWithDistance nwd =cands.pollFirst(); if(nwd==null) { break; } polled_out.add(nwd); observed_d = nwd.path_length; if(observed_d<=depth || cands.size() ==1 && alignSingle) { targets.add(nwd); } if(!allowMultiple) { break; } } if(cands!=null && polled_out!= null && !polled_out.isEmpty()) { cands.addAll(polled_out); /*HERE THE OUTPUT BEGINS*/ } for(NodeWithDistance t : targets) { /*Preparation for Backoff*/ if(!candidates.containsKey(s)) { candidates.put(s, new HashSet<String>()); } candidates.get(s).add(t+""); p.println(s+"\t"+t.id+"\t"+t.path_length); } } if(backoff) // We add the alignment from the backoff for this which were not aligned using DWSA { in = new FileReader("target/"+backoff_file) ; input = new BufferedReader(in); while((line = input.readLine())!=null) { if(line.startsWith("f")) { continue; } String id_1 = line.split("\t")[0]; String id_2 = line.split("\t")[1]; String conf = line.split("\t")[2]; if(candidates.containsKey(id_1)) { System.out.println("Already aligned!!"); continue; } p.println(id_1+"\t"+id_2+"\t"+conf); } input.close(); in.close(); } p.close(); } } catch(Exception e) { e.printStackTrace(); } } /** * This method creates an alignment from the distances output by the Dijkstra-WSA implementation */ public static void createAlignment(OneResourceBuilder gb1,OneResourceBuilder gb2, int monoLinkThreshold1, int monoLinkThreshold2, int depth, boolean allowMultiple,boolean alignSingle, boolean backoff, String backoff_file) { HashMap<String,TreeSet<NodeWithDistance> > alignment_results = new HashMap<String, TreeSet<NodeWithDistance>>(); HashMap<String,HashSet<String> > candidates = new HashMap<String, HashSet<String>>(); //Read the candidates and distance files try { FileReader in2 = new FileReader("target/"+gb1.prefix_string+"_"+(gb1.synset?"synset":"sense")+"_"+(gb1.pos ? "Pos":"noPos")+"_relationMLgraph"+"_"+monoLinkThreshold1 +"_MERGED_"+ gb2.prefix_string+"_"+(gb2.synset?"synset":"sense")+"_"+(gb2.pos ? "Pos":"noPos")+"_relationMLgraph"+"_"+monoLinkThreshold2+ "_trivial_result.txt"); BufferedReader input2 = new BufferedReader(in2); FileOutputStream outstream; PrintStream p = null; String current_id1 =""; String current_id2 =""; String distance =""; String line2 = ""; int i =0; while( (line2 = input2.readLine())!=null) { System.out.println("Source Nodes parsed "+i++); // current_id1 = line.split(" ")[1]; // current_id2 = line.split(" ")[2]; current_id1 = line2.split("\t")[0]; current_id2 = line2.split("\t")[1]; distance = line2.split("\t")[2]; if(distance.length()>5) { distance = "1000"; } if(alignment_results.get(current_id1)==null) { alignment_results.put(current_id1, new TreeSet<NodeWithDistance>()); } if(distance.length()>3) { NodeWithDistance nwd = new NodeWithDistance(Integer.parseInt(current_id2),1000); alignment_results.get(current_id1).add(nwd); } else { NodeWithDistance nwd = new NodeWithDistance(Integer.parseInt(current_id2), Integer.parseInt(distance)); alignment_results.get(current_id1).add(nwd); } } input2.close(); in2.close(); /*HERE THE ACTUAL ANALYISIS BEGINS*/ candidates = new HashMap<String, HashSet<String>>(); outstream = new FileOutputStream("target/"+gb1.prefix_string+"_"+gb2.prefix_string+"_alignment_dwsa_"+(gb2.pos ? "Pos": "noPos")+"_"+depth+"_"+(allowMultiple? "1toN" :"1to1")+(alignSingle ? "_alignSingle":"")+(backoff ? "_backoff":"")+".txt"); p = new PrintStream( outstream ); for(String s : alignment_results.keySet()) { TreeSet<NodeWithDistance> cands = alignment_results.get(s); TreeSet<NodeWithDistance> polled_out = new TreeSet<NodeWithDistance>(); HashSet<NodeWithDistance> targets = new HashSet<NodeWithDistance>(); int observed_d = 0; while(observed_d <=depth) { NodeWithDistance nwd =cands.pollFirst(); if(nwd==null) { break; } polled_out.add(nwd); observed_d = nwd.path_length; if(observed_d<=depth || cands.size() ==1 && alignSingle) { targets.add(nwd); } if(!allowMultiple) { break; } } if(cands!=null && polled_out!= null && !polled_out.isEmpty()) { cands.addAll(polled_out); /*HERE THE OUTPUT BEGINS*/ } for(NodeWithDistance t : targets) { /*Preparation for Backoff*/ if(!candidates.containsKey(s)) { candidates.put(s, new HashSet<String>()); } candidates.get(s).add(t+""); p.println(s+"\t"+t.id+"\t"+t.path_length); } } if(backoff) // We add the alignment from the backoff for this which were not aligned using DWSA { in2 = new FileReader(backoff_file) ; input2 = new BufferedReader(in2); while((line2 = input2.readLine())!=null) { if(line2.startsWith("f")) { continue; } String id_1 = line2.split("\t")[0]; String id_2 = line2.split("\t")[1]; String conf = line2.split("\t")[2]; if(candidates.containsKey(id_1)) { System.out.println("Already aligned!!"); continue; } p.println(id_1+"\t"+id_2+"\t"+conf); } } p.close(); } catch(Exception e) { e.printStackTrace(); } } }