/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.uby.integration.alignment.xml.transform.sensealignments;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Alignments;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decision;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decisiontype;
import de.tudarmstadt.ukp.integration.alignment.xml.model.ResourceXml;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Source;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Target;
import de.tudarmstadt.ukp.integration.alignment.xml.model.XmlMeta;
import de.tudarmstadt.ukp.lmf.api.Uby;
import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry;
import de.tudarmstadt.ukp.lmf.model.core.Lexicon;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics;
import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech;
import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
/**
 * Converts a VerbNet-FrameNet mapping file into the generic alignment XML
 * format.
 * <p>
 * The input is an XML file containing {@code vncls} elements, each carrying
 * the attributes {@code class} (VerbNet class id), {@code vnmember} (VerbNet
 * lemma) and {@code fnlexent} (FrameNet lexical unit id). For every such
 * element with a non-empty {@code fnlexent}, the VerbNet senses of the lemma
 * are looked up in UBY and filtered by VerbNet class; each remaining sense
 * becomes a target of the source identified by the FrameNet lexical unit id.
 * <p>
 * A UBY database connection is required to resolve the VerbNet senses and
 * their external references.
 */
public class VnFnSenseAlignmentXml extends SenseAlignmentXml {

    private static final Log LOGGER = LogFactory.getLog(VnFnSenseAlignmentXml.class);

    /** Name under which the VerbNet lexicon is requested from UBY. */
    private static final String LEXICON_NAME = "VerbNet";

    private final Uby uby;

    /** Never modified by this class; kept because it is part of the public interface. */
    public int inputsize = 0;

    /**
     * @param alignmentFile
     *            path of the VerbNet-FrameNet mapping file to read
     * @param outFile
     *            path of the alignment XML file to write
     * @param dbConfig
     *            configuration of the UBY database used for sense lookup
     * @throws FileNotFoundException
     *             propagated from the superclass constructor
     */
    public VnFnSenseAlignmentXml(String alignmentFile, String outFile,
            DBConfig dbConfig) throws FileNotFoundException {
        super(alignmentFile, outFile);
        uby = new Uby(dbConfig);
    }

    /**
     * Reads the mapping file, resolves the VerbNet senses via UBY and writes
     * the metadata followed by the collected alignments. The writer is closed
     * when this method returns, whether or not the conversion succeeded.
     *
     * @param metadata
     *            metadata block written ahead of the alignments
     * @throws IOException
     *             if the mapping file cannot be read or parsed, or if writing
     *             the output fails
     */
    @Override
    public void toAlignmentXml(XmlMeta metadata) throws IOException {
        try {
            TreeMap<String, Source> sourceMap = collectAlignments();
            writer.writeMetaData(metadata);
            Alignments alignments = new Alignments();
            alignments.source = new LinkedList<>();
            alignments.source.addAll(sourceMap.values());
            writer.writeAlignments(alignments);
        } finally {
            // Release the writer on failure paths as well.
            writer.close();
        }
    }

    /**
     * Walks over all {@code vncls} elements of the mapping file, builds the
     * sources (keyed by FrameNet lexical unit id) and logs the conversion
     * statistics.
     */
    private TreeMap<String, Source> collectAlignments() throws IOException {
        Lexicon vn = uby.getLexiconByName(LEXICON_NAME);
        TreeMap<String, Source> sourceMap = new TreeMap<>();
        int withoutTarget = 0;
        int withTarget = 0;
        int lines = 0;
        int pairs = 0;

        NodeList entries = parseAlignmentFile().getElementsByTagName("vncls");
        for (int i = 0; i < entries.getLength(); i++) {
            NamedNodeMap atts = entries.item(i).getAttributes();
            String vnClass = atts.getNamedItem("class").getTextContent();
            String vnLemma = atts.getNamedItem("vnmember").getTextContent();
            String luId = atts.getNamedItem("fnlexent").getTextContent();
            // there are mappings with empty (fn) target:
            if (luId.isEmpty()) {
                withoutTarget++;
            } else {
                withTarget++;
                pairs += addAlignments(luId, vnLemma, vnClass, vn, sourceMap);
            }
            lines++;
        }

        logString.append("Converted " + alignmentFile + ", statistics:" + LF);
        logString.append("\tInput Lines: " + lines + LF);
        logString.append("\tOutput: " + withTarget + LF);
        logString.append("\tNo alignment target: " + withoutTarget + LF);
        logString.append("\tControl: output + no alignment = input lines: "
                + (withTarget + withoutTarget) + LF);
        logString.append("\tNumber of alignment pairs in output:" + pairs);
        LOGGER.info(logString.toString());
        return sourceMap;
    }

    /** Parses the mapping file into a normalized DOM document. */
    private Document parseAlignmentFile() throws IOException {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder();
            Document doc = builder.parse(new File(alignmentFile));
            doc.getDocumentElement().normalize();
            return doc;
        } catch (ParserConfigurationException | SAXException e) {
            // IOExceptions from parse() propagate unchanged instead of being re-wrapped.
            throw new IOException("Could not parse alignment file "
                    + alignmentFile, e);
        }
    }

    /**
     * Adds one target per VerbNet sense of {@code vnLemma} whose VerbNet class
     * label matches {@code vnClass} to the source of {@code luId}.
     *
     * @return number of source/target pairs added
     */
    private int addAlignments(String luId, String vnLemma, String vnClass,
            Lexicon vn, TreeMap<String, Source> sourceMap) {
        int added = 0;
        List<LexicalEntry> vnEntries = uby.getLexicalEntries(vnLemma,
                EPartOfSpeech.verb, vn);
        for (LexicalEntry entry : vnEntries) {
            for (Sense sense : entry.getSenses()) {
                // filter by VN-class
                List<SemanticLabel> labels = uby
                        .getSemanticLabelsbySenseIdbyType(sense.getId(),
                                ELabelTypeSemantics.verbnetClass.toString());
                for (SemanticLabel label : labels) {
                    if (!vnClass.equals(classIdOf(label.getLabel()))) {
                        continue;
                    }
                    if (!sense.getMonolingualExternalRefs().iterator().hasNext()) {
                        // Without an external reference there is nothing to point the target at.
                        LOGGER.warn("Skipping sense " + sense.getId()
                                + ": no external reference available");
                        continue;
                    }
                    Source source = sourceMap.get(luId);
                    if (source == null) {
                        source = new Source();
                        source.ref = luId;
                    }
                    Target target = new Target();
                    target.ref = sense.getMonolingualExternalRefs().iterator()
                            .next().getExternalReference();
                    target.decision = new Decision();
                    target.decision.value = true;
                    target.decision.confidence = DEFAULTCONFIDENCE;
                    source.targets.add(target);
                    sourceMap.put(source.ref, source);
                    added++;
                }
            }
        }
        return added;
    }

    /**
     * Removes the leading "-"-separated segment of a label and returns the
     * remaining segments joined by "-".
     *
     * @return the remainder, or {@code null} if the label is {@code null} or
     *         has fewer than two segments
     */
    private static String classIdOf(String label) {
        if (label == null) {
            return null;
        }
        String[] parts = label.split("-");
        if (parts.length < 2) {
            return null;
        }
        StringBuilder id = new StringBuilder(parts[1]);
        for (int i = 2; i < parts.length; i++) {
            id.append('-').append(parts[i]);
        }
        return id.toString();
    }

    /**
     * Builds the default metadata for this alignment: general descriptive
     * fields, the FrameNet source resource, the VerbNet target resource and a
     * single manual decision type.
     *
     * @return a newly created metadata object
     */
    @Override
    public XmlMeta getDefaultXmlMeta() {
        XmlMeta metadata = new XmlMeta();
        metadata.title = "VerbNet-FrameNet mapping from SemLink version ";
        metadata.creator = "http://verbs.colorado.edu/semlink/";
        metadata.date = "2015-03-13"; // download date
        metadata.description = "Manual mapping of VerbNet class members to FrameNet Senses, the mapping is part of SemLink";
        metadata.identifier = "VNFN32";
        metadata.publisher = "University of Colorado";
        metadata.rights = "VerbNet 3.0 (and 3.x) License";
        metadata.version = "3.2";

        // id matches lexiconId, identifiertype matches externalSystem
        metadata.targetResource = newResource("VerbNet version 3.2",
                "VN_Lexicon_0", "en", "VerbNet_3.2_eng_sense");
        metadata.sourceResource = newResource("FrameNet version 1.x",
                "FN_Lexicon_0", "en", "FrameNet_1.5_eng_lexicalUnit");

        Decisiontype type = new Decisiontype();
        type.id = "SemLink_VNFN";
        type.name = "SemLink VNFN";
        type.type = Decisiontype.Decision.MANUAL;
        List<Decisiontype> decisionTypes = new ArrayList<>();
        decisionTypes.add(type);
        metadata.decisiontypes = decisionTypes;
        // no separate scores given => no scoretype information
        return metadata;
    }

    /** Creates a resource description with the given field values. */
    private static ResourceXml newResource(String description, String id,
            String language, String identifiertype) {
        ResourceXml resource = new ResourceXml();
        resource.description = description;
        resource.id = id;
        resource.language = language;
        resource.identifiertype = identifiertype;
        return resource;
    }

    /**
     * Runs the conversion with paths relative to the {@code UBY_HOME}
     * environment variable and a local MySQL UBY database.
     */
    public static void main(String[] args) throws Exception {
        String ubyHome = System.getenv("UBY_HOME");
        if (ubyHome == null) {
            throw new IllegalStateException(
                    "Environment variable UBY_HOME is not set");
        }
        // File(parent, child) inserts the separator whether or not UBY_HOME ends with one.
        String alignmentFile = new File(ubyHome,
                "SemLink/1.2.2c/vn-fn/VNC-FNF.s").getPath();
        String outFile = ubyHome
                + "/target/verbNetFrameNetAlignment22c_newXml.xml";
        DBConfig dbConfig = new DBConfig("localhost/uby_clarin_0_7_0w",
                "com.mysql.jdbc.Driver", "mysql", "root", "pass", false);
        VnFnSenseAlignmentXml al = new VnFnSenseAlignmentXml(alignmentFile,
                outFile, dbConfig);
        al.toAlignmentXml(al.getDefaultXmlMeta());
    }
}