/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.uby.integration.alignment.xml.transform;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeMap;
import javax.xml.transform.TransformerException;
import org.dom4j.DocumentException;
import org.xml.sax.SAXException;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decisiontype;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Source;
import de.tudarmstadt.ukp.integration.alignment.xml.model.SubSource;
import de.tudarmstadt.ukp.integration.alignment.xml.model.SubTarget;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Target;
import de.tudarmstadt.ukp.lmf.api.Uby;
import de.tudarmstadt.ukp.lmf.model.core.GlobalInformation;
import de.tudarmstadt.ukp.lmf.model.core.LexicalResource;
import de.tudarmstadt.ukp.lmf.model.core.Lexicon;
import de.tudarmstadt.ukp.lmf.model.meta.MetaData;
import de.tudarmstadt.ukp.lmf.model.multilingual.PredicateArgumentAxis;
import de.tudarmstadt.ukp.lmf.model.semantics.SemanticArgument;
import de.tudarmstadt.ukp.lmf.model.semantics.SemanticPredicate;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.LMFXmlWriter;
/**
 * Create a UBY lexical resource containing predicate and argument alignments
 * from a generic alignment XML file.
 * <p>
 * Alignments are collected by {@link #getAlignment(String)} into an internal
 * map and written out as UBY-LMF XML by
 * {@link #toLMF(String, String, String)}.
 */
public class PredicateAlignmentGenericXml extends AlignmentGenericXml {

    /**
     * Axis type set on predicate-level alignments.
     * NOTE(review): public and mutable; kept non-final so existing callers
     * that assign it keep compiling.
     */
    public static String axisType = "uby_predicate_axis";

    /**
     * Axis type set on argument-level alignments.
     * NOTE(review): public and mutable, see {@link #axisType}.
     */
    public static String subAxisType = "uby_argument_axis";

    /** Separator between the two ids that make up a key of {@link #axisMap}. */
    private static final String KEY_SEPARATOR = "%%";

    /**
     * All axes created so far, keyed by
     * {@code <id of element one>%%<id of element two>}; used to skip
     * predicate pairs that were already aligned.
     */
    private final TreeMap<String, PredicateArgumentAxis> axisMap;

    /**
     * Create a converter from explicit database connection parameters. All
     * parameters are handed to the superclass constructor unchanged.
     *
     * @param sourceUrl
     *            database url
     * @param dbDriver
     *            database driver
     * @param dbVendor
     *            database vendor
     * @param alignmentFile
     *            path of the generic alignment xml file
     * @param user
     *            database user
     * @param pass
     *            database password
     */
    public PredicateAlignmentGenericXml(String sourceUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass) {
        super(sourceUrl, dbDriver, dbVendor, alignmentFile, user, pass);
        this.alignment = new File(alignmentFile);
        axisMap = new TreeMap<>();
    }

    /**
     * Create a converter from a {@link DBConfig}, open the UBY database and
     * read the alignment file. Terminates the JVM with exit code 1 if the
     * alignment file is not an existing regular file.
     *
     * @param dbconf
     *            configuration of the UBY database to look predicates up in
     * @param alignmentFile
     *            path of the generic alignment xml file
     */
    public PredicateAlignmentGenericXml(DBConfig dbconf, String alignmentFile) {
        super(dbconf, alignmentFile);
        this.alignment = new File(alignmentFile);
        lmfMetaData = new LinkedList<>();
        axisMap = new TreeMap<>();
        logString = new StringBuilder();
        // isFile() is false both for missing paths and for directories. The
        // previous test (!exists() && !isFile()) only caught missing paths
        // and let a directory through.
        if (!alignment.isFile()) {
            System.out.println("Alignment file: " + alignmentFile
                    + " doesn't exist! ");
            System.exit(1);
        }
        try {
            uby = new Uby(dbconf);
        } catch (IllegalArgumentException e) {
            // Best effort, as before: report the problem and carry on.
            e.printStackTrace();
        }
        try {
            readAlignmentFile(alignment);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Convert generic alignment xml format to LMF.
     * <p>
     * For every source/target pair with a positive decision the matching
     * semantic predicates are looked up and one predicate axis is created per
     * not yet seen predicate pair. For each newly created predicate axis the
     * argument alignments listed on the target are added as argument axes.
     *
     * @param idPrefix
     *            prefix of the ids given to the created axes
     * @throws ParseException
     *             if the date of the alignment metadata is not in
     *             {@code yyyy-MM-dd} format
     */
    @Override
    public void getAlignment(String idPrefix) throws ParseException {
        String subIdPrefix = idPrefix + "_arg_";
        logger.info("looking up alignment");
        // expect single decisiontype
        Decisiontype decisiontype = metadata.decisiontypes.get(0);
        String sourceType = metadata.sourceResource.identifiertype;
        String destType = metadata.targetResource.identifiertype;
        String subSourceType = metadata.subSource.identifiertype;
        String subDestType = metadata.subTarget.identifiertype;

        MetaData meta = createMetaData(decisiontype);
        lmfMetaData.add(meta);

        Lexicon sourceLexicon = uby.getLexiconById(metadata.sourceResource.id);
        Lexicon destLexicon = uby.getLexiconById(metadata.targetResource.id);
        int id = 0;
        int subId = 0;

        /* Lookup of alignments in UBY */
        for (Source source : alignments) {
            List<SemanticPredicate> sourcePredicates = getPredicates(
                    sourceType, source.ref, sourceLexicon);
            for (Target target : source.targets) {
                // only add "positive" alignments for now! - nonalignments
                // are not modeled
                if (!target.decision.value) {
                    continue;
                }
                List<SemanticPredicate> destPredicates = getPredicates(
                        destType, target.ref, destLexicon);
                for (SemanticPredicate sourcePred : sourcePredicates) {
                    for (SemanticPredicate destPred : destPredicates) {
                        if (sourcePred == null || destPred == null) {
                            logString.append("No predlevel alignment for: "
                                    + source.ref + " " + target.ref);
                            logString.append(LF);
                            nullAlignment++;
                            System.err.println("Cannot align these guys!");
                            continue;
                        }
                        String key = sourcePred.getId() + KEY_SEPARATOR
                                + destPred.getId();
                        // avoid duplicates
                        if (axisMap.containsKey(key)) {
                            System.err.println("catching duplicates " + key);
                            continue;
                        }
                        PredicateArgumentAxis axis = newAxis(idPrefix + "_"
                                + id, axisType, meta, sourceLexicon,
                                destLexicon, sourcePred, destPred);
                        // set confidence score if available
                        if (target.decision.confidence != null) {
                            axis.setConfidence(target.decision.confidence);
                        } else {
                            axis.setConfidence(DEFAULTCONFSCORE);
                        }
                        axisMap.put(key, axis);
                        id++;
                        // add new predicate alignment => add the
                        // corresponding argument alignment
                        subId = addArgumentAxes(target, subIdPrefix, subId,
                                meta, sourceLexicon, destLexicon, sourcePred,
                                destPred, subSourceType, subDestType);
                    }
                }
            }
        }
        logString.append("So many input id pairs could not be aligned: "
                + nullAlignment);
        logger.info(logString.toString());
    }

    /**
     * Build the LMF {@link MetaData} describing this alignment from the
     * metadata of the alignment file.
     *
     * @param decisiontype
     *            decision type the alignment was created with
     * @return the populated metadata
     * @throws ParseException
     *             if the metadata date is not in {@code yyyy-MM-dd} format
     */
    private MetaData createMetaData(Decisiontype decisiontype)
            throws ParseException {
        MetaData meta = new MetaData();
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
        Date creationDate = formatter.parse(metadata.date);
        meta.setCreationDate(creationDate);
        meta.setId(metadata.identifier);
        meta.setVersion(metadata.version);
        meta.setAutomatic(decisiontype.type == Decisiontype.Decision.AUTOMATIC);
        meta.setCreationProcess(decisiontype.id);
        meta.setCreationTool(metadata.description);
        return meta;
    }

    /**
     * Create an axis between two predicates with the settings shared by
     * predicate-level and argument-level axes.
     */
    private PredicateArgumentAxis newAxis(String axisId, String type,
            MetaData meta, Lexicon sourceLexicon, Lexicon destLexicon,
            SemanticPredicate sourcePred, SemanticPredicate destPred) {
        PredicateArgumentAxis axis = new PredicateArgumentAxis();
        axis.setId(axisId);
        axis.setLexiconOne(sourceLexicon);
        axis.setLexiconTwo(destLexicon);
        axis.setMetaData(meta);
        axis.setAxisType(type);
        axis.setSemanticPredicateOne(sourcePred);
        axis.setSemanticPredicateTwo(destPred);
        return axis;
    }

    /**
     * Add one argument axis per aligned argument pair listed on the given
     * target. Each pair gets its own axis object and id; sharing one axis
     * across pairs would leave every map entry pointing at the last pair.
     *
     * @param subId
     *            next free running number for argument axis ids
     * @return the next free running number after all pairs were added
     */
    private int addArgumentAxes(Target target, String subIdPrefix, int subId,
            MetaData meta, Lexicon sourceLexicon, Lexicon destLexicon,
            SemanticPredicate sourcePred, SemanticPredicate destPred,
            String subSourceType, String subDestType) {
        if (target.subsources == null) {
            return subId;
        }
        int nextSubId = subId;
        for (SubSource subSource : target.subsources) {
            List<SemanticArgument> sourceArgs = getArguments(subSourceType,
                    subSource.ref, sourcePred);
            if (subSource.subtargets == null) {
                continue;
            }
            for (SubTarget subTarget : subSource.subtargets) {
                List<SemanticArgument> destArgs = getArguments(subDestType,
                        subTarget.ref, destPred);
                for (SemanticArgument sourceArg : sourceArgs) {
                    for (SemanticArgument destArg : destArgs) {
                        // same metadata as on predicate level
                        PredicateArgumentAxis argumentAxis = newAxis(
                                subIdPrefix + "_" + nextSubId, subAxisType,
                                meta, sourceLexicon, destLexicon, sourcePred,
                                destPred);
                        argumentAxis.setSemanticArgumentOne(sourceArg);
                        argumentAxis.setSemanticArgumentTwo(destArg);
                        axisMap.put(sourceArg.getId() + KEY_SEPARATOR
                                + destArg.getId(), argumentAxis);
                        nextSubId++;
                    }
                }
            }
        }
        return nextSubId;
    }

    /**
     * Get {@link List} of {@link SemanticPredicate} for given lexicon, id and
     * type of id.
     * <p>
     * For the id type {@code UBY_SEMPRED_ID} the result of the by-id lookup
     * is added as is, so the list may contain a {@code null} element;
     * {@link #getAlignment(String)} checks for it and counts it as a pair
     * that could not be aligned.
     *
     * @param type
     *            type of the identifier given in {@code ref}
     * @param ref
     *            predicate id or predicate label, depending on {@code type}
     * @param lexicon
     *            lexicon used for the lookup by label
     * @return the predicates found
     */
    private List<SemanticPredicate> getPredicates(String type, String ref,
            Lexicon lexicon) {
        List<SemanticPredicate> predicates = new ArrayList<>();
        if (type.equals(UBY_SEMPRED_ID)) {
            predicates.add(uby.getSemanticPredicateById(ref));
        } else {
            predicates = uby.getSemanticPredicatesByLabelAndLexicon(ref,
                    lexicon);
        }
        if (predicates.size() == 0) {
            logger.info("Could not find semantic predicate for " + type + " "
                    + ref + " " + lexicon.getName());
        }
        return predicates;
    }

    /**
     * Get {@link List} of {@link SemanticArgument} for given predicate, id
     * and type of id.
     *
     * @param type
     *            type of the identifier given in {@code ref}
     * @param ref
     *            argument id or argument label, depending on {@code type}
     * @param predicate
     *            predicate used for the lookup by label
     * @return the arguments found
     */
    private List<SemanticArgument> getArguments(String type, String ref,
            SemanticPredicate predicate) {
        List<SemanticArgument> arguments = new ArrayList<>();
        if (type.equals(UBY_SEMARG_ID)) {
            SemanticArgument argument = uby.getSemanticArgumentById(ref);
            // A failed lookup presumably yields null (TODO confirm against
            // the Uby API). Do not pass it on: callers dereference every
            // element of the returned list.
            if (argument != null) {
                arguments.add(argument);
            }
        } else {
            arguments = uby.getSemanticArgumentsByLabelAndPredicate(ref,
                    predicate);
        }
        if (arguments.size() == 0) {
            logger.info("Could not find semantic argument for " + type + " "
                    + ref + " " + predicate.getId());
        }
        return arguments;
    }

    /**
     * Write alignments to UBY LMF xml.
     * <p>
     * Note that the ids of the collected metadata are overwritten with
     * {@code <idPrefix>_Meta_<n>}.
     *
     * @param idPrefix
     *            prefix used for the metadata ids and the resource name
     * @param dtdVersion
     *            version of the UBY DTD to write against
     * @param outfile
     *            path of the xml file to write
     * @throws IOException
     * @throws TransformerException
     * @throws SAXException
     */
    @Override
    public void toLMF(String idPrefix, String dtdVersion, String outfile)
            throws IOException, TransformerException, SAXException {
        LMFXmlWriter xmlWriter = new LMFXmlWriter(outfile, UBY_HOME
                + "/resources/dtd/DTD_unifiedModel_" + dtdVersion + ".dtd");
        LexicalResource lexicalResource = new LexicalResource();

        List<MetaData> metaDataList = new ArrayList<MetaData>(
                lmfMetaData.size());
        int i = 0;
        for (MetaData meta : lmfMetaData) {
            meta.setId(idPrefix + "_Meta_" + i);
            metaDataList.add(meta);
            i++;
        }
        // set metadata for lexicalresource!
        lexicalResource.setMetaData(metaDataList);

        LinkedList<PredicateArgumentAxis> predaxes = new LinkedList<>(
                axisMap.values());
        lexicalResource.setPredicateArgumentAxes(predaxes);
        lexicalResource.setDtdVersion(dtdVersion);
        lexicalResource.setName("Uby_PredicateArgumentAlignments_" + idPrefix);

        GlobalInformation globalInformation = new GlobalInformation();
        globalInformation.setLabel("PredicateArgumentAlignments_" + idPrefix);
        lexicalResource.setGlobalInformation(globalInformation);

        xmlWriter.writeElement(lexicalResource);
        xmlWriter.writeEndDocument();
    }

    /**
     * Read UBY LMF XML to database.
     *
     * @param dbConfig
     *            configuration of the target database
     * @param xmlSource
     *            UBY LMF xml file to import
     * @param idPrefix
     *            prefix appended to {@code Uby_PredicateArgumentAlignments_}
     *            to form the name passed on to {@code convertToDB}
     * @throws DocumentException
     * @throws FileNotFoundException
     * @throws IllegalArgumentException
     */
    public static void toDB(DBConfig dbConfig, File xmlSource, String idPrefix)
            throws DocumentException, FileNotFoundException,
            IllegalArgumentException {
        convertToDB(dbConfig, xmlSource, "Uby_PredicateArgumentAlignments_"
                + idPrefix);
    }
}