/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.uby.integration.alignment.xml.transform;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import javax.xml.transform.TransformerException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.DocumentException;
import org.xml.sax.SAXException;
import de.tudarmstadt.ukp.integration.alignment.xml.AlignmentXmlReader;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Source;
import de.tudarmstadt.ukp.integration.alignment.xml.model.XmlMeta;
import de.tudarmstadt.ukp.lmf.api.Uby;
import de.tudarmstadt.ukp.lmf.model.core.Lexicon;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.model.meta.MetaData;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer;
/**
* Creates a UBY lexical resource containing sense axes directly from a generic
* alignment XML file. Replaces SenseAlignment and children of SenseAlignment.
*/
public abstract class AlignmentGenericXml {

    // enum not possible: match any externalReference String in UBY
    public static final String UBY_SENSE_ID = "UBY_SENSE_ID";
    public static final String UBY_SYNSET_ID = "UBY_SYNSET_ID";
    public static final String UBY_SEMPRED_ID = "UBY_SEMANTIC_PREDICATE_ID";
    public static final String UBY_SEMPRED_LABEL = "UBY_SEMANTIC_PREDICATE_LABEL";
    public static final String UBY_SEMARG_ID = "UBY_SEMANTIC_ARGUMENT_ID";
    public static final String UBY_SEMARG_ROLE = "UBY_SEMANTIC_ARGUMENT_ROLE";

    /** Confidence score constant exposed to subclasses and callers. */
    public static final Double DEFAULTCONFSCORE = 1.0;

    /** Free-form log buffer; initialised empty by the constructors. */
    public StringBuilder logString;

    /** Counter initialised to zero; not modified by this class itself. */
    public int nullAlignment = 0;

    protected static String UBY_HOME = System.getenv("UBY_HOME");
    protected static String LF = System.getProperty("line.separator");
    protected final static Log logger = LogFactory
            .getLog(AlignmentGenericXml.class);

    /** The alignment file handed to the constructor. */
    protected File alignment;
    protected LinkedList<MetaData> lmfMetaData;

    /** UBY API handle; stays {@code null} if opening the database failed. */
    protected Uby uby;

    /** Alignment sources read from the alignment file. */
    protected List<Source> alignments;

    /** Metadata read from the alignment file. */
    protected XmlMeta metadata;

    /**
     * Creates the converter from individual database connection settings.
     * Builds a {@link DBConfig} (with its last constructor flag set to
     * {@code false}) and delegates to
     * {@link #AlignmentGenericXml(DBConfig, String)}.
     *
     * @param sourceUrl
     *            database URL
     * @param dbDriver
     *            database driver
     * @param dbVendor
     *            database vendor
     * @param alignmentFile
     *            path of the alignment file in generic alignment format
     * @param user
     *            database user
     * @param pass
     *            database password
     */
    public AlignmentGenericXml(String sourceUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass) {
        this(new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, false),
                alignmentFile);
    }

    /**
     * Creates the converter for an existing database configuration, opens the
     * UBY database and reads the alignment file.
     * <p>
     * If {@code alignmentFile} is not an existing regular file the JVM is
     * terminated with exit status 1. Failures while opening the database or
     * reading the alignment file are logged and otherwise ignored, in which
     * case {@link #uby}, {@link #alignments} and/or {@link #metadata} may
     * remain {@code null}.
     *
     * @param dbconf
     *            database configuration used to open UBY
     * @param alignmentFile
     *            path of the alignment file in generic alignment format
     */
    public AlignmentGenericXml(DBConfig dbconf, String alignmentFile) {
        this.alignment = new File(alignmentFile);
        lmfMetaData = new LinkedList<>();
        logString = new StringBuilder();
        // isFile() is false for missing paths as well as for directories, so
        // it covers both cases the former "!exists() && !isFile()" test was
        // meant to reject (that test let existing directories through).
        if (!alignment.isFile()) {
            logger.error("Alignment file: " + alignmentFile + " doesn't exist! ");
            // NOTE(review): terminating the JVM from a constructor is kept
            // only for backward compatibility with existing callers.
            System.exit(1);
        }
        try {
            uby = new Uby(dbconf);
        } catch (IllegalArgumentException e) {
            // Best effort, as before: construction continues without UBY.
            logger.error("Could not open UBY database", e);
        }
        try {
            readAlignmentFile(alignment);
        } catch (IOException e) {
            // Best effort, as before: construction continues without data.
            logger.error("Could not read alignment file: " + alignmentFile, e);
        }
    }

    /**
     * Convert alignments from generic xml format to UBY
     *
     * @param idPrefix
     *            prefix passed through to the implementation
     * @throws ParseException
     *             if the implementation fails to parse alignment data
     */
    public abstract void getAlignment(String idPrefix) throws ParseException;

    /**
     * Write alignments to UBY LMF xml
     *
     * @param idPrefix
     *            prefix passed through to the implementation
     * @param dtdVersion
     *            DTD version passed through to the implementation
     * @param outfile
     *            path of the output file
     * @throws IOException
     *             declared for implementations
     * @throws TransformerException
     *             declared for implementations
     * @throws SAXException
     *             declared for implementations
     */
    public abstract void toLMF(String idPrefix, String dtdVersion,
            String outfile) throws IOException, TransformerException,
            SAXException;

    /**
     * Get list of senses for given lexicon, id and type of id.
     * <ul>
     * <li>{@link #UBY_SENSE_ID}: a one-element list with the result of
     * {@code uby.getSenseById(sourceID)}</li>
     * <li>{@link #UBY_SYNSET_ID}: the senses of the synset with that id</li>
     * <li>any other type: the result of
     * {@code uby.getSensesByOriginalReference(...)}</li>
     * </ul>
     *
     * @param sourceType
     *            type of the id, one of the constants above or an external
     *            reference type
     * @param sourceID
     *            the id to look up
     * @param sourceLexicon
     *            lexicon used for the external-reference lookup only
     * @return a list of {@link Sense} objects, never {@code null}
     */
    protected List<Sense> getSenses(String sourceType, String sourceID,
            Lexicon sourceLexicon) {
        List<Sense> senses;
        if (sourceType.equals(UBY_SENSE_ID)) {
            senses = new ArrayList<>();
            senses.add(uby.getSenseById(sourceID));
        } else if (sourceType.equals(UBY_SYNSET_ID)) {
            senses = uby.getSynsetById(sourceID).getSenses();
        } else {
            senses = uby.getSensesByOriginalReference(sourceType, sourceID,
                    sourceLexicon);
        }
        if (senses == null) {
            // Guard against a lookup handing back null instead of a list.
            senses = new ArrayList<>();
        }
        if (senses.isEmpty() && logger.isDebugEnabled()) {
            logger.debug("No senses found for " + sourceType + " " + sourceID);
        }
        return senses;
    }

    /**
     * Read UBY LMF XML to database
     *
     * @param dbConfig
     *            configuration of the target database
     * @param xmlSource
     *            the UBY LMF XML file to import
     * @param fullPrefix
     *            prefix handed to {@link XMLToDBTransformer#transform}
     * @throws DocumentException
     * @throws FileNotFoundException
     * @throws IllegalArgumentException
     */
    protected static void convertToDB(DBConfig dbConfig, File xmlSource,
            String fullPrefix) throws DocumentException, FileNotFoundException,
            IllegalArgumentException {
        XMLToDBTransformer xml2DB = new XMLToDBTransformer(dbConfig);
        xml2DB.transform(xmlSource, fullPrefix);
    }

    /**
     * Read file containing sense alignment or predicate argument alignment.
     * Stores the result in {@link #metadata} and {@link #alignments}.
     *
     * @param alignmentFile
     *            - file in generic alignment format
     * @throws IOException
     *             if the file cannot be opened or read
     */
    protected void readAlignmentFile(File alignmentFile) throws IOException {
        AlignmentXmlReader reader = null;
        try {
            reader = new AlignmentXmlReader(alignmentFile);
            metadata = reader.readMetaData();
            alignments = reader.readAlignments().source;
            logger.info("Read so many alignments : " + alignments.size());
        } finally {
            // reader is still null when the constructor itself failed;
            // closing unconditionally would mask that failure with an NPE.
            if (reader != null) {
                reader.close();
            }
        }
    }
}