package tbx2rdf;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import tbx2rdf.datasets.lexvo.LexvoManager;
import tbx2rdf.types.MartifHeader;
import tbx2rdf.types.Term;
import tbx2rdf.vocab.ONTOLEX;
import org.apache.crimson.tree.XmlDocumentBuilder;
/**
 * SAX handler that makes a lightweight pass over a TBX document.
 *
 * <p>While parsing it records only three things:
 * <ul>
 *   <li>the {@code type} attribute of the {@code martif} element,</li>
 *   <li>the distinct {@code xml:lang} values found on {@code langSet} elements,</li>
 *   <li>the presence of a {@code martifHeader} element, for which an empty
 *       {@link MartifHeader} is created (its content is not populated here).</li>
 * </ul>
 * Element names are compared case-insensitively against the qualified name.
 * After parsing, {@link #getLexicons()} turns the collected languages into
 * lexicon resources of a Jena model.
 */
public class SAXHandler extends DefaultHandler {

    /** Mappings supplied at construction time; stored but not consulted by this handler. */
    private final Mappings mappings;

    /** Header created when a {@code martifHeader} element is seen; {@code null} until then. */
    private MartifHeader header;

    // Internal use. Neither field is assigned or read within this class.
    XmlDocumentBuilder consumer;
    XMLReader producer;

    /** Distinct {@code xml:lang} values seen on {@code langSet} elements. */
    Set<String> languages = new HashSet<>();

    /** Value of the {@code type} attribute of the {@code martif} element; empty if none was seen. */
    private String martiftype = "";

    /** Model holding the lexicon resources; replaced on every call to {@link #getLexicons()}. */
    Model lexiconsModel = ModelFactory.createDefaultModel();

    /**
     * Initializes the handler.
     *
     * @param _mappings The mappings to be given
     */
    public SAXHandler(Mappings _mappings) {
        super();
        mappings = _mappings;
    }

    /**
     * Retrieves the MartifHeader found in the TBX file.
     *
     * @return the header object created when the {@code martifHeader} element was
     *         encountered, or {@code null} if no such element has been seen
     */
    public MartifHeader getMartifHeader() {
        return header;
    }

    /**
     * Retrieves the {@code type} attribute of the {@code martif} element.
     *
     * @return the attribute value, or the empty string if it has not been seen
     */
    public String getMartifType() {
        return martiftype;
    }

    /**
     * Gets the Jena model with the lexicons.
     *
     * @return the current lexicons model; it contains the lexicon resources only
     *         after {@link #getLexicons()} has been called
     */
    public Model getLexiconsModel() {
        return lexiconsModel;
    }

    /**
     * Obtains a map of the lexicons present in the parsed file, one per language.
     *
     * <p>Each call discards the previous lexicons model and builds a new one, so the
     * model returned by {@link #getLexiconsModel()} always reflects the latest call.
     *
     * @return a map from language code (as found in {@code xml:lang}) to the lexicon
     *         resource created for it
     */
    public HashMap<String, Resource> getLexicons() {
        lexiconsModel = ModelFactory.createDefaultModel();
        final HashMap<String, Resource> lexicons = new HashMap<>();
        // 'languages' is a Set, so every language is visited exactly once and no
        // duplicate-key check is needed.
        for (String language : languages) {
            final Resource lexicon = lexiconsModel.createResource(Main.DATA_NAMESPACE + language);
            // NOTE(review): the result of the Lexvo lookup is used as-is; confirm how
            // LexvoManager behaves for language codes it does not know.
            final Resource lexvoLanguage = LexvoManager.mgr.getLexvoFromISO2(language);
            lexicon.addProperty(ONTOLEX.language, lexvoLanguage); // before it was the mere constant "language"
            lexicon.addProperty(RDF.type, ONTOLEX.Lexicon);
            lexicons.put(language, lexicon);
        }
        return lexicons;
    }

    /**
     * Retrieves the list of terms extracted from the TBX file.
     *
     * <p>Term extraction is not implemented in this handler.
     *
     * @return always {@code null}
     */
    public List<Term> getTerms() {
        return null;
    }

    /**
     * Records the martif type, the langSet languages and the presence of the
     * Martif header as the corresponding start tags are encountered.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attrs)
            throws SAXException {
        // The three element names are mutually exclusive, hence the else-if chain.
        if (qName.equalsIgnoreCase("martif")) {
            // getValue(String) yields null when the attribute is absent.
            final String type = attrs.getValue("type");
            if (type != null) {
                martiftype = type;
            }
        } else if (qName.equalsIgnoreCase("langSet")) {
            final String language = attrs.getValue("xml:lang");
            if (language != null) {
                languages.add(language);
            }
        } else if (qName.equalsIgnoreCase("martifHeader")) {
            header = new MartifHeader();
        }
    }

    /** Intentionally does nothing: no information is collected from end tags. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // no-op
    }

    /** Intentionally does nothing: character content is not collected. */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        // no-op
    }
}