package edu.unc.ils.mrc.hive.converter.nbii;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/*
 * Known problems reading repgen.xml. Do not use this class for the moment!
 */
/**
 * SAX handler that reads an NBII thesaurus XML file and fills a
 * {@link Thesaurus} with the concepts it finds.
 *
 * <p>Element handling:
 * <ul>
 * <li>{@code DESCRIPTOR} starts a new concept; its text becomes the preferred
 * label and is appended to the base URI (spaces replaced by {@code -}).</li>
 * <li>{@code BT}, {@code NT}, {@code RT}, {@code UF} and {@code SN}/{@code SC}
 * set the broader, narrower, related, alternative-label and scope-note values
 * of the current concept.</li>
 * <li>Closing a {@code UPD} element adds the current concept to the
 * thesaurus.</li>
 * </ul>
 *
 * <p>Element text is buffered and applied once per text run, because a SAX
 * parser is allowed to deliver the content of a single element through
 * several {@link #characters(char[], int, int)} calls.
 */
public class NBIIXMLReader extends DefaultHandler
{
    /** Prefix of every concept URI; the descriptor text is appended to it. */
    private static final String BASE_URI = "http://thesaurus.nbii.gov/";

    private XMLReader xr;
    /** Name of the most recently opened element that is still receiving text, or null. */
    private String currentElement;
    /** Text collected for {@link #currentElement} since the last flush. */
    private final StringBuilder text = new StringBuilder();
    private Concept concept;
    private final Thesaurus thesaurus;
    /** True while a concept has been started but not yet added to the thesaurus. */
    private boolean ok;

    /**
     * Creates the underlying XML reader and an empty thesaurus. If the XML
     * reader cannot be created the problem is reported on stderr and later
     * calls to {@link #readThesaurus(String)} return the empty thesaurus.
     */
    public NBIIXMLReader() {
        try {
            this.xr = XMLReaderFactory.createXMLReader();
            this.xr.setContentHandler(this);
            this.xr.setErrorHandler(this);
        } catch (SAXException e) {
            System.err.println("Problem with XMLReader inicialization");
            e.printStackTrace();
        }
        this.thesaurus = new SKOSThesaurus();
        this.ok = false;
    }

    /**
     * Parses the given XML file and adds its concepts to this reader's
     * thesaurus.
     *
     * @param file path of the XML file to read
     * @return the thesaurus held by this reader; it is returned even when the
     *         file cannot be read or parsed (the error is printed to stderr)
     */
    public Thesaurus readThesaurus(String file) {
        if (this.xr == null) {
            // Construction already reported the failure; avoid a NullPointerException here.
            System.err.println("XMLReader is not available, cannot read " + file);
            return this.thesaurus;
        }
        FileReader fr = null;
        try {
            fr = new FileReader(file);
            this.xr.parse(new InputSource(fr));
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, whether or not parsing succeeded.
            if (fr != null) {
                try {
                    fr.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return this.thesaurus;
    }

    /**
     * Not implemented yet.
     *
     * @param file paths of the XML files to read (currently ignored)
     * @return always {@code null}
     */
    public Thesaurus readThesaurus(String[] file) {
        return null;
        // TODO
    }

    /** Resets the per-document parse state and logs the start of the document. */
    @Override
    public void startDocument() {
        this.currentElement = null;
        this.text.setLength(0);
        this.concept = null;
        this.ok = false;
        System.out.println("Starting XML document");
    }

    /** Logs the end of the document. */
    @Override
    public void endDocument() {
        System.out.println("Finishing XML document");
    }

    /**
     * Applies any text still pending for the previous element, then records
     * the newly opened element. A {@code DESCRIPTOR} element starts a new
     * concept.
     */
    @Override
    public void startElement(String uri, String name, String qName,
            Attributes atts) {
        flushText();
        this.currentElement = elementName(name, qName);
        if ("DESCRIPTOR".equals(this.currentElement)) {
            this.concept = new SKOSConcept(BASE_URI);
            this.ok = true;
        }
    }

    /**
     * Applies the text collected for the closing element. When the closing
     * element is {@code UPD} and a concept is in progress, that concept is
     * added to the thesaurus.
     */
    @Override
    public void endElement(String uri, String name, String qName) {
        flushText();
        // Text that follows a closing tag does not belong to that element.
        this.currentElement = null;
        if ("UPD".equals(elementName(name, qName)) && this.ok) {
            this.thesaurus.addConcept(this.concept);
            this.ok = false;
        }
    }

    /**
     * Buffers character data for the current element. The data is not
     * interpreted here because the parser may split one text run across
     * several calls.
     *
     * @param ch     character buffer supplied by the parser
     * @param start  index of the first character to read
     * @param length number of characters to read
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.currentElement != null) {
            this.text.append(ch, start, length);
        }
    }

    /** Returns the local name, or the qualified name when the local name is empty. */
    private static String elementName(String name, String qName) {
        if (name == null || name.length() == 0) {
            return qName;
        }
        return name;
    }

    /**
     * Applies the buffered text (trimmed) to the current concept according to
     * the current element, then empties the buffer. Blank text, text outside
     * any element, and text seen before the first DESCRIPTOR are ignored.
     */
    private void flushText() {
        String s = this.text.toString().trim();
        this.text.setLength(0);
        if (s.length() == 0 || this.currentElement == null || this.concept == null) {
            return;
        }
        String element = this.currentElement;
        if (element.equals("DESCRIPTOR")) {
            this.concept.setPrefLabel(s);
            String conceptUri = this.concept.getUri() + s;
            this.concept.setUri(conceptUri.replace(' ', '-'));
        } else if (element.equals("BT")) {
            this.concept.setBroader(s);
        } else if (element.equals("UF")) {
            this.concept.setAltLabel(s);
        } else if (element.equals("NT")) {
            this.concept.setNarrower(s);
        } else if (element.equals("SN") || element.equals("SC")) {
            this.concept.setScopeNote(s);
        } else if (element.equals("RT")) {
            this.concept.setRelated(s);
        }
    }

    /**
     * Reads the NBII source file from a fixed location, prints the number of
     * concepts and writes the thesaurus to {@code /tmp/nbii.rdf}.
     */
    public static void main(String[] args) throws FileNotFoundException,
            IOException, SAXException {
        NBIIXMLReader lector = new NBIIXMLReader();
        lector.readThesaurus("/usr/local/hive/sources/nbii/repgen.xml");
        System.out.println("Thesaurus Size: " + lector.thesaurus.getSize());
        lector.thesaurus.printThesaurus("/tmp/nbii.rdf");
    }
}