package lia.tika;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.commons.digester.Digester;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
// From chapter 7
/**
 * Builds a Lucene {@link Document} from an address-book XML stream using
 * Commons Digester.
 *
 * <p>The Digester rules create a {@link Contact} for every
 * {@code address-book/contact} element, copy the {@code type} attribute and
 * the body text of the child elements into it, and then hand the finished
 * contact to {@link #populateDocument(Contact)} on this instance.
 *
 * <p>Only one document is kept per parse: if the XML holds several contacts,
 * each one replaces the previous result and the last contact wins.
 */
public class DigesterXMLDocument {

  /** Digester configured once in the constructor and reused for every parse. */
  private final Digester dig;

  /**
   * Document produced by the most recent {@link #populateDocument(Contact)}
   * call. Kept per instance (not static) so that separate handlers cannot
   * overwrite each other's result.
   */
  private Document doc;

  /**
   * Creates the handler and registers the Digester rules that map the
   * address-book XML onto {@link Contact} setters.
   */
  public DigesterXMLDocument() {
    dig = new Digester();
    dig.setValidating(false);

    dig.addObjectCreate("address-book/contact", Contact.class);        // #1
    dig.addSetProperties("address-book/contact", "type", "type");      // #2

    // A parameter count of 0 makes Digester pass the element's body text
    // as the single argument of the named setter.
    dig.addCallMethod("address-book/contact/name",                     // #3
                      "setName", 0);                                   // #3
    dig.addCallMethod("address-book/contact/address",
                      "setAddress", 0);
    dig.addCallMethod("address-book/contact/city",
                      "setCity", 0);
    dig.addCallMethod("address-book/contact/province",
                      "setProvince", 0);
    dig.addCallMethod("address-book/contact/postalcode",
                      "setPostalcode", 0);
    dig.addCallMethod("address-book/contact/country",
                      "setCountry", 0);
    dig.addCallMethod("address-book/contact/telephone",
                      "setTelephone", 0);

    // When a contact element ends, pass the Contact to populateDocument on
    // the object below it on the stack (this handler, pushed in getDocument).
    dig.addSetNext("address-book/contact", "populateDocument");        // #4
  }

  /**
   * Parses the given XML stream and returns the Lucene document built from it.
   *
   * <p>The stream is not closed by this method; the caller owns it.
   *
   * @param is XML input containing an {@code address-book} element
   * @return the document for the last contact in the stream, or
   *         {@code null} if the stream contained no contact
   * @throws DocumentHandlerException if the stream cannot be read or is not
   *         parseable XML
   */
  public synchronized Document getDocument(InputStream is)
      throws DocumentHandlerException {
    // Forget the previous result so a stream without contacts cannot return
    // a document left over from an earlier call.
    doc = null;

    // Drop anything a previously failed parse may have left on the stack,
    // then make this instance the root object that receives the contacts.
    dig.clear();
    dig.push(this);                                                    // #5

    try {
      dig.parse(is);                                                   // #6
    }
    catch (IOException e) {
      throw new DocumentHandlerException(
          "Cannot parse XML document", e);
    }
    catch (SAXException e) {
      throw new DocumentHandlerException(
          "Cannot parse XML document", e);
    }

    return doc;
  }

  /**
   * Converts a parsed contact into a Lucene document and stores it as the
   * current result. Invoked by Digester through the set-next rule; it must
   * stay public so Digester can find it.
   *
   * <p>Each populated property becomes a stored, non-analyzed field.
   * Properties that were absent from the XML (and are therefore
   * {@code null}) are skipped.
   *
   * @param contact the contact populated by the Digester rules
   */
  public void populateDocument(Contact contact) {                      // #7
    Document contactDoc = new Document();

    addField(contactDoc, "type", contact.getType());
    addField(contactDoc, "name", contact.getName());
    addField(contactDoc, "address", contact.getAddress());
    addField(contactDoc, "city", contact.getCity());
    addField(contactDoc, "province", contact.getProvince());
    addField(contactDoc, "postalcode", contact.getPostalcode());
    addField(contactDoc, "country", contact.getCountry());
    addField(contactDoc, "telephone", contact.getTelephone());

    doc = contactDoc;
  }

  /**
   * Adds a stored, non-analyzed field to the document unless the value is
   * {@code null}.
   */
  private static void addField(Document target, String name, String value) {
    // Lucene's Field constructor rejects null values, so a contact with a
    // missing element would otherwise abort the whole parse.
    if (value == null) {
      return;
    }
    target.add(new Field(name, value, Field.Store.YES,
                         Field.Index.NOT_ANALYZED));
  }

  /**
   * Plain bean holding the properties of one {@code contact} element.
   * All properties are {@code null} until the matching setter is called.
   */
  public static class Contact {
    private String type;
    private String name;
    private String address;
    private String city;
    private String province;
    private String postalcode;
    private String country;
    private String telephone;

    public void setType(String newType) {
      type = newType;
    }
    public String getType() {
      return type;
    }

    public void setName(String newName) {
      name = newName;
    }
    public String getName() {
      return name;
    }

    public void setAddress(String newAddress) {
      address = newAddress;
    }
    public String getAddress() {
      return address;
    }

    public void setCity(String newCity) {
      city = newCity;
    }
    public String getCity() {
      return city;
    }

    public void setProvince(String newProvince) {
      province = newProvince;
    }
    public String getProvince() {
      return province;
    }

    public void setPostalcode(String newPostalcode) {
      postalcode = newPostalcode;
    }
    public String getPostalcode() {
      return postalcode;
    }

    public void setCountry(String newCountry) {
      country = newCountry;
    }
    public String getCountry() {
      return country;
    }

    public void setTelephone(String newTelephone) {
      telephone = newTelephone;
    }
    public String getTelephone() {
      return telephone;
    }
  }

  /**
   * Command-line entry point: parses the XML file named by the first
   * argument and prints the resulting document.
   *
   * @param args {@code args[0]} is the path of the address-book XML file
   * @throws Exception if the file cannot be opened or parsed
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.println("Usage: java " + DigesterXMLDocument.class.getName()
                         + " <address-book.xml>");
      return;
    }

    DigesterXMLDocument handler = new DigesterXMLDocument();
    InputStream in = new FileInputStream(new File(args[0]));
    try {
      Document parsed = handler.getDocument(in);
      System.out.println(parsed);
    } finally {
      // getDocument leaves the stream open, so release the file handle here.
      in.close();
    }
  }
}

/*
#1 Create Contact
#2 Set type attribute
#3 Set name property
#4 Call populateDocument
#5 Push this handler as the root object
#6 Parse XML InputStream
#7 Create Lucene document
*/