/**
* Copyright 1999-2009 The Pegadi Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pegadi.server.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.pegadi.index.IndexJob;
import org.pegadi.index.JobContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import javax.sql.DataSource;
import java.io.IOException;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Index job that reads every row of the <code>article</code> table and adds
 * one Lucene document per row to the index writer supplied by the
 * {@link JobContext}.
 *
 * <p>Each document has a keyword field <code>id</code> holding the article id
 * and a text field <code>content</code> holding the character data found
 * inside the <code>&lt;text&gt;</code> elements of the article XML. Rows whose
 * XML is <code>null</code> or does not start with <code>&lt;?</code> are
 * indexed with empty content.</p>
 *
 * <p>The data source must be injected with {@link #setDataSource(DataSource)}
 * before {@link #executeJob(JobContext)} is called.</p>
 */
public class IndexAllArticlesJob implements IndexJob {

    /** A progress line is logged each time this many articles have been indexed. */
    private static final int PROGRESS_INTERVAL = 100;

    private DataSource dataSource;
    private final Logger log = LoggerFactory.getLogger(getClass());

    /**
     * Indexes all articles. Failures on a single article (parse error or
     * index write error) are logged and the job continues with the next
     * article; SQL errors and failure to create the XML reader are logged
     * and end the job.
     *
     * @param jobContext context providing the index writer to add documents to
     */
    public void executeJob(JobContext jobContext) {
        int count = 0;
        Connection c = null;
        PreparedStatement p = null;
        ResultSet rs = null;
        try {
            XMLReader reader = XMLReaderFactory.createXMLReader();
            // Shared buffer: the handler appends to it, the loop clears it per article.
            final StringBuffer text = new StringBuffer();
            reader.setContentHandler(createTextCollector(text));

            c = dataSource.getConnection();
            p = c.prepareStatement("SELECT id, text from article");
            rs = p.executeQuery();
            while (rs.next()) {
                String id = rs.getString(1);
                String xml = rs.getString(2);
                text.setLength(0);
                parseArticle(reader, id, xml);
                // Only count articles that actually made it into the index.
                if (addToIndex(jobContext, id, text.toString())) {
                    count++;
                    if (count % PROGRESS_INTERVAL == 0) {
                        log.info("Indexed " + count + " articles");
                    }
                }
            }
            log.info("Finished indexing, " + count + " articles in total");
        } catch (SQLException e) {
            log.error("Sql error", e);
        } catch (SAXException e) {
            log.error("Parse error", e);
        } finally {
            // Close in reverse order of acquisition; each close is independent.
            close(rs);
            close(p);
            close(c);
        }
    }

    /**
     * Creates a SAX handler that appends the character data found inside
     * <code>text</code> elements to the given buffer.
     */
    private static ContentHandler createTextCollector(final StringBuffer text) {
        return new DefaultHandler() {
            private boolean isInText = false;

            public void startDocument() throws SAXException {
                // The handler is reused for every article. Reset the state so a
                // parse that aborted inside <text> cannot leak into the next one.
                isInText = false;
            }

            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if ("text".equals(localName)) {
                    isInText = true;
                }
            }

            public void characters(char[] chars, int start, int length) throws SAXException {
                if (isInText) {
                    text.append(chars, start, length);
                }
            }

            public void endElement(String uri, String localName, String qName) throws SAXException {
                if ("text".equals(localName)) {
                    isInText = false;
                }
            }
        };
    }

    /**
     * Parses the article XML with the given reader. Input that is
     * <code>null</code> or does not start with <code>&lt;?</code> is skipped.
     * Parse failures are logged with their cause and otherwise ignored, so
     * whatever text was collected before the failure is still indexed.
     */
    private void parseArticle(XMLReader reader, String id, String xml) {
        if (xml == null || !xml.startsWith("<?")) {
            return;
        }
        try {
            reader.parse(new InputSource(new StringReader(xml)));
        } catch (IOException e) {
            log.error("parsing of article " + id + " failed", e);
        } catch (SAXException e) {
            log.error("parsing of article " + id + " failed", e);
        }
    }

    /**
     * Builds the Lucene document for one article and adds it to the index.
     *
     * @return <code>true</code> if the document was added, <code>false</code>
     *         if the index writer reported an I/O error (which is logged)
     */
    private boolean addToIndex(JobContext jobContext, String id, String content) {
        Document d = new Document();
        d.add(Field.Keyword("id", id));
        d.add(Field.Text("content", new StringReader(content)));
        try {
            jobContext.getIndexWriter().addDocument(d);
            return true;
        } catch (IOException e) {
            log.error("Failed adding article " + id + " to index", e);
            return false;
        }
    }

    /** Closes the result set if it is non-null, logging any SQL error. */
    private void close(ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                log.error("sql error", e);
            }
        }
    }

    /** Closes the statement if it is non-null, logging any SQL error. */
    private void close(PreparedStatement p) {
        if (p != null) {
            try {
                p.close();
            } catch (SQLException e) {
                log.error("sql error", e);
            }
        }
    }

    /** Closes the connection if it is non-null, logging any SQL error. */
    private void close(Connection c) {
        if (c != null) {
            try {
                c.close();
            } catch (SQLException e) {
                log.error("sql error", e);
            }
        }
    }

    /**
     * Always returns <code>true</code>.
     * NOTE(review): presumably this tells the job runner to recreate the
     * index before running this job; confirm against the IndexJob contract.
     */
    public boolean isRecreateJob() {
        return true;
    }

    /**
     * Sets the data source the articles are read from.
     *
     * @param dataSource JDBC data source giving access to the <code>article</code> table
     */
    public void setDataSource(DataSource dataSource) {
        this.dataSource = dataSource;
    }
}