/** * Copyright 1999-2009 The Pegadi Team * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.pegadi.server.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.pegadi.index.IndexJob; import org.pegadi.index.JobContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import javax.sql.DataSource; import java.io.IOException; import java.io.StringReader; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; public class IndexAllArticlesJob implements IndexJob { private DataSource dataSource; private Logger log = LoggerFactory.getLogger(getClass()); public void executeJob(JobContext jobContext) { int count = 0; Connection c = null; try { XMLReader reader = XMLReaderFactory.createXMLReader(); final StringBuffer text = new StringBuffer(); ContentHandler handler = new DefaultHandler() { boolean isInText = false; public void startElement(String string, String string1, String string2, Attributes attributes) throws SAXException { if(string1.equals("text")) { isInText = true; } } public void characters(char[] chars, int i, int i1) throws SAXException { if(isInText) { text.append(chars, i , i1); } } public void endElement(String string, String string1, String string2) throws SAXException { if(string1.equals("text")) { isInText = false; } } }; reader.setContentHandler(handler); c = dataSource.getConnection(); PreparedStatement p = c.prepareStatement("SELECT id, text from article"); ResultSet rs = p.executeQuery(); while(rs.next()) { String id = rs.getString(1); String xml = rs.getString(2); text.setLength(0); try { if(xml != null && xml.startsWith("<?")) { reader.parse(new InputSource(new StringReader(xml))); } } catch (IOException e) { log.error("parsing of article " + id +" failed"); } catch (SAXException e) { log.error("parsing of article " + id +" failed"); } Document d = new Document(); d.add(Field.Keyword("id", id)); d.add(Field.Text("content", new StringReader(text.toString()))); try { jobContext.getIndexWriter().addDocument(d); } catch (IOException e) { log.error("Failed adding article " + id +" to index"); } if(count++ % 100 == 0) { log.info("Indexed " + count +" articles"); } } } catch (SQLException e) { log.error("Sql error", e); } catch (SAXException e) { log.error("Parse error", e); } finally { if(c != null) { try { c.close(); } catch (SQLException e) { log.error("sql error", e); } } } } public boolean isRecreateJob() { return true; } public void setDataSource(DataSource dataSource) { this.dataSource = dataSource; } }