/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2010-2015 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.database;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import com.jaeksoft.pojodbc.Query;
import com.jaeksoft.pojodbc.Transaction;
import com.jaeksoft.pojodbc.connection.JDBCConnection;
import com.jaeksoft.searchlib.Client;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.crawler.FieldMapContext;
import com.jaeksoft.searchlib.crawler.common.process.CrawlStatus;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.util.DatabaseUtils;
import com.jaeksoft.searchlib.util.InfoCallback;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.Variables;
/**
 * Crawl thread that runs the SQL SELECT of a {@link DatabaseCrawlSql} and,
 * for each returned row, either collects a key to delete from the index (when
 * a unique-key delete field is configured) or builds a document to index.
 * Work is flushed in batches whose size is the crawl's buffer size.
 */
public class DatabaseCrawlSqlThread extends DatabaseCrawlThread {

    /**
     * Number of consecutive {@link SQLException}s tolerated on
     * {@code ResultSet.next()} before the exception is rethrown.
     */
    private static final int MAX_CONSECUTIVE_SQL_ERRORS = 10;

    /** Its write lock is held while counts are moved from "pending" to "updated". */
    private final ReadWriteLock rwl = new ReadWriteLock();

    /** Private duplicate of the crawl definition, with the variables applied. */
    private final DatabaseCrawlSql databaseCrawl;

    /**
     * Creates the thread. The given crawl definition is passed as-is to the
     * super class, while this class works on a duplicate on which the
     * variables have been applied.
     *
     * @param client        the client passed to the super class
     * @param crawlMaster   the crawl master passed to the super class
     * @param databaseCrawl the crawl definition to duplicate
     * @param variables     the variables applied to the duplicate
     * @param infoCallback  the callback passed to the super class
     */
    public DatabaseCrawlSqlThread(Client client, DatabaseCrawlMaster crawlMaster, DatabaseCrawlSql databaseCrawl,
            Variables variables, InfoCallback infoCallback) {
        super(client, crawlMaster, databaseCrawl, infoCallback);
        this.databaseCrawl = (DatabaseCrawlSql) databaseCrawl.duplicate();
        this.databaseCrawl.applyVariables(variables);
    }

    /**
     * Flushes the buffered documents to the index if the buffer is not empty
     * and holds at least {@code limit} documents. On flush, the SQL update
     * statement of the crawl is handed to {@link DatabaseUtils#update} with
     * the buffered primary keys, both lists are cleared, the progress is
     * reported to the info callback (if any) and the thread sleeps for the
     * configured delay.
     *
     * @param transaction       the transaction used for the SQL update
     * @param indexDocumentList the buffered documents; cleared on flush
     * @param limit             minimum buffer size that triggers a flush;
     *                          0 flushes any non-empty buffer
     * @param pkList            the buffered primary keys; cleared on flush
     * @return {@code true} if a flush happened, {@code false} otherwise
     */
    private boolean index(Transaction transaction, List<IndexDocument> indexDocumentList, int limit,
            List<String> pkList) throws NoSuchAlgorithmException, IOException, URISyntaxException, SearchLibException,
            InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException,
            InterruptedException {
        final int count = indexDocumentList.size();
        if (count == 0 || count < limit) {
            return false;
        }
        setStatus(CrawlStatus.INDEXATION);
        client.updateDocuments(indexDocumentList);
        rwl.w.lock();
        try {
            pendingIndexDocumentCount -= count;
            updatedIndexDocumentCount += count;
        } finally {
            rwl.w.unlock();
        }
        DatabaseUtils.update(transaction, pkList, null, databaseCrawl.getSqlUpdateMode(),
                databaseCrawl.getSqlUpdate());
        pkList.clear();
        indexDocumentList.clear();
        if (infoCallback != null) {
            infoCallback.setInfo(updatedIndexDocumentCount + " document(s) indexed");
        }
        sleepMs(databaseCrawl.getMsSleep());
        return true;
    }

    /**
     * Deletes the buffered keys from the index if the buffer is not empty and
     * holds at least {@code limit} keys. The keys are matched against the
     * unique field of the client's schema. On flush, the SQL update statement
     * of the crawl is handed to {@link DatabaseUtils#update} with the same
     * keys, the list is cleared, the progress is reported to the info callback
     * (if any) and the thread sleeps for the configured delay.
     *
     * @param transaction        the transaction used for the SQL update
     * @param deleteDocumentList the buffered keys; cleared on flush
     * @param limit              minimum buffer size that triggers a flush;
     *                           0 flushes any non-empty buffer
     * @return {@code true} if a flush happened, {@code false} otherwise
     */
    private boolean delete(Transaction transaction, List<String> deleteDocumentList, int limit)
            throws NoSuchAlgorithmException, IOException, URISyntaxException, SearchLibException,
            InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException, InterruptedException {
        final int count = deleteDocumentList.size();
        if (count == 0 || count < limit) {
            return false;
        }
        setStatus(CrawlStatus.DELETION);
        client.deleteDocuments(client.getSchema().getUniqueField(), deleteDocumentList);
        rwl.w.lock();
        try {
            pendingDeleteDocumentCount -= count;
            updatedDeleteDocumentCount += count;
        } finally {
            rwl.w.unlock();
        }
        DatabaseUtils.update(transaction, deleteDocumentList, null, databaseCrawl.getSqlUpdateMode(),
                databaseCrawl.getSqlUpdate());
        deleteDocumentList.clear();
        if (infoCallback != null) {
            infoCallback.setInfo(updatedDeleteDocumentCount + " document(s) deleted");
        }
        sleepMs(databaseCrawl.getMsSleep());
        return true;
    }

    /**
     * Reads the result set until it is exhausted or the thread is aborted,
     * mapping each row to index fields. When a primary key column is
     * configured, consecutive rows carrying the same primary key value are
     * merged into a single document. Documents are flushed through
     * {@link #index} each time the buffer reaches the crawl's buffer size, and
     * once more after the loop for the remainder.
     * <p>
     * A {@link SQLException} thrown by {@code resultSet.next()} is logged and
     * the call is retried; after {@value #MAX_CONSECUTIVE_SQL_ERRORS}
     * consecutive failures the next one is rethrown.
     *
     * @param transaction the transaction handed to {@link #index}
     * @param resultSet   the rows to index
     * @param columns     the column labels of the result set
     */
    final private void runner_update(Transaction transaction, ResultSet resultSet, TreeSet<String> columns)
            throws NoSuchAlgorithmException, SQLException, IOException, URISyntaxException, SearchLibException,
            InstantiationException, IllegalAccessException, ClassNotFoundException, ParseException, SyntaxError,
            InterruptedException {
        // An empty primary key name is treated like no primary key at all:
        // in that case rows are never merged. Resolved once, before the loop.
        String dbPrimaryKey = databaseCrawl.getPrimaryKey();
        if (dbPrimaryKey != null && dbPrimaryKey.length() == 0) {
            dbPrimaryKey = null;
        }
        DatabaseFieldMap databaseFieldMap = databaseCrawl.getFieldMap();
        int bufferSize = databaseCrawl.getBufferSize();
        // databaseCrawl is a private duplicate, so its language is read once.
        LanguageEnum lang = databaseCrawl.getLang();

        IndexDocument indexDocument = null;
        IndexDocument lastFieldContent = null;
        boolean merge = false;
        String lastPrimaryKey = null;
        List<IndexDocument> indexDocumentList = new ArrayList<IndexDocument>(0);
        List<String> pkList = new ArrayList<String>(0);
        FieldMapContext context = new FieldMapContext(client, lang);
        Set<String> filePathSet = new TreeSet<String>();
        int faultTolerancy = MAX_CONSECUTIVE_SQL_ERRORS;

        while (!isAborted()) {
            try {
                if (!resultSet.next()) {
                    break;
                }
                // A successful fetch resets the consecutive error budget.
                faultTolerancy = MAX_CONSECUTIVE_SQL_ERRORS;
            } catch (SQLException e) {
                if (faultTolerancy <= 0) {
                    throw e;
                }
                Logging.error(e.getMessage() + " Vendor Error Number: " + e.getErrorCode() + " Counters: "
                        + this.getCountInfo(), e);
                faultTolerancy--;
                continue;
            }

            if (dbPrimaryKey != null) {
                String pKey = resultSet.getString(dbPrimaryKey);
                // Merge only when this row has the same non-null key as the
                // previous row (equals(null) is false, so a null previous key
                // never merges).
                merge = pKey != null && pKey.equals(lastPrimaryKey);
                lastPrimaryKey = pKey;
            }

            if (!merge) {
                // New document: flush the buffer first if it is full.
                if (index(transaction, indexDocumentList, bufferSize, pkList)) {
                    setStatus(CrawlStatus.CRAWL);
                }
                indexDocument = new IndexDocument(context.lang);
                indexDocumentList.add(indexDocument);
                filePathSet.clear();
                pendingIndexDocumentCount++;
                // NOTE(review): lastPrimaryKey is null when no primary key is
                // configured (or the column value is null), so a null entry is
                // buffered here - confirm DatabaseUtils.update handles it.
                pkList.add(lastPrimaryKey);
            }

            IndexDocument newFieldContents = new IndexDocument(lang);
            databaseFieldMap.mapResultSet(context, resultSet, columns, newFieldContents, filePathSet);
            if (merge && lastFieldContent != null) {
                indexDocument.addIfNotAlreadyHere(newFieldContents);
            } else {
                indexDocument.add(newFieldContents);
            }
            lastFieldContent = newFieldContents;
        }
        // Flush whatever is left in the buffer.
        index(transaction, indexDocumentList, 0, pkList);
    }

    /**
     * Reads the result set until it is exhausted or the thread is aborted,
     * collecting the value of the unique-key delete column of each row. Rows
     * whose value is {@code null} are skipped. Keys are flushed through
     * {@link #delete} each time the buffer reaches the crawl's buffer size,
     * and once more after the loop for the remainder.
     *
     * @param transaction the transaction handed to {@link #delete}
     * @param resultSet   the rows holding the keys to delete
     * @param columns     the column labels of the result set (not used here)
     */
    final private void runner_delete(Transaction transaction, ResultSet resultSet, TreeSet<String> columns)
            throws NoSuchAlgorithmException, SQLException, IOException, URISyntaxException, SearchLibException,
            InstantiationException, IllegalAccessException, ClassNotFoundException, InterruptedException {
        List<String> deleteKeyList = new ArrayList<String>(0);
        String uniqueKeyDeleteField = databaseCrawl.getUniqueKeyDeleteField();
        int bufferSize = databaseCrawl.getBufferSize();
        while (resultSet.next() && !isAborted()) {
            if (delete(transaction, deleteKeyList, bufferSize)) {
                setStatus(CrawlStatus.CRAWL);
            }
            String uKey = resultSet.getString(uniqueKeyDeleteField);
            if (uKey != null) {
                deleteKeyList.add(uKey);
                pendingDeleteDocumentCount++;
            }
        }
        // Flush whatever is left in the buffer.
        delete(transaction, deleteKeyList, 0);
    }

    /**
     * Executes the crawl: opens a transaction, runs the SELECT statement with
     * the configured fetch size, then processes the rows in delete mode when a
     * non-empty unique-key delete field is configured, in update mode
     * otherwise. The transaction is committed and the client reloaded only if
     * at least one document was indexed or deleted. The transaction is always
     * closed.
     */
    @Override
    public void runner() throws Exception {
        setStatus(CrawlStatus.STARTING);
        JDBCConnection connectionManager = databaseCrawl.getNewJdbcConnection();
        Transaction transaction = null;
        try {
            transaction = databaseCrawl.getNewTransaction(connectionManager);
            Query query = transaction.prepare(databaseCrawl.getSqlSelect());
            query.getStatement().setFetchSize(databaseCrawl.getFetchSize());
            ResultSet resultSet = query.getResultSet();
            setStatus(CrawlStatus.CRAWL);

            // Store the list of column labels in a sorted set
            ResultSetMetaData metaData = resultSet.getMetaData();
            TreeSet<String> columns = new TreeSet<String>();
            int columnCount = metaData.getColumnCount();
            for (int i = 1; i <= columnCount; i++) {
                columns.add(metaData.getColumnLabel(i));
            }

            // A null or empty delete field means the crawl runs in update mode.
            String ukDeleteField = databaseCrawl.getUniqueKeyDeleteField();
            boolean deleteMode = ukDeleteField != null && ukDeleteField.length() > 0;
            if (deleteMode) {
                runner_delete(transaction, resultSet, columns);
            } else {
                runner_update(transaction, resultSet, columns);
            }

            if (updatedIndexDocumentCount > 0 || updatedDeleteDocumentCount > 0) {
                transaction.commit();
                client.reload();
            }
        } finally {
            if (transaction != null) {
                transaction.close();
            }
        }
    }
}