/**
* Copyright (c) Codice Foundation
* <p/>
* This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License as published by the Free Software Foundation, either version 3 of the
* License, or any later version.
* <p/>
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. A copy of the GNU Lesser General Public License
* is distributed along with this program and can be found at
* <http://www.gnu.org/licenses/lgpl.html>.
*/
package ddf.catalog.source.solr;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.solr.client.solrj.SolrServer;
import org.codice.solr.factory.ConfigurationFileProxy;
import org.codice.solr.factory.ConfigurationStore;
import org.codice.solr.factory.SolrServerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import ddf.catalog.data.Metacard;
import ddf.catalog.data.impl.MetacardImpl;
import ddf.catalog.filter.proxy.adapter.GeotoolsFilterAdapterImpl;
import ddf.catalog.operation.impl.CreateRequestImpl;
import ddf.catalog.source.IngestException;
/**
 * Command-line utility that reads serialized {@link MetacardImpl} objects (one per {@code .dat}
 * file) from a directory and submits them in a single create request to a
 * {@link SolrCatalogProvider} backed by an embedded Solr server.
 * <p/>
 * An instance is single-use: both {@link #run()} and {@link #ingest()} shut the embedded Solr
 * server down when they finish. The class implements {@link Runnable} so that an importer built
 * for a slice of files can be handed to a {@link Thread}.
 */
public class ReuterSolrImport implements Runnable {

    private static final Logger LOGGER = LoggerFactory.getLogger(ReuterSolrImport.class);

    private static final String UNABLE_TO_READ_DIR_EXCEPTION_MSG = "unable to read directory";

    private static final String USAGE = "Usage: java -jar reutersparser.jar <datadir>";

    /** Number of times {@link #main2(String[])} runs the import while timing it. */
    private static final int TIMED_RUNS = 1;

    private SolrServer solrServer;

    private SolrCatalogProvider solrProvider;

    private final File[] arrayOfFile;

    /**
     * Creates an importer for the given files and starts the embedded Solr server it writes to.
     *
     * @param arrayOfFile files to read, each holding one serialized {@link MetacardImpl};
     *                    {@code null} is treated as an empty array
     * @throws RuntimeException if the embedded Solr server or the catalog provider cannot be
     *                          created
     */
    public ReuterSolrImport(File[] arrayOfFile) {
        this.arrayOfFile = (arrayOfFile == null) ? new File[0] : arrayOfFile;
        try {
            this.solrServer = SolrServerFactory
                    .getEmbeddedSolrServer("solrconfigSoft.xml", "schema.xml",
                            new ConfigurationFileProxy(ConfigurationStore.getInstance()));
            this.solrProvider = new SolrCatalogProvider(this.solrServer,
                    new GeotoolsFilterAdapterImpl(), new SolrFilterDelegateFactoryImpl());
        } catch (Exception localException) {
            throw new RuntimeException("unable to connect to solr server: ", localException);
        }
    }

    /**
     * Timing wrapper around {@link #main(String[])}: runs the import {@link #TIMED_RUNS} time(s)
     * and logs the total elapsed time in seconds and minutes.
     *
     * @param paramArrayOfString command-line arguments, passed through to
     *                           {@link #main(String[])}
     */
    public static void main2(String[] paramArrayOfString) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < TIMED_RUNS; i++) {
            LOGGER.info("{} start", i);
            // Delegate to main(); the previous self-call recursed until the stack overflowed.
            main(paramArrayOfString);
            LOGGER.info("{} done", i);
        }
        float elapsedTimeSec = (System.currentTimeMillis() - start) / 1000F;
        LOGGER.info("{} seconds", elapsedTimeSec);
        LOGGER.info("{} minutes", elapsedTimeSec / 60F);
    }

    /**
     * Entry point: ingests every {@code .dat} file found in the directory named by the first
     * argument and logs the elapsed time and throughput.
     * <p/>
     * If the argument is missing or is not an existing directory, or if the directory cannot be
     * listed, the problem is logged and the method returns without starting Solr.
     *
     * @param paramArrayOfString command-line arguments; element 0 is the data directory
     */
    public static void main(String[] paramArrayOfString) {
        File dataDir = null;
        if (paramArrayOfString != null && paramArrayOfString.length > 0
                && paramArrayOfString[0] != null) {
            dataDir = new File(paramArrayOfString[0]);
        }
        // isDirectory() is false for non-existent paths, so it covers the exists() check too.
        if (dataDir == null || !dataDir.isDirectory()) {
            LOGGER.warn("First argument needs to be an existing directory!");
            LOGGER.warn(USAGE);
            return;
        }

        File[] allFiles = null;
        try {
            allFiles = readDirectory(dataDir);
        } catch (XPathExpressionException e) {
            LOGGER.error(UNABLE_TO_READ_DIR_EXCEPTION_MSG, e);
        } catch (IOException e) {
            LOGGER.error(UNABLE_TO_READ_DIR_EXCEPTION_MSG, e);
        } catch (ParserConfigurationException e) {
            LOGGER.error(UNABLE_TO_READ_DIR_EXCEPTION_MSG, e);
        } catch (SAXException e) {
            LOGGER.error(UNABLE_TO_READ_DIR_EXCEPTION_MSG, e);
        } catch (ParseException e) {
            LOGGER.error(UNABLE_TO_READ_DIR_EXCEPTION_MSG, e);
        }
        if (allFiles == null) {
            // Listing failed and was logged above; do not start Solr just to fail on a null array.
            return;
        }

        ReuterSolrImport importer = new ReuterSolrImport(allFiles);
        LOGGER.info("Starting ingest.");
        long start = System.currentTimeMillis();
        int submitted = importer.loadAndSubmit();
        float elapsedTimeSec = (System.currentTimeMillis() - start) / 1000F;

        LOGGER.info("{} seconds", elapsedTimeSec);
        if (elapsedTimeSec > 0F) {
            // Throughput is based on the metacards actually submitted, not a fixed corpus size.
            LOGGER.info("records/sec = {}", submitted / elapsedTimeSec);
        }
        LOGGER.info("Done!");
    }

    /**
     * Lists the files in a directory whose name contains {@code ".dat"}, sorted by
     * {@link File}'s natural ordering.
     *
     * @param paramFile directory to scan
     * @return the matching files, sorted; never {@code null} or empty
     * @throws IOException      if the directory cannot be listed (for example it is not a
     *                          directory or an I/O error occurs)
     * @throws RuntimeException if the directory contains no matching files
     * @throws XPathExpressionException     declared for signature compatibility; not thrown by
     *                                      this implementation
     * @throws ParserConfigurationException declared for signature compatibility; not thrown by
     *                                      this implementation
     * @throws SAXException                 declared for signature compatibility; not thrown by
     *                                      this implementation
     * @throws ParseException               declared for signature compatibility; not thrown by
     *                                      this implementation
     */
    public static File[] readDirectory(File paramFile)
            throws XPathExpressionException, IOException, ParserConfigurationException,
            SAXException, ParseException {
        File[] allFiles = paramFile.listFiles(new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                return candidate.getName().contains(".dat");
            }
        });
        // File.listFiles returns null (not an empty array) when the path cannot be listed.
        if (allFiles == null) {
            throw new IOException("Unable to list files in directory: " + paramFile);
        }
        if (allFiles.length == 0) {
            throw new RuntimeException("Directory doesn't contain .dat files!");
        }
        Arrays.sort(allFiles);
        return allFiles;
    }

    /**
     * Deserializes one {@link MetacardImpl} from the given file.
     *
     * @param localFile file containing a single Java-serialized {@link MetacardImpl}
     * @return the metacard if it was read successfully and has a non-empty location; otherwise
     *         {@code null} (read failures are logged)
     */
    private Metacard readFile(File localFile) {
        FileInputStream fin = null;
        ObjectInputStream ois = null;
        try {
            fin = new FileInputStream(localFile);
            // NOTE(review): native Java deserialization; only point this tool at trusted files.
            ois = new ObjectInputStream(fin);
            MetacardImpl mc = (MetacardImpl) ois.readObject();
            if (mc != null && mc.getLocation() != null && mc.getLocation().length() != 0) {
                return mc;
            }
            return null;
        } catch (Exception e) {
            LOGGER.error("Unable to read file: " + localFile, e);
            return null;
        } finally {
            // Closing the object stream also closes the underlying file stream; fall back to the
            // file stream when the object stream could not be constructed.
            try {
                if (ois != null) {
                    ois.close();
                } else if (fin != null) {
                    fin.close();
                }
            } catch (IOException e) {
                LOGGER.warn("Unable to close file: " + localFile, e);
            }
        }
    }

    /**
     * Reads all configured files, submits the resulting metacards and shuts the Solr server down.
     * Behaves exactly like {@link #ingest()}.
     */
    @Override
    public void run() {
        loadAndSubmit();
    }

    /**
     * Reads all configured files, submits the resulting metacards in one create request and
     * shuts the Solr server down. An {@link IngestException} is logged rather than rethrown.
     */
    public void ingest() {
        loadAndSubmit();
    }

    /**
     * Shared implementation of {@link #run()} and {@link #ingest()}.
     *
     * @return the number of metacards submitted, or 0 if the create request failed with an
     *         {@link IngestException}
     */
    private int loadAndSubmit() {
        try {
            List<Metacard> metacards = new ArrayList<Metacard>(arrayOfFile.length);
            for (File file : arrayOfFile) {
                Metacard mc = readFile(file);
                if (mc != null) {
                    metacards.add(mc);
                }
            }
            try {
                solrProvider.create(new CreateRequestImpl(metacards));
                return metacards.size();
            } catch (IngestException e) {
                LOGGER.error("Unexpected IngestException", e);
                return 0;
            }
        } finally {
            // Always release the embedded server, even if an unchecked exception escapes.
            solrServer.shutdown();
        }
    }
}