/*
* Data Hub Service (DHuS) - For Space data distribution.
* Copyright (C) 2013,2014,2015 GAEL Systems
*
* This file is part of DHuS software sources.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.gael.dhus.datastore.scanner;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntProperty;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
import fr.gael.dhus.database.object.Collection;
import fr.gael.dhus.database.object.Product;
import fr.gael.dhus.database.object.User;
import fr.gael.dhus.service.FileScannerService;
import fr.gael.dhus.service.ProductService;
import fr.gael.drbx.cortex.DrbCortexItemClass;
import fr.gael.drbx.cortex.DrbCortexModel;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.olingo.odata2.api.exception.ODataException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
* Provide the scanner instances.
*/
@Component ("scannerFactory")
public class ScannerFactory
{
private static final Logger LOGGER = LogManager.getLogger(ScannerFactory.class);
@Autowired
private ProductService productService;
@Autowired
private FileScannerService fs_service;
private ConcurrentHashMap<Long, Scanner> runningScanners =
new ConcurrentHashMap<> ();
private String[] itemClasses;
/**
* Retrieve the list of items that the scanner is able to retrieve. This
* support allow not to perform selective ingest according to the item
* classes.
*
* @return the list of supported items.
*/
public List<DrbCortexItemClass> getScannerSupport ()
{
if (itemClasses == null)
{
itemClasses = getDefaultCortexSupport ();
if ((itemClasses!=null) && LOGGER.isDebugEnabled())
{
LOGGER.debug("Supported classes:");
for (String cl:itemClasses)
{
LOGGER.debug(" - " + cl);
}
}
}
if (itemClasses == null)
throw new UnsupportedOperationException (
"Empty item list: no scanner support.");
List<DrbCortexItemClass> supported = new ArrayList<DrbCortexItemClass> ();
if (itemClasses != null)
{
for (String s : itemClasses)
{
try
{
supported.add (DrbCortexItemClass.getCortexItemClassByName (s));
}
catch (Exception e)
{
LOGGER.error ("Cannot add support for class " + s);
}
}
}
return supported;
}
/**
* Retrieve the dhus system supported items for file scanning processing.
* Is considered supported all classes having
* <code>http://www.gael.fr/dhus#metadataExtractor</code> property
* connection.
* @return the list of supported class names.
*/
public static synchronized String[] getDefaultCortexSupport ()
{
DrbCortexModel model;
try
{
model = DrbCortexModel.getDefaultModel ();
}
catch (IOException e)
{
throw new UnsupportedOperationException (
"Drb cortex not properly initialized.");
}
ExtendedIterator it=model.getCortexModel ().getOntModel ().listClasses ();
List<String>list = new ArrayList<String> ();
while (it.hasNext ())
{
OntClass cl = (OntClass)it.next ();
OntProperty metadata_extractor_p = cl.getOntModel().getOntProperty(
"http://www.gael.fr/dhus#support");
StmtIterator properties = cl.listProperties (metadata_extractor_p);
while (properties.hasNext ())
{
Statement stmt = properties.nextStatement ();
LOGGER.debug ("Scanner Support Added for " +
stmt.getSubject ().toString ());
list.add (stmt.getSubject ().toString ());
}
}
return list.toArray (new String[list.size ()]);
}
/**
* Retrieve scanner for given url
*/
public Scanner getScanner (String url)
{
return getScanner(url, null, null, null);
}
/**
* Retrieve the scanner according to the passed archive.
*
* @param archive used to define the scanner.
* @return the scanner able to scan passed archive, null if no scanner found.
*/
public Scanner getScanner (String url, String username, String password,
String pattern)
{
if (url == null) throw new NullPointerException ("URL is required.");
if ( (new File (url)).exists ())
{
FileScanner scan = new FileScanner (url, false);
scan.setUserPattern (pattern);
return scan;
}
if (url.startsWith ("file:"))
{
FileScanner scan = new FileScanner (url.split ("file:", 2)[1], false);
scan.setUserPattern (pattern);
return scan;
}
if (url.startsWith ("ftp"))
{
FtpScanner s = new FtpScanner (url, false, username, password);
s.setUserPattern (pattern);
return s;
}
if (url.startsWith ("http")) // http or https
{
ODataScanner scan = null;
try
{
scan = new ODataScanner (url, false, username, password);
}
catch (URISyntaxException | IOException | ODataException e)
{
throw new RuntimeException (e);
}
scan.setUserPattern (pattern);
return scan;
}
throw new UnsupportedOperationException ("Url not supported (\"" +
url + "\").");
}
/**
* Provides a scanner able to fully scan&upload passed URL.
*
* @param url
* @param username
* @param password
* @return
*/
public Scanner getUploadScanner (String url, final String username,
final String password, String pattern)
{
final Scanner scanner = getScanner (url, username, password, pattern);
scanner.setSupportedClasses (getScannerSupport ());
return scanner;
}
/**
* Process passed file scanner attached to a the passed user within
* a separate thread. If the requested scanner is already running (
* from schedule or UI), it will not restart.
*
* @param scan_id
* @param user
* @throws ScannerException when scanner cannot be started.
*/
public void processScan (final Long scan_id, final User user)
throws ScannerException
{
SimpleDateFormat sdf = new SimpleDateFormat (
"EEEE dd MMMM yyyy - HH:mm:ss", Locale.ENGLISH);
// Synchronize with runningScanner instance to avoid 2 simultaneous
// scanners executions.
// Running scanner hash table should contains the scanner, but during the
// transition between scanner the status settings and the scanner
// initialization, runningScanner[scan_id] could contains null to avoid
// the same scanner being executed twice.
synchronized (runningScanners)
{
if (runningScanners.containsKey (scan_id))
{
throw new ScannerException (
"Scanner #" + scan_id + " already running.");
}
runningScanners.put (scan_id, new UninitilizedScanner ());
}
fr.gael.dhus.database.object.FileScanner fs =
fs_service.getFileScanner (scan_id);
fs.setStatus (fr.gael.dhus.database.object.FileScanner.STATUS_RUNNING);
fs.setStatusMessage ("Started on " + sdf.format (new Date ()));
fs_service.updateFileScanner (fs);
// prepare scan
ScannerListener listener = new ScannerListener ();
Scanner scanner = getUploadScanner (fs.getUrl (), fs.getUsername (),
fs.getPassword (), fs.getPattern ());
scanner.getScanList ().addListener (listener);
Hook hook = new Hook (fs);
Runtime.getRuntime ().addShutdownHook (hook);
// perform scan
try
{
scanner.scan ();
}
catch (InterruptedException e)
{
fs.setStatus (fr.gael.dhus.database.object.FileScanner.STATUS_OK);
fs.setStatusMessage (
"Scanner stopped by user on " + sdf.format (new Date ()));
fs_service.updateFileScanner (fs);
LOGGER.warn ("Scanner stop by a user");
return;
}
// prepare ingestion
List<URL> waiting_product = listener.newlyProducts();
if (waiting_product.isEmpty())
{
runningScanners.remove(scan_id);
LOGGER.info("Scanner #{}: No products scanned.", scan_id);
return;
}
List<Collection> collections = fs_service.getScannerCollection(fs);
FileScannerWrapper wrapper = new FileScannerWrapper (fs)
{
@Override
protected synchronized void processingsDone (String end_message)
{
super.processingsDone (end_message);
runningScanners.remove (scan_id);
}
};
wrapper.setTotalProcessed (waiting_product.size ());
LOGGER.info("Scanner #{}: {} products scanned.", scan_id, wrapper.getTotalProcessed());
// perform ingestion
for (URL url : waiting_product)
{
try
{
Product p = productService.addProduct (url, user, url.toString ());
productService.processProduct (
p, user, collections, scanner, wrapper);
}
catch (RuntimeException e)
{
LOGGER.error("Unable to start ingestion.", e);
fs.setStatus(fr.gael.dhus.database.object.FileScanner.STATUS_ERROR);
fs.setStatusMessage (e.getMessage ());
fs_service.updateFileScanner (fs);
runningScanners.remove (scan_id);
}
}
}
public void stopScan (final Long scan_id)
throws ScannerException
{
Scanner scanner = null;
// Thread-safe retrieve the scanner and remove it from the list.
synchronized (runningScanners)
{
scanner = runningScanners.get (scan_id);
if (scanner == null)
{
LOGGER.warn ("Scanner already stopped.");
return;
}
if (scanner instanceof UninitilizedScanner)
{
LOGGER.warn ("Scanner not initialized (retry stop later).");
return;
}
runningScanners.remove(scan_id);
}
fr.gael.dhus.database.object.FileScanner fileScanner =
fs_service.getFileScanner (scan_id);
if (fileScanner != null)
{
// Just update the message
fileScanner.setStatusMessage (fileScanner.getStatusMessage () +
"<b>Interrupted</b>: waiting ongoing processings ends...<br>\n");
fs_service.updateFileScanner (fileScanner);
}
LOGGER.info ("Scanner stopped.");
scanner.stop ();
}
/**
* Shutdown hook used to manage Scanner message when user stops dhus
* while scanner is running.
*/
class Hook extends Thread
{
private fr.gael.dhus.database.object.FileScanner scanner;
public Hook (fr.gael.dhus.database.object.FileScanner scanner)
{
this.scanner = scanner;
}
public void run()
{
scanner.setStatus (
fr.gael.dhus.database.object.FileScanner.STATUS_ERROR);
scanner.setStatusMessage (
scanner.getStatusMessage () +
"Scanner interrupted because DHuS stopped.");
fs_service.updateFileScanner (scanner);
}
}
/**
* An internal scanner implementation to manage the scanner initialization
* transition.
*/
class UninitilizedScanner implements Scanner
{
@Override
public int scan() throws InterruptedException
{
return 0;
}
@Override
public void stop()
{
}
@Override
public boolean isStopped()
{
return false;
}
@Override
public AsynchronousLinkedList<URLExt> getScanList()
{
return null;
}
@Override
public void setSupportedClasses(List<DrbCortexItemClass> supported)
{
}
@Override
public void setForceNavigate(boolean force)
{
}
@Override
public boolean isForceNavigate()
{
return false;
}
@Override
public void setUserPattern(String pattern)
{
}
}
}