/**
*
*/
package org.ariadne_eu.utils.lucene.reindex;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.log4j.Logger;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.apache.xpath.XPathAPI;
import org.ariadne.config.PropertiesManager;
import org.ariadne_eu.metadata.insert.InsertMetadataFactory;
import org.ariadne_eu.metadata.insert.InsertMetadataImpl;
import org.ariadne_eu.metadata.insert.InsertMetadataLuceneImpl;
import org.ariadne_eu.utils.config.RepositoryConstants;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
/**
 * Re-indexing implementation that walks a directory tree of XML metadata
 * files and inserts each file into the Lucene index through
 * {@link InsertMetadataLuceneImpl}.
 *
 * <p>The directory to walk is read from the property named by
 * {@code RepositoryConstants.MD_SPIFS_DIR}. The XPath expressions used to
 * extract a record identifier are read from the properties
 * {@code SR_XPATH_QRY_ID + ".1"}, {@code ".2"}, ... and fall back to
 * {@value #DEFAULT_IDENTIFIER_XPATH} when none is configured.</p>
 *
 * <p>Note: the XPath query list is held in a static field that is replaced
 * every time an instance is initialised.</p>
 *
 * @author gonzalo
 */
public class ReIndexFSImpl extends ReIndexImpl {

    private static final Logger log = Logger.getLogger(ReIndexFSImpl.class);

    /** XPath used to locate the identifier when no query is configured. */
    private static final String DEFAULT_IDENTIFIER_XPATH = "general/identifier/entry/text()";

    /** Directory holding the metadata files; null when the property is not set. */
    private String dirString;

    /**
     * Identifier XPath queries, tried in order. Starts out with the default
     * query so the static helpers never see a null list.
     */
    private static Vector<String> xpathQueries =
            new Vector<String>(Arrays.asList(DEFAULT_IDENTIFIER_XPATH));

    /** Creates the re-indexer and loads its configuration. */
    public ReIndexFSImpl() {
        initialize();
    }

    /**
     * Reads the metadata directory and the identifier XPath queries from the
     * configuration. Configuration problems are logged, not thrown; the XPath
     * queries are always (re)loaded, even when the directory setting is
     * missing or invalid.
     */
    void initialize() {
        super.initialize();
        try {
            dirString = PropertiesManager.getInstance().getProperty(RepositoryConstants.getInstance().MD_SPIFS_DIR);
            if (dirString == null) {
                log.error("initialize failed: no " + RepositoryConstants.getInstance().MD_SPIFS_DIR + " found");
            } else if (!new File(dirString).isDirectory()) {
                log.error("initialize failed: " + RepositoryConstants.getInstance().MD_SPIFS_DIR + " invalid directory");
            }
        } catch (Throwable t) {
            log.error("initialize: ", t);
        }
        // Loaded outside the try above so that a bad directory setting can
        // no longer prevent the query list from being set up.
        xpathQueries = loadXpathQueries();
        //TODO: check for valid lucene index
    }

    /**
     * Collects the configured identifier XPath queries, reading the
     * properties {@code SR_XPATH_QRY_ID.1}, {@code .2}, ... until the first
     * missing one.
     *
     * @return the configured queries in order, or a list holding only the
     *         default query when none is configured
     */
    private static Vector<String> loadXpathQueries() {
        // Built locally and published in one assignment by the caller, so a
        // half-filled list is never visible through the static field.
        Vector<String> queries = new Vector<String>();
        try {
            String keyPrefix = RepositoryConstants.getInstance().SR_XPATH_QRY_ID + ".";
            String query;
            for (int i = 1; (query = PropertiesManager.getInstance().getProperty(keyPrefix + i)) != null; i++) {
                queries.add(query);
            }
        } catch (Throwable t) {
            log.error("initialize: ", t);
        }
        if (queries.isEmpty()) {
            queries.add(DEFAULT_IDENTIFIER_XPATH);
        }
        return queries;
    }

    /**
     * Indexes every entry found directly under the given directory. Each
     * top-level sub-directory is indexed with its own name as collection
     * name; files lying directly in the directory are indexed under the
     * collection {@code "ARIADNE"}.
     *
     * <p>Does nothing when no {@link InsertMetadataLuceneImpl} is configured.
     * Otherwise {@code createLuceneIndex()} is called first; the files are
     * then indexed only if the {@code MD_INSERT_IMPLEMENTATION} property is
     * set and the directory could be listed.</p>
     *
     * @param outputDir path of the directory that contains the metadata
     *                  files to read (despite its name this is the input of
     *                  the indexing run); a null or unlistable path is logged
     *                  and ignored
     */
    public void reIndexMetadata(String outputDir) {
        if (outputDir == null) {
            log.error("reIndexMetadata: no metadata directory given");
            return;
        }
        File dir = new File(outputDir);
        // null when the path is not a directory or cannot be read
        File[] files = dir.listFiles();

        InsertMetadataLuceneImpl luceneImpl = null;
        InsertMetadataImpl[] insertImpls = InsertMetadataFactory.getInsertImpl();
        for (InsertMetadataImpl insertImpl : insertImpls) {
            if (insertImpl instanceof InsertMetadataLuceneImpl) {
                luceneImpl = (InsertMetadataLuceneImpl) insertImpl;
            }
        }
        if (luceneImpl == null) {
            return;
        }
        luceneImpl.createLuceneIndex();

        String implementation = PropertiesManager.getInstance().getProperty(RepositoryConstants.getInstance().MD_INSERT_IMPLEMENTATION);
        if (implementation == null) {
            return;
        }
        if (files == null) {
            log.error("reIndexMetadata: cannot list directory " + dir.getAbsolutePath());
            return;
        }
        for (File mdFile : files) {
            String collection = mdFile.isDirectory() ? mdFile.getName() : "ARIADNE";
            indexFile(mdFile, luceneImpl, new String[]{collection});
        }
    }

    /** Indexes the directory configured through {@code MD_SPIFS_DIR}. */
    public void reIndexMetadata() {
        reIndexMetadata(dirString);
    }

    /**
     * Indexes a file, or recursively every file below a directory. Entries
     * named {@code .DS_Store} (case-insensitive) are skipped.
     *
     * @param mdFile     file or directory to index
     * @param luceneImpl index writer that receives the metadata
     * @param cName      collection names to attach to the indexed records
     */
    private static void indexFile (File mdFile, InsertMetadataLuceneImpl luceneImpl, String[] cName) {
        if (mdFile.getName().equalsIgnoreCase(".DS_Store")) {
            return;
        }
        if (mdFile.isDirectory()) {
            indexDirectory(mdFile, luceneImpl, cName);
        } else {
            indexSingleFile(mdFile, luceneImpl, cName);
        }
    }

    /**
     * Indexes the children of a directory. A sub-directory is indexed with
     * its own name appended to the collection names; plain files inherit the
     * collection names unchanged.
     */
    private static void indexDirectory(File dir, InsertMetadataLuceneImpl luceneImpl, String[] cName) {
        File[] children = dir.listFiles();
        if (children == null) {
            // listFiles() signals an I/O problem with null instead of throwing
            log.error("indexFile: cannot list directory " + dir.getAbsolutePath());
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                List<String> allCnames = new ArrayList<String>(Arrays.asList(cName));
                allCnames.add(child.getName());
                log.debug("indexFile: collections=" + allCnames);
                indexFile(child, luceneImpl, allCnames.toArray(new String[allCnames.size()]));
            } else {
                indexFile(child, luceneImpl, cName);
            }
        }
    }

    /**
     * Parses one metadata file, extracts its identifier and hands the
     * serialised root element to the index. Any failure is logged and
     * confined to this file so that the remaining files are still indexed.
     */
    private static void indexSingleFile(File mdFile, InsertMetadataLuceneImpl luceneImpl, String[] cName) {
        try {
            String xml = readFile(mdFile, "UTF-8");
            Document doc = getDoc(xml);
            if (doc == null) {
                log.error("indexFile: could not parse, skipping fileName=" + mdFile.getName());
                return;
            }
            String identifier = getIdentifier(doc);
            if (identifier == null) {
                log.warn("indexFile: no identifier found, skipping fileName=" + mdFile.getName());
                return;
            }
            StringWriter out = new StringWriter();
            XMLSerializer serializer = new XMLSerializer(out, new OutputFormat(doc));
            // getDocumentElement() rather than getFirstChild(): the first child
            // of a document may be a comment, processing instruction or DOCTYPE.
            serializer.serialize(doc.getDocumentElement());
            luceneImpl.insertMetadata(identifier, out.toString(), cName);
        } catch (Exception e) {
            log.error("indexFile: fileName=" + mdFile.getName(), e);
        }
    }

    /**
     * Evaluates the configured XPath queries against the document's root
     * element, in order, and returns the node value of the first match.
     *
     * @param doc parsed metadata document
     * @return the identifier, or null when no query yields a value
     */
    private static String getIdentifier (Document doc) {
        Element root = doc.getDocumentElement();
        for (String xpathQuery : xpathQueries) {
            try {
                Node match = XPathAPI.selectSingleNode(root, xpathQuery);
                if (match != null && match.getNodeValue() != null) {
                    return match.getNodeValue();
                }
            } catch (Exception e) {
                log.debug("getIdentifier", e);
            }
        }
        return null;
    }

    /**
     * Parses an XML string into a DOM document using a default
     * {@link DocumentBuilderFactory}.
     *
     * @param xml XML text to parse
     * @return the parsed document, or null when parsing fails (the error is
     *         logged)
     */
    private static Document getDoc (String xml) {
        // NOTE(review): the factory is used with its default settings; if the
        // metadata files can come from untrusted sources, external entity
        // resolution should be reviewed - TODO confirm.
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            return factory.newDocumentBuilder().parse(new InputSource(new StringReader(xml)));
        } catch (Exception e) {
            log.error("getDoc:",e);
            return null;
        }
    }

    /**
     * Reads a text file line by line and returns its content with every line
     * terminated by {@code "\n"}.
     *
     * @param file     file to read
     * @param encoding name of the character encoding of the file
     * @return the file content, or the empty string when the file cannot be
     *         opened (the error is logged)
     */
    public static String readFile(File file, String encoding){
        LineIterator it = null;
        try {
            it = FileUtils.lineIterator(file, encoding);
            StringBuilder content = new StringBuilder();
            while (it.hasNext()) {
                content.append(it.nextLine()).append('\n');
            }
            return content.toString();
        } catch (IOException e) {
            log.error("readFile: fileName=" + file.getName(),e);
            return "";
        } finally {
            // Runs even when iteration fails with an unchecked exception,
            // so the underlying reader is always released.
            LineIterator.closeQuietly(it);
        }
    }
}