package org.jbake.app;
import com.orientechnologies.orient.core.record.impl.ODocument;
import org.apache.commons.configuration.CompositeConfiguration;
import org.apache.commons.io.FilenameUtils;
import org.jbake.app.ConfigUtil.Keys;
import org.jbake.app.Crawler.Attributes.Status;
import org.jbake.model.DocumentAttributes;
import org.jbake.model.DocumentStatus;
import org.jbake.model.DocumentTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.Arrays;
import java.util.Date;
import java.util.Map;
import static java.io.File.separator;
/**
 * Crawls a file system looking for content.
 *
 * <p>Every file accepted by {@link FileUtil#getFileFilter()} is hashed and compared with the
 * entry stored in the {@link ContentStore}. New or changed files are parsed and saved as
 * documents; files whose stored entry is identical are skipped.</p>
 *
 * @author Jonathan Bullock <a href="mailto:jonbullock@gmail.com">jonbullock@gmail.com</a>
 */
public class Crawler {

    /**
     * Names of the attributes stored with each crawled document.
     */
    public interface Attributes {

        /**
         * Possible values of the {@link Attributes#STATUS} property
         * @author ndx
         *
         */
        interface Status {
            String PUBLISHED_DATE = "published-date";
            String PUBLISHED = "published";
            String DRAFT = "draft";
        }

        String DATE = "date";
        String STATUS = "status";
        String TYPE = "type";
        String TITLE = "title";
        String URI = "uri";
        String FILE = "file";
        String TAGS = "tags";
        String TAG = "tag";
        String ROOTPATH = "rootpath";
        String ID = "id";
        String NO_EXTENSION_URI = "noExtensionUri";
        String ALLTAGS = "alltags";
        String PUBLISHED_DATE = "published_date";
        String BODY = "body";
    }

    private static final Logger LOGGER = LoggerFactory.getLogger(Crawler.class);

    private final CompositeConfiguration config;
    private final Parser parser;
    private final ContentStore db;
    private final String contentPath;

    /**
     * Creates new instance of Crawler.
     *
     * @param db     Database instance for content
     * @param source Base directory where content directory is located
     * @param config Project configuration
     */
    public Crawler(ContentStore db, File source, CompositeConfiguration config) {
        this.db = db;
        this.config = config;
        this.contentPath = FilenameUtils.concat(source.getAbsolutePath(), config.getString(ConfigUtil.Keys.CONTENT_FOLDER));
        this.parser = new Parser(config, contentPath);
    }

    /**
     * Crawl all files and folders looking for content.
     *
     * <p>Entries are visited in sorted order; sub-folders are crawled recursively.
     * Nothing happens when {@code path} cannot be listed.</p>
     *
     * @param path Folder to start from
     */
    public void crawl(File path) {
        File[] contents = path.listFiles(FileUtil.getFileFilter());
        if (contents == null) {
            return;
        }
        Arrays.sort(contents);
        for (File sourceFile : contents) {
            if (sourceFile.isFile()) {
                crawlFile(sourceFile);
            } else if (sourceFile.isDirectory()) {
                crawl(sourceFile);
            }
        }
    }

    /**
     * Decides whether a single content file is new, modified or unchanged, stores it when
     * required and logs the outcome.
     *
     * @param sourceFile content file to examine
     */
    private void crawlFile(final File sourceFile) {
        StringBuilder sb = new StringBuilder();
        sb.append("Processing [").append(sourceFile.getPath()).append("]... ");
        String sha1 = buildHash(sourceFile);
        String uri = buildURI(sourceFile);

        boolean process = true;
        // Remembers whether ANY document type already knows this URI, so that a file found
        // as "modified" under one type is not also reported as "new" because of a later type.
        boolean known = false;
        for (String docType : DocumentTypes.getDocumentTypes()) {
            DocumentStatus status = findDocumentStatus(docType, uri, sha1);
            switch (status) {
                case UPDATED:
                    known = true;
                    sb.append(" : modified ");
                    // drop the stale entry; it is re-created by crawlSourceFile below
                    db.deleteContent(docType, uri);
                    break;
                case IDENTICAL:
                    known = true;
                    sb.append(" : same ");
                    process = false;
                    break;
                default:
                    break;
            }
            if (!process) {
                break;
            }
        }
        if (!known) {
            sb.append(" : new ");
        }
        if (process) { // new or updated
            crawlSourceFile(sourceFile, sha1, uri);
        }
        LOGGER.info(sb.toString());
    }

    /**
     * Computes the SHA-1 hash of a file.
     *
     * @param sourceFile file to hash
     * @return the hash, or an empty string when hashing failed (the failure is logged)
     */
    private String buildHash(final File sourceFile) {
        try {
            return FileUtil.sha1(sourceFile);
        } catch (Exception e) {
            LOGGER.error("Unable to build SHA-1 hash for " + sourceFile, e);
            return "";
        }
    }

    /**
     * Builds the output URI of a content file, relative to the content folder.
     *
     * <p>When extension-less URIs are enabled and the file lies below the configured prefix,
     * {@code xxx.ext} becomes {@code xxx/index<output extension>}; in every other case the
     * source extension is replaced by the output extension.</p>
     *
     * @param sourceFile content file
     * @return URI without a leading {@code /}
     */
    private String buildURI(final File sourceFile) {
        String uri = FileUtil.asPath(sourceFile.getPath()).replace(FileUtil.asPath(contentPath), "");
        String outputExtension = config.getString(Keys.OUTPUT_EXTENSION);

        if (useNoExtensionUri(uri)) {
            // convert URI from xxx.html to xxx/index.html
            uri = "/" + FilenameUtils.getPath(uri) + FilenameUtils.getBaseName(uri) + "/index" + outputExtension;
        } else {
            // removeExtension leaves the name untouched when it has no extension and
            // ignores dots that belong to a parent folder name
            uri = FilenameUtils.removeExtension(uri) + outputExtension;
        }

        // strip off leading / to enable generating non-root based sites
        if (uri.startsWith("/")) {
            uri = uri.substring(1);
        }
        return uri;
    }

    /**
     * Tells whether the extension-less ({@code xxx/index.html}) form applies to a URI.
     *
     * @param uri URI relative to the content folder
     * @return {@code true} when the feature is enabled, a non-empty prefix is configured and
     *         the URI starts with that prefix
     */
    private boolean useNoExtensionUri(final String uri) {
        boolean noExtensionUri = config.getBoolean(Keys.URI_NO_EXTENSION);
        String noExtensionUriPrefix = config.getString(Keys.URI_NO_EXTENSION_PREFIX);
        return noExtensionUri
                && noExtensionUriPrefix != null
                && noExtensionUriPrefix.length() > 0
                && uri.startsWith(noExtensionUriPrefix);
    }

    /**
     * Parses a content file, enriches the parsed attributes and saves them as a document.
     * Files for which the parser returns {@code null} are skipped with a warning.
     *
     * @param sourceFile content file
     * @param sha1       hash of the file content
     * @param uri        output URI of the file
     */
    private void crawlSourceFile(final File sourceFile, final String sha1, final String uri) {
        Map<String, Object> fileContents = parser.processFile(sourceFile);
        if (fileContents == null) {
            LOGGER.warn("{} has an invalid header, it has been ignored!", sourceFile);
            return;
        }

        fileContents.put(Attributes.ROOTPATH, getPathToRoot(sourceFile));
        fileContents.put(String.valueOf(DocumentAttributes.SHA1), sha1);
        fileContents.put(String.valueOf(DocumentAttributes.RENDERED), false);
        if (fileContents.get(Attributes.TAGS) != null) {
            // store them as a String[] (the cast fails fast if the parser produced another type)
            String[] tags = (String[]) fileContents.get(Attributes.TAGS);
            fileContents.put(Attributes.TAGS, tags);
        }
        fileContents.put(Attributes.FILE, sourceFile.getPath());
        fileContents.put(String.valueOf(DocumentAttributes.SOURCE_URI), uri);
        fileContents.put(Attributes.URI, uri);

        String documentType = (String) fileContents.get(Attributes.TYPE);

        // A "published-date" document becomes "published" once its date lies in the past.
        // Constant-first equals keeps this safe when no status was parsed.
        if (Attributes.Status.PUBLISHED_DATE.equals(fileContents.get(Attributes.STATUS))) {
            Object date = fileContents.get(Attributes.DATE);
            if (date instanceof Date && new Date().after((Date) date)) {
                fileContents.put(Attributes.STATUS, Attributes.Status.PUBLISHED);
            }
        }

        if (config.getBoolean(Keys.URI_NO_EXTENSION)) {
            // mirror buildURI, which appends "/index" + the configured output extension
            String indexFile = "/index" + config.getString(Keys.OUTPUT_EXTENSION);
            fileContents.put(Attributes.NO_EXTENSION_URI, uri.replace(indexFile, "/"));
        }

        ODocument doc = new ODocument(documentType);
        doc.fields(fileContents);
        // The map is keyed by attribute name, so look the value up by its String key;
        // documents are cached unless the header says otherwise.
        Object cachedValue = fileContents.get(String.valueOf(DocumentAttributes.CACHED));
        boolean cached = cachedValue == null || Boolean.parseBoolean(String.valueOf(cachedValue));
        doc.field(String.valueOf(DocumentAttributes.CACHED), cached);
        doc.save();
    }

    /**
     * Builds the relative path leading from a content file's folder back to the content folder.
     *
     * @param sourceFile content file located below the content folder
     * @return one {@code ../} per folder level between the file and the content folder;
     *         empty when the file sits directly in the content folder
     * @throws IllegalArgumentException if {@code sourceFile} is not located below the content folder
     */
    public String getPathToRoot(File sourceFile) {
        File rootPath = new File(contentPath);
        File parentPath = sourceFile.getParentFile();
        int parentCount = 0;
        while (parentPath != null && !parentPath.equals(rootPath)) {
            parentPath = parentPath.getParentFile();
            parentCount++;
        }
        if (parentPath == null) {
            // walked past the file system root without meeting the content folder
            throw new IllegalArgumentException(
                    "File [" + sourceFile.getPath() + "] is not located below content folder [" + contentPath + "]");
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < parentCount; i++) {
            sb.append("../");
        }
        return sb.toString();
    }

    /**
     * Compares a file with the entry stored for a document type.
     *
     * @param docType document type to look in
     * @param uri     output URI of the file
     * @param sha1    current hash of the file
     * @return {@code NEW} when nothing is stored, {@code UPDATED} when the stored hash differs
     *         (or is missing) or the stored document is flagged as not rendered,
     *         {@code IDENTICAL} otherwise
     */
    private DocumentStatus findDocumentStatus(String docType, String uri, String sha1) {
        DocumentList match = db.getDocumentStatus(docType, uri);
        if (match.isEmpty()) {
            return DocumentStatus.NEW;
        }
        Map<?, ?> entries = match.get(0);
        Object oldHash = entries.get(String.valueOf(DocumentAttributes.SHA1));
        boolean hashChanged = oldHash == null || !oldHash.equals(sha1);
        boolean notRendered = Boolean.FALSE.equals(entries.get(String.valueOf(DocumentAttributes.RENDERED)));
        return (hashChanged || notRendered) ? DocumentStatus.UPDATED : DocumentStatus.IDENTICAL;
    }
}