package com.cadrlife.devsearch.agent.indexing;
import com.cadrlife.devsearch.agent.UpdateScope;
import com.cadrlife.devsearch.agent.service.analysis.FileProcessor;
import com.cadrlife.devsearch.agent.service.analysis.JavaReferenceFinder;
import com.cadrlife.devsearch.domain.DevSearchDateFormat;
import com.cadrlife.devsearch.domain.IdUtil;
import com.cadrlife.devsearch.domain.Project;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.deletebyquery.DeleteByQueryRequestBuilder;
import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse;
import org.elasticsearch.action.deletebyquery.IndexDeleteByQueryResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
public class LocalRepoCrawler implements Closeable {
private static final Logger LOG = LoggerFactory.getLogger(LocalRepoCrawler.class);
// private static final int maxBulkRequestSize = 1000;
// private static final long maxBulkRequestContentBytes = 3*1000*1000;
public static final String DOC_TYPE = "doc";
public static final String PROJECT_TYPE = "project";
private final String codeIndex;
private final Path rootPath;
private final Client esClient;
// private BulkRequestBuilder bulkRequestBuilder;
private final BulkProcessor bulkProcessor;
// private FileProcessor fileProcessor;
// long bytesInCurrentBatch = 0;
final Date lastIndexDateForProjects;
private FileProcessor fileProcessor;
@Inject
public LocalRepoCrawler(Client esClient, @Named("checkout.root") Path rootPath, @Named("elasticsearch.code.index") String codeIndex, FileProcessor fileProcessor) {
this.esClient = esClient;
this.rootPath = rootPath;
this.codeIndex = codeIndex;
this.fileProcessor = fileProcessor;
this.lastIndexDateForProjects = new Date();
this.bulkProcessor = newBulkProcessor(esClient, "global");
}
private BulkProcessor newBulkProcessor(Client esClient, final String name) {
return BulkProcessor.builder(esClient, new BulkProcessor.Listener() {
public void beforeBulk(long executionId, BulkRequest request) {
LOG.debug("BulkProcessor Execution: {}, about to execute new bulk insert composed of {} actions", name, request.numberOfActions());
}
public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
LOG.debug("BulkProcessor Execution: {} index {}, bulk insert composed of {} actions, took {} ms", name, codeIndex,
request.numberOfActions(), response.getTookInMillis());
}
public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
LOG.error("BulkProcessor FAILED: {}, Throwing", name,
request.numberOfActions(), failure);
throw new RuntimeException(String.format("Execution with id %s failed", name), failure);
}
})
// .setFlushInterval(new TimeValue(1, TimeUnit.MINUTES))
// .setBulkActions(500)
.setName(name).build();
}
public void walkRepo(UpdateScope updateScope) throws IOException {
List<String> projectNames = new ArrayList<>();
for (Project project : updateScope.getAffectedProjects()) {
if (project == null) {
LOG.error("Project is null, WTF?");
continue;
}
walkProject(project);
projectNames.add(project.getName());
LOG.info("Affected project " + project.getName());
}
deleteOldFiles(updateScope.getRepoName(), projectNames);
//TODO make this work for walkProject called from outside
if (updateScope.isAllProjects() && !projectNames.isEmpty()) {
deleteOldProjects(updateScope.getRepoName());
}
}
public void walkProject(Project project) throws IOException {
// BulkProcessor bulkProcessor = newBulkProcessor(esClient, project.getName());
if (project.getCheckoutPath() == null) {
throw new RuntimeException("Checkout path null for Project '" + project + "', WTF?");
}
project.setLastIndexedDate(lastIndexDateForProjects);
new ProjectWalker(rootPath, project, codeIndex, esClient, bulkProcessor, fileProcessor).walkProject();
// Files.walkFileTree(project.getCheckoutPath(), this);
String id = IdUtil.projectId(project.getRepo(), project.getName());
LOG.info("Indexing {}", project.getName());
project.setId(id);
LOG.info("Indexing ID {} for {}", id, project);
indexProject(project, bulkProcessor);
// TODO
// deleteOldProjectFiles(project);
// LOG.info("Flushing {}" + project.getName());
// bulkProcessor.flush();
// LOG.info("Done Flushing {}" + project.getName());
// bulkProcessor.close();
// LOG.info("Done Closing {}" + project.getName());
// bulkProcessor.close();
// executeCurrentBulk();
}
private void deleteOldFiles(String repo, List<String> projectsUpdated) {
String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
LOG.info("deleting all previously indexed files for {}::{} with cutoff {}",repo, projectsUpdated, cutoffDate);
for (String projectName : projectsUpdated) {
// Not terribly efficient, but terms query has a size limit.
DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
.setTypes(DOC_TYPE)
.setQuery(QueryBuilders.boolQuery()
// .must(QueryBuilders.inQuery("project", projectsUpdated))
.must(QueryBuilders.termQuery("project", projectName))
.must(QueryBuilders.termQuery("repo", repo))
.mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
DeleteByQueryResponse response = deleteRequest.execute().actionGet();
checkForFailures(projectName, response);
}
LOG.info("done deleting all previously indexed files");
}
private void checkForFailures(String name, DeleteByQueryResponse actionGet) {
for (IndexDeleteByQueryResponse r : actionGet) {
if (r.getFailedShards() > 0) {
LOG.error("SuccessfulShards {} / {} for {}",
r.getSuccessfulShards(), r.getTotalShards(), name);
}
}
}
private void deleteOldProjects(String repo) {
String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
LOG.info("deleting all projects in {} last indexed before {}",repo, cutoffDate);
DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
.setTypes(PROJECT_TYPE)
.setQuery(QueryBuilders.boolQuery()
.must(QueryBuilders.termQuery("repo", repo))
.mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
DeleteByQueryResponse response = deleteRequest.execute().actionGet();
checkForFailures(repo, response);
}
private void deleteOldProjectFiles(Project project) {
String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
LOG.info("deleting all files from project {} last indexed before {}",project.getId(), cutoffDate);
DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
.setTypes(DOC_TYPE)
.setQuery(QueryBuilders.boolQuery()
.must(QueryBuilders.termQuery("repo", project.getRepo()))
.must(QueryBuilders.termQuery("project", project.getName()))
.mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
DeleteByQueryResponse response = deleteRequest.execute().actionGet();
checkForFailures(project.getId(), response);
}
private void addIndexToBulk(BulkProcessor bulkProcessor, IndexRequest indexRequest) {
bulkProcessor.add(indexRequest);
}
public void indexProject(Project project, BulkProcessor bulkProcessor) {
Map<String, Object> map = new HashMap<String, Object>();
map.put("name", project.getName());
map.put("repo", project.getRepo());
map.put("lastIndexed", project.getLastIndexed());
map.put("lastChanged", project.getLastChanged());
map.put("dirty", false);
map.put("dormant", false);
map.put("cloneCommand", project.getCloneCommand());
map.put("PROJECT_TYPE", project.getProjectType());
while (map.values().remove(null));
LOG.info("Project id: {} index: {}", project.getId(), map);
bulkProcessor.add(esClient.prepareIndex(codeIndex, PROJECT_TYPE, project.getId()).setSource(map).request());
}
@Override
public void close() throws IOException {
LOG.info("Flushing");
bulkProcessor.flush();
LOG.info("Done Flushing");
try {
Thread.sleep(5000L);
} catch (InterruptedException e) {
}
LOG.info("Closing");
bulkProcessor.close();
LOG.info("Done Closing");
}
}