package com.cadrlife.devsearch.agent.indexing;

import com.cadrlife.devsearch.agent.UpdateScope;
import com.cadrlife.devsearch.agent.service.analysis.FileProcessor;
import com.cadrlife.devsearch.agent.service.analysis.JavaReferenceFinder;
import com.cadrlife.devsearch.domain.DevSearchDateFormat;
import com.cadrlife.devsearch.domain.IdUtil;
import com.cadrlife.devsearch.domain.Project;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.deletebyquery.DeleteByQueryRequestBuilder;
import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse;
import org.elasticsearch.action.deletebyquery.IndexDeleteByQueryResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.inject.Named;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;

/**
 * Walks locally checked-out repositories and (re)indexes their projects and
 * source files into an Elasticsearch index via a shared {@link BulkProcessor}.
 * After a project is re-indexed, documents whose {@code lastIndexed} timestamp
 * predates this run ({@link #lastIndexDateForProjects}) are treated as stale
 * and removed with delete-by-query requests.
 *
 * <p>Not thread-safe: one crawler instance drives one indexing run. Callers
 * must {@link #close()} the crawler to flush pending bulk operations.
 */
public class LocalRepoCrawler implements Closeable {
    private static final Logger LOG = LoggerFactory.getLogger(LocalRepoCrawler.class);

    /** Elasticsearch mapping type for indexed source-file documents. */
    public static final String DOC_TYPE = "doc";
    /** Elasticsearch mapping type for project metadata documents. */
    public static final String PROJECT_TYPE = "project";

    private final String codeIndex;
    private final Path rootPath;
    private final Client esClient;
    private final BulkProcessor bulkProcessor;
    // Timestamp of this run; documents last indexed before it are deleted as stale.
    final Date lastIndexDateForProjects;
    private final FileProcessor fileProcessor;

    /**
     * @param esClient      Elasticsearch client used for bulk indexing and delete-by-query
     * @param rootPath      root directory under which repositories are checked out
     * @param codeIndex     name of the Elasticsearch index holding code and project docs
     * @param fileProcessor analyzer applied to each file during the walk
     */
    @Inject
    public LocalRepoCrawler(Client esClient,
                            @Named("checkout.root") Path rootPath,
                            @Named("elasticsearch.code.index") String codeIndex,
                            FileProcessor fileProcessor) {
        this.esClient = esClient;
        this.rootPath = rootPath;
        this.codeIndex = codeIndex;
        this.fileProcessor = fileProcessor;
        this.lastIndexDateForProjects = new Date();
        this.bulkProcessor = newBulkProcessor(esClient, "global");
    }

    /**
     * Builds the shared {@link BulkProcessor} with logging callbacks; a bulk
     * failure is escalated as a {@link RuntimeException} to abort the run.
     */
    private BulkProcessor newBulkProcessor(Client esClient, final String name) {
        return BulkProcessor.builder(esClient, new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
                LOG.debug("BulkProcessor Execution: {}, about to execute new bulk insert composed of {} actions",
                        name, request.numberOfActions());
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                LOG.debug("BulkProcessor Execution: {} index {}, bulk insert composed of {} actions, took {} ms",
                        name, codeIndex, request.numberOfActions(), response.getTookInMillis());
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                // Fixed: the original format string had one placeholder for two
                // arguments, silently dropping the action count.
                LOG.error("BulkProcessor FAILED: {}, {} actions, Throwing",
                        name, request.numberOfActions(), failure);
                throw new RuntimeException(String.format("Execution with id %s failed", name), failure);
            }
        }).setName(name).build();
    }

    /**
     * Indexes every project affected by {@code updateScope}, then deletes
     * stale file documents for those projects and — when the scope covers all
     * projects — stale project documents for the repository.
     *
     * @throws IOException if walking a project's checkout fails
     */
    public void walkRepo(UpdateScope updateScope) throws IOException {
        List<String> projectNames = new ArrayList<>();
        for (Project project : updateScope.getAffectedProjects()) {
            if (project == null) {
                LOG.error("Project is null, WTF?");
                continue;
            }
            walkProject(project);
            projectNames.add(project.getName());
            LOG.info("Affected project {}", project.getName());
        }
        deleteOldFiles(updateScope.getRepoName(), projectNames);
        // TODO make this work for walkProject called from outside
        if (updateScope.isAllProjects() && !projectNames.isEmpty()) {
            deleteOldProjects(updateScope.getRepoName());
        }
    }

    /**
     * Walks one project's checkout, indexing its files and then its project
     * metadata document. Stamps the project with this run's index date first
     * so the subsequent stale-document cleanup spares everything written here.
     *
     * @throws RuntimeException if the project has no checkout path
     * @throws IOException      if the file walk fails
     */
    public void walkProject(Project project) throws IOException {
        if (project.getCheckoutPath() == null) {
            throw new RuntimeException("Checkout path null for Project '" + project + "', WTF?");
        }
        project.setLastIndexedDate(lastIndexDateForProjects);
        new ProjectWalker(rootPath, project, codeIndex, esClient, bulkProcessor, fileProcessor).walkProject();
        String id = IdUtil.projectId(project.getRepo(), project.getName());
        LOG.info("Indexing {}", project.getName());
        project.setId(id);
        LOG.info("Indexing ID {} for {}", id, project);
        indexProject(project, bulkProcessor);
    }

    /**
     * Deletes file documents for the given projects that were last indexed
     * before this run started. One delete-by-query per project: a single terms
     * query would hit the terms-count size limit for large project lists.
     */
    private void deleteOldFiles(String repo, List<String> projectsUpdated) {
        String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
        LOG.info("deleting all previously indexed files for {}::{} with cutoff {}",
                repo, projectsUpdated, cutoffDate);
        for (String projectName : projectsUpdated) {
            // Not terribly efficient, but terms query has a size limit.
            DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
                    .setTypes(DOC_TYPE)
                    .setQuery(QueryBuilders.boolQuery()
                            .must(QueryBuilders.termQuery("project", projectName))
                            .must(QueryBuilders.termQuery("repo", repo))
                            .mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
            DeleteByQueryResponse response = deleteRequest.execute().actionGet();
            checkForFailures(projectName, response);
        }
        LOG.info("done deleting all previously indexed files");
    }

    /** Logs an error for every per-index response that reports failed shards. */
    private void checkForFailures(String name, DeleteByQueryResponse response) {
        for (IndexDeleteByQueryResponse r : response) {
            if (r.getFailedShards() > 0) {
                LOG.error("SuccessfulShards {} / {} for {}",
                        r.getSuccessfulShards(), r.getTotalShards(), name);
            }
        }
    }

    /** Deletes project documents for {@code repo} last indexed before this run. */
    private void deleteOldProjects(String repo) {
        String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
        LOG.info("deleting all projects in {} last indexed before {}", repo, cutoffDate);
        DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
                .setTypes(PROJECT_TYPE)
                .setQuery(QueryBuilders.boolQuery()
                        .must(QueryBuilders.termQuery("repo", repo))
                        .mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
        DeleteByQueryResponse response = deleteRequest.execute().actionGet();
        checkForFailures(repo, response);
    }

    /**
     * Deletes file documents of a single project last indexed before this run.
     * Currently unreferenced by live code (see TODO in walkProject's history);
     * kept for the planned per-project cleanup path.
     */
    private void deleteOldProjectFiles(Project project) {
        String cutoffDate = DevSearchDateFormat.format(lastIndexDateForProjects);
        LOG.info("deleting all files from project {} last indexed before {}",
                project.getId(), cutoffDate);
        DeleteByQueryRequestBuilder deleteRequest = esClient.prepareDeleteByQuery(codeIndex)
                .setTypes(DOC_TYPE)
                .setQuery(QueryBuilders.boolQuery()
                        .must(QueryBuilders.termQuery("repo", project.getRepo()))
                        .must(QueryBuilders.termQuery("project", project.getName()))
                        .mustNot(QueryBuilders.rangeQuery("lastIndexed").gte(cutoffDate)));
        DeleteByQueryResponse response = deleteRequest.execute().actionGet();
        checkForFailures(project.getId(), response);
    }

    /** Thin convenience wrapper; currently unreferenced by live code. */
    private void addIndexToBulk(BulkProcessor bulkProcessor, IndexRequest indexRequest) {
        bulkProcessor.add(indexRequest);
    }

    /**
     * Queues the project metadata document for bulk indexing. Null-valued
     * fields are stripped from the source map before submission.
     */
    public void indexProject(Project project, BulkProcessor bulkProcessor) {
        Map<String, Object> map = new HashMap<>();
        map.put("name", project.getName());
        map.put("repo", project.getRepo());
        map.put("lastIndexed", project.getLastIndexed());
        map.put("lastChanged", project.getLastChanged());
        map.put("dirty", false);
        map.put("dormant", false);
        map.put("cloneCommand", project.getCloneCommand());
        // NOTE(review): the key "PROJECT_TYPE" looks like an accidental use of the
        // constant's name rather than a camelCase field name ("projectType").
        // Preserved byte-for-byte because the index mapping may depend on it —
        // confirm against the mapping before renaming.
        map.put("PROJECT_TYPE", project.getProjectType());
        // Single-pass null strip (replaces the original `while (remove(null));` loop).
        map.values().removeIf(Objects::isNull);
        LOG.info("Project id: {} index: {}", project.getId(), map);
        bulkProcessor.add(esClient.prepareIndex(codeIndex, PROJECT_TYPE, project.getId())
                .setSource(map).request());
    }

    /** Flushes and closes the shared bulk processor. */
    @Override
    public void close() throws IOException {
        LOG.info("Flushing");
        bulkProcessor.flush();
        LOG.info("Done Flushing");
        try {
            // Grace period for in-flight bulk requests to complete before closing.
            Thread.sleep(5000L);
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing it (original dropped it).
            Thread.currentThread().interrupt();
        }
        LOG.info("Closing");
        bulkProcessor.close();
        LOG.info("Done Closing");
    }
}