/*
* Licensed to David Pilato (the "Author") under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Author licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package fr.pilato.elasticsearch.crawler.fs.rest;
import fr.pilato.elasticsearch.crawler.fs.SignTool;
import fr.pilato.elasticsearch.crawler.fs.client.BulkProcessor;
import fr.pilato.elasticsearch.crawler.fs.client.IndexRequest;
import fr.pilato.elasticsearch.crawler.fs.meta.doc.Doc;
import fr.pilato.elasticsearch.crawler.fs.meta.doc.DocParser;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.Elasticsearch;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.tika.TikaDocParser;
import fr.pilato.elasticsearch.crawler.fs.util.TimeBasedUUIDGenerator;
import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
import org.glassfish.jersey.media.multipart.FormDataParam;
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import static fr.pilato.elasticsearch.crawler.fs.util.FsCrawlerUtil.buildUrl;
@Path("/_upload")
public class UploadApi extends RestApi {
private final BulkProcessor bulkProcessor;
private final FsSettings settings;
private final MessageDigest messageDigest;
private static final TimeBasedUUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
public UploadApi(FsSettings settings, BulkProcessor bulkProcessor) {
this.settings = settings;
this.bulkProcessor = bulkProcessor;
// Create MessageDigest instance
try {
messageDigest = settings.getFs().getChecksum() == null ?
null : MessageDigest.getInstance(settings.getFs().getChecksum());
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("This should never happen as we checked that previously");
}
}
@POST
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.MULTIPART_FORM_DATA)
public UploadResponse post(
@QueryParam("debug") String debug,
@QueryParam("simulate") String simulate,
@FormDataParam("id") String id,
@FormDataParam("file") InputStream filecontent,
@FormDataParam("file") FormDataContentDisposition d) throws IOException, NoSuchAlgorithmException {
// Create the Doc object
Doc doc = new Doc();
String filename = d.getFileName();
long filesize = d.getSize();
// File
doc.getFile().setFilename(filename);
doc.getFile().setIndexingDate(LocalDateTime.now());
// File
// Path
if (id == null) {
id = SignTool.sign(filename);
} else if (id.equals("_auto_")) {
// We are using a specific id which tells us to generate a unique _id like elasticsearch does
id = TIME_UUID_GENERATOR.getBase64UUID();
}
doc.getPath().setVirtual(filename);
doc.getPath().setReal(filename);
// Path
// Read the file content
TikaDocParser.generate(settings, filecontent, filename, doc, messageDigest, filesize);
String url = null;
if (Boolean.parseBoolean(simulate)) {
logger.debug("Simulate mode is on, so we skip sending document [{}] to elasticsearch.", filename);
} else {
logger.debug("Sending document [{}] to elasticsearch.", filename);
bulkProcessor.add(new IndexRequest(settings.getElasticsearch().getIndex(), settings.getElasticsearch().getType(), id)
.source(DocParser.toJson(doc)));
// Elasticsearch entity coordinates (we use the first node address)
Elasticsearch.Node node = settings.getElasticsearch().getNodes().get(0);
url = buildUrl(
node.getScheme().toLowerCase(), node.getHost(), node.getPort()) + "/" +
settings.getElasticsearch().getIndex() + "/" +
settings.getElasticsearch().getType() + "/" +
id;
}
UploadResponse response = new UploadResponse();
response.setOk(true);
response.setFilename(filename);
response.setUrl(url);
if (logger.isDebugEnabled() || Boolean.parseBoolean(debug)) {
// We send the content back if debug is on or if we got in the query explicitly a debug command
response.setDoc(doc);
}
return response;
}
}