package edu.isi.karma.web.services.publish.es;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import javax.servlet.ServletContext;
import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.core.Application;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLContextBuilder;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import edu.isi.karma.config.ModelingConfiguration;
import edu.isi.karma.config.ModelingConfigurationRegistry;
import edu.isi.karma.controller.update.UpdateContainer;
import edu.isi.karma.er.helper.PythonRepository;
import edu.isi.karma.er.helper.PythonRepositoryRegistry;
import edu.isi.karma.kr2rml.ContextIdentifier;
import edu.isi.karma.kr2rml.mapping.R2RMLMappingIdentifier;
import edu.isi.karma.kr2rml.planning.UserSpecifiedRootStrategy;
import edu.isi.karma.kr2rml.writer.JSONKR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.KR2RMLRDFWriter;
import edu.isi.karma.metadata.KarmaMetadataManager;
import edu.isi.karma.metadata.PythonTransformationMetadata;
import edu.isi.karma.metadata.UserConfigMetadata;
import edu.isi.karma.metadata.UserPreferencesMetadata;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.semantictypes.SemanticTypeUtil;
import edu.isi.karma.rdf.GenericRDFGenerator;
import edu.isi.karma.rdf.RDFGeneratorRequest;
import edu.isi.karma.webserver.ContextParametersRegistry;
import edu.isi.karma.webserver.KarmaException;
import edu.isi.karma.webserver.ServletContextParameterMap;
import edu.isi.karma.webserver.ServletContextParameterMap.ContextParameter;
/**
 * JAX-RS resource that turns incoming data into JSON-LD (through the Karma
 * RDF generator) and uploads the resulting documents to Elasticsearch using
 * the {@code _bulk} endpoint.
 *
 * <p>Two servlet-context init parameters tune the upload:
 * <ul>
 *   <li>{@code ESBulkSize} - number of documents per bulk request (default 100)</li>
 *   <li>{@code ESUploadInterval} - pause in milliseconds between bulk requests (default 100)</li>
 * </ul>
 */
@Path("/")
public class ElasticSearchPublishServlet extends Application {

    private static final Logger logger = LoggerFactory
            .getLogger(ElasticSearchPublishServlet.class);

    /** Maximum number of times a single bulk request is attempted before the upload is aborted. */
    private static final int MAX_POST_ATTEMPTS = 10;
    private static final int DEFAULT_BULK_SIZE = 100;
    private static final int DEFAULT_SLEEP_MILLIS = 100;

    /** Number of documents sent per bulk request; always at least 1. */
    private int bulksize = DEFAULT_BULK_SIZE;
    /** Pause between two bulk requests, in milliseconds; never negative. */
    private int sleepTime = DEFAULT_SLEEP_MILLIS;
    private ServletContext context;

    /**
     * Creates the resource, runs the Karma initialization and reads the
     * upload tuning parameters from the servlet context.
     *
     * @param context servlet context injected by the JAX-RS runtime
     */
    public ElasticSearchPublishServlet(@Context ServletContext context) {
        this.context = context;
        try {
            initialization(context);
        } catch (KarmaException ke) {
            // Keep the resource usable, but record the full stack trace.
            logger.error("KarmaException: " + ke.getMessage(), ke);
        }
        // A bulk size below 1 would make "counter % bulksize" throw, and a
        // negative interval would make Thread.sleep throw, so both are
        // validated here and replaced by their defaults when unusable.
        this.bulksize = readIntParameter(context, "ESBulkSize", DEFAULT_BULK_SIZE, 1);
        this.sleepTime = readIntParameter(context, "ESUploadInterval", DEFAULT_SLEEP_MILLIS, 0);
    }

    /**
     * Reads an integer init parameter, falling back to {@code defaultValue}
     * when the parameter is absent, not a number, or below {@code minValue}.
     */
    private static int readIntParameter(ServletContext context, String name, int defaultValue, int minValue) {
        String raw = context.getInitParameter(name);
        if (raw == null) {
            return defaultValue;
        }
        try {
            int value = Integer.parseInt(raw.trim());
            if (value >= minValue) {
                return value;
            }
            logger.warn("Init parameter {} = {} is below the minimum {}; using default {}",
                    name, raw, minValue, defaultValue);
        } catch (NumberFormatException e) {
            logger.warn("Init parameter {} = '{}' is not an integer; using default {}",
                    name, raw, defaultValue);
        }
        return defaultValue;
    }

    /**
     * Generates JSON-LD from form-encoded parameters and publishes it to
     * Elasticsearch.
     *
     * @param formParams form parameters handed to the Elasticsearch and R2RML configuration parsers
     * @return the result string of {@link #publishES}, {@code "Exception: <message>"}
     *         when anything fails, or {@code null} when no JSON-LD was generated
     */
    @POST
    @Consumes(MediaType.APPLICATION_FORM_URLENCODED)
    @Path("/data")
    public String publishFromData(MultivaluedMap<String, String> formParams) {
        try {
            logger.info("Path - es/json . Generate jsonld and publish to ES");
            ElasticSearchConfig esConfig = ElasticSearchConfig.parse(context, formParams);
            R2RMLConfig r2rmlConfig = R2RMLConfig.parse(context, formParams);
            String jsonld = generateJSONLD(r2rmlConfig);
            if (jsonld != null) {
                return publishES(jsonld, esConfig);
            }
        } catch (Exception e) {
            logger.error("Error generating JSON", e);
            return "Exception: " + e.getMessage();
        }
        return null;
    }

    /**
     * Generates JSON-LD from a JSON request body and publishes it to
     * Elasticsearch. Both configurations are parsed with {@code null} form
     * parameters, and the serialized body is installed as the R2RML input.
     *
     * @param json request body used as the data to convert
     * @return the result string of {@link #publishES}, {@code "Exception: <message>"}
     *         when anything fails, or {@code null} when no JSON-LD was generated
     */
    @POST
    @Path("/data")
    @Consumes(MediaType.APPLICATION_JSON)
    public String publishFromData(JSONObject json) {
        try {
            logger.info("Path - es/json . Generate jsonld from multipart and publish to ES");
            ElasticSearchConfig esConfig = ElasticSearchConfig.parse(context, null);
            R2RMLConfig r2rmlConfig = R2RMLConfig.parse(context, null);
            // NOTE(review): this overload encodes with the platform default
            // charset; confirm it matches the encoding the generator decodes with.
            InputStream is = IOUtils.toInputStream(json.toString());
            r2rmlConfig.setInput(is);
            String jsonld = generateJSONLD(r2rmlConfig);
            if (jsonld != null) {
                return publishES(jsonld, esConfig);
            }
        } catch (Exception e) {
            logger.error("Error generating JSON", e);
            return "Exception: " + e.getMessage();
        }
        return null;
    }

    /**
     * Publishes JSON-LD that is already available as the R2RML input to
     * Elasticsearch, without running the RDF generator.
     *
     * @param formParams form parameters handed to the Elasticsearch and R2RML configuration parsers
     * @return the result string of {@link #publishES}, or {@code "Exception: <message>"}
     *         when anything fails
     */
    @POST
    @Consumes(MediaType.APPLICATION_FORM_URLENCODED)
    @Path("/jsonld")
    public String publishFromJsonLD(MultivaluedMap<String, String> formParams) {
        try {
            logger.info("Path - es/jsonld . Publish JSONLD to ES");
            ElasticSearchConfig esConfig = ElasticSearchConfig.parse(context, formParams);
            R2RMLConfig r2rmlConfig = R2RMLConfig.parse(context, formParams);
            // NOTE(review): decodes with the platform default charset - confirm
            // that this is the encoding clients actually send.
            String jsonld = IOUtils.toString(r2rmlConfig.getInput());
            if (jsonld != null) {
                return publishES(jsonld, esConfig);
            }
        } catch (Exception e) {
            logger.error("Error generating JSON", e);
            return "Exception: " + e.getMessage();
        }
        return null;
    }

    /**
     * Uploads the given JSON-LD (a single object or an array of objects) to
     * Elasticsearch in bulk requests of {@code bulksize} documents.
     *
     * <p>Each bulk request is attempted up to {@link #MAX_POST_ATTEMPTS}
     * times. The upload stops at the first request that fails on every
     * attempt; documents after that point are not sent.
     *
     * @param jsonld   JSON-LD text; an object's {@code uri} field, when present, becomes its {@code _id}
     * @param esConfig target index, type and connection settings
     * @return a JSON string with {@code code} "1" on success, or {@code code} "0"
     *         and the failure message otherwise
     * @throws KeyManagementException   if the HTTPS client cannot be set up
     * @throws NoSuchAlgorithmException if the HTTPS client cannot be set up
     * @throws KeyStoreException        if the HTTPS client cannot be set up
     */
    private String publishES(String jsonld, ElasticSearchConfig esConfig) throws KeyManagementException, NoSuchAlgorithmException, KeyStoreException {
        logger.info("Got JSONLD, now pushing to ES");
        JSONArray jsonArray;
        // Trim before sniffing so that an array preceded by whitespace is
        // not mistaken for a single object.
        if (jsonld.trim().startsWith("[")) {
            jsonArray = new JSONArray(jsonld);
        } else {
            jsonArray = new JSONArray();
            jsonArray.put(new JSONObject(jsonld));
        }
        logger.info("FInished de-serializing JSON-LD");

        String index = esConfig.getIndex();
        String type = esConfig.getType();
        StringBuilder sb = new StringBuilder();
        long counter = 0;
        for (int k = 0; k < jsonArray.length(); k++) {
            JSONObject jObj = jsonArray.getJSONObject(k);
            String id = jObj.has("uri") ? jObj.getString("uri") : null;
            // The bulk body is newline-delimited: one action line followed by
            // one source line per document.
            sb.append(bulkActionLine(index, type, id)).append('\n');
            sb.append(jObj.toString()).append('\n');
            counter++;
            if (counter % bulksize == 0) {
                Exception failure = postBulk(sb.toString(), esConfig);
                if (failure != null) {
                    return failureResult(failure);
                }
                logger.info("{} processed", counter);
                sb = new StringBuilder();
                try {
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e) {
                    // Preserve the interrupt for whoever owns this thread.
                    Thread.currentThread().interrupt();
                }
            }
        }
        // Flush the trailing partial batch; skip the request entirely when
        // nothing is left to send.
        if (sb.length() > 0) {
            Exception failure = postBulk(sb.toString(), esConfig);
            if (failure != null) {
                return failureResult(failure);
            }
        }
        return "{\"result\": {\"code\": \"1\", \"message\": \"success\"}}";
    }

    /**
     * Builds the bulk "index" action line for one document. All values are
     * JSON-escaped so that quotes or backslashes cannot corrupt the request.
     *
     * @param id document id, or {@code null} to omit {@code _id}
     */
    private static String bulkActionLine(String index, String type, String id) {
        StringBuilder line = new StringBuilder("{\"index\":{\"_index\":")
                .append(JSONObject.quote(index))
                .append(",\"_type\":")
                .append(JSONObject.quote(type));
        if (id != null) {
            line.append(",\"_id\":").append(JSONObject.quote(id));
        }
        return line.append("}}").toString();
    }

    /**
     * Posts one bulk payload, retrying up to {@link #MAX_POST_ATTEMPTS} times.
     * The HTTP client is always closed before returning.
     *
     * @return {@code null} when an attempt succeeded, otherwise the exception
     *         of the last failed attempt
     */
    private Exception postBulk(String payload, ElasticSearchConfig esConfig) throws KeyManagementException, NoSuchAlgorithmException, KeyStoreException {
        CloseableHttpClient httpClient = getHttpClient(esConfig);
        if (httpClient == null) {
            // Retrying cannot help when no client exists for the protocol.
            return new IllegalArgumentException("Unsupported protocol: " + esConfig.getProtocol());
        }
        try {
            HttpPost httpPost = getHttpPost(esConfig);
            Exception lastFailure = null;
            for (int attempt = 1; attempt <= MAX_POST_ATTEMPTS; attempt++) {
                try {
                    StringEntity entity = new StringEntity(payload, "UTF-8");
                    entity.setContentType("application/json");
                    httpPost.setEntity(entity);
                    // Close the response right away to release its connection.
                    // NOTE(review): the status code and the "errors" flag of the
                    // bulk response are not inspected.
                    httpClient.execute(httpPost).close();
                    return null;
                } catch (Exception e) {
                    lastFailure = e;
                    logger.error("Error posting bulk request (attempt " + attempt
                            + " of " + MAX_POST_ATTEMPTS + ")", e);
                }
            }
            return lastFailure;
        } finally {
            try {
                httpClient.close();
            } catch (IOException e) {
                logger.warn("Could not close HTTP client", e);
            }
        }
    }

    /** Logs the failure and renders it as the JSON error result with an escaped message. */
    private static String failureResult(Exception failure) {
        logger.error("Exception occurred!", failure);
        return "{\"result\": {\"code\": \"0\", \"message\": "
                + JSONObject.quote(String.valueOf(failure.getMessage())) + "}}";
    }

    /** Builds a POST request for {@code <protocol>://<host>:<port>/<index>/_bulk}. */
    private HttpPost getHttpPost(ElasticSearchConfig esConfig) {
        return new HttpPost(esConfig.getProtocol() + "://" + esConfig.getHostname()
                + ":" + esConfig.getPort() + "/" + esConfig.getIndex() + "/_bulk");
    }

    /**
     * Creates an HTTP client for the configured protocol. For {@code https}
     * the client is built with {@link TrustSelfSignedStrategy}, so
     * self-signed server certificates are accepted.
     *
     * @return a new client, or {@code null} when the protocol is neither
     *         {@code http} nor {@code https} (case-insensitive)
     */
    private CloseableHttpClient getHttpClient(ElasticSearchConfig esConfig) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
        String protocol = esConfig.getProtocol();
        // Literal-first comparison keeps a null protocol from throwing.
        if ("https".equalsIgnoreCase(protocol)) {
            SSLContextBuilder builder = new SSLContextBuilder();
            builder.loadTrustMaterial(null, new TrustSelfSignedStrategy());
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(builder.build());
            return HttpClients.custom().setSSLSocketFactory(sslsf).build();
        }
        if ("http".equalsIgnoreCase(protocol)) {
            return HttpClients.createDefault();
        }
        return null;
    }

    /**
     * Runs the Karma RDF generator over the configured input and returns the
     * JSON-LD it writes.
     *
     * @param config R2RML settings: input stream, model URL, context URL and parsing options
     * @return the generated JSON-LD, or {@code null} when the configuration has no input
     */
    private String generateJSONLD(R2RMLConfig config) throws JSONException, MalformedURLException, KarmaException, IOException {
        InputStream is = config.getInput();
        if (is == null) {
            return null;
        }
        URL contextLocation = config.getContextUrl();
        URLConnection contextConnection = contextLocation.openConnection();
        GenericRDFGenerator rdfGen = new GenericRDFGenerator(null);

        // Add the models in;
        R2RMLMappingIdentifier modelIdentifier = new R2RMLMappingIdentifier(
                "generic-model", config.getR2rmlUrl());
        rdfGen.addModel(modelIdentifier);
        Model model = rdfGen.getModelParser("generic-model").getModel();

        // Read the JSON-LD context eagerly so that the connection's stream
        // is closed instead of being leaked.
        JSONObject globalContext;
        try (InputStream contextStream = contextConnection.getInputStream()) {
            globalContext = new JSONObject(new JSONTokener(contextStream));
        }

        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        ContextIdentifier contextId = new ContextIdentifier("generic-context", contextLocation);
        JSONKR2RMLRDFWriter writer = new JSONKR2RMLRDFWriter(pw);
        writer.setGlobalContext(globalContext, contextId);
        RDFGeneratorRequest request = generateRDFRequest("generic-model", model, "Karma-Web-Services", is, config, writer);
        rdfGen.generateRDF(request);
        return sw.toString();
    }

    /**
     * Assembles the generator request from the R2RML configuration.
     *
     * <p>When a context root is configured, it is looked up in the model as
     * a {@link Uris#KM_NODE_ID_URI} value; the subject found that way is then
     * looked up as a {@link Uris#RR_SUBJECTMAP_URI} value, and the subject of
     * that statement is used as the root. If either lookup yields several
     * statements the last one wins; if nothing is found the configured value
     * is passed on unchanged.
     */
    private RDFGeneratorRequest generateRDFRequest(String modelName, Model model, String sourceName, InputStream is, R2RMLConfig config, KR2RMLRDFWriter writer) {
        RDFGeneratorRequest request = new RDFGeneratorRequest(modelName, sourceName);
        request.addWriter(writer);
        request.setInputStream(is);
        request.setDataType(config.getContentType());
        request.setAddProvenance(false);
        request.setMaxNumLines(config.getMaxNumLines());
        request.setEncoding(config.getEncoding());
        request.setDelimiter(config.getColumnDelimiter());
        request.setTextQualifier(config.getTextQualifier());
        request.setDataStartIndex(config.getDataStartIndex());
        request.setHeaderStartIndex(config.getHeaderStartIndex());
        request.setWorksheetIndex(config.getWorksheetIndex());

        String rootTripleMap = config.getContextRoot();
        if (rootTripleMap != null && !rootTripleMap.isEmpty()) {
            StmtIterator itr = model.listStatements(null, model.getProperty(Uris.KM_NODE_ID_URI), rootTripleMap);
            Resource subject = null;
            while (itr.hasNext()) {
                subject = itr.next().getSubject();
            }
            if (subject != null) {
                itr = model.listStatements(null, model.getProperty(Uris.RR_SUBJECTMAP_URI), subject);
                while (itr.hasNext()) {
                    rootTripleMap = itr.next().getSubject().toString();
                }
            }
        }
        request.setStrategy(new UserSpecifiedRootStrategy(rootTripleMap));
        return request;
    }

    //TODO find a way to refactor this out. Also in servletstart
    /**
     * Copies every servlet init parameter whose name matches a
     * {@link ContextParameter} constant into {@code contextParameters}, then
     * stores the webapp's real path as {@link ContextParameter#WEBAPP_PATH}.
     *
     * @param ctx               servlet context to read init parameters from
     * @param contextParameters parameter map that receives the values
     */
    public static void initContextParameters(ServletContext ctx, ServletContextParameterMap contextParameters)
    {
        Enumeration<?> params = ctx.getInitParameterNames();
        List<String> validParams = new ArrayList<>();
        for (ContextParameter param : ContextParameter.values()) {
            validParams.add(param.name());
        }
        while (params.hasMoreElements()) {
            String param = params.nextElement().toString();
            if (validParams.contains(param)) {
                ContextParameter mapParam = ContextParameter.valueOf(param);
                String value = ctx.getInitParameter(param);
                contextParameters.setParameterValue(mapParam, value);
            }
        }
        //String contextPath = ctx.getRealPath(File.separator);
        String contextPath = ctx.getRealPath("/"); //File.separator was not working in Windows. / works
        contextParameters.setParameterValue(ContextParameter.WEBAPP_PATH, contextPath);
    }

    /**
     * Performs the Karma set-up for this resource: loads the context
     * parameters, registers the user metadata and the Python repository, and
     * turns off semantic-type training and the automatic learner.
     *
     * @throws KarmaException declared by this method; presumably raised by the
     *         metadata registration - confirm against KarmaMetadataManager
     */
    private void initialization(ServletContext context) throws KarmaException {
        ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().getDefault();
        initContextParameters(context, contextParameters);
        ContextParametersRegistry contextParametersRegistry = ContextParametersRegistry.getInstance();
        contextParameters = contextParametersRegistry.registerByKarmaHome(null);

        UpdateContainer uc = new UpdateContainer();
        KarmaMetadataManager userMetadataManager = new KarmaMetadataManager(contextParameters);
        userMetadataManager.register(new UserPreferencesMetadata(contextParameters), uc);
        userMetadataManager.register(new UserConfigMetadata(contextParameters), uc);
        userMetadataManager.register(new PythonTransformationMetadata(contextParameters), uc);

        PythonRepository pythonRepository = new PythonRepository(false, contextParameters.getParameterValue(ContextParameter.USER_PYTHON_SCRIPTS_DIRECTORY));
        PythonRepositoryRegistry.getInstance().register(pythonRepository);

        SemanticTypeUtil.setSemanticTypeTrainingStatus(false);
        ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance().register(contextParameters.getId());
        modelingConfiguration.setLearnerEnabled(false); // disable automatic learning
    }
}