package focusedCrawler.rest;
import static java.util.Objects.requireNonNull;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.net.DatagramSocket;
import java.net.ServerSocket;
import java.util.List;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.jvm.ThreadDump;
import com.google.common.collect.ImmutableMap;
import focusedCrawler.Main;
import focusedCrawler.config.ConfigService;
import focusedCrawler.target.TargetStorageConfig;
import focusedCrawler.target.repository.elasticsearch.ElasticSearchConfig;
import spark.Route;
import spark.Service;
/**
 * Embedded HTTP server for the crawler, built on a Spark {@link Service}.
 *
 * <p>It always serves the static files under {@code /public}, plus the
 * {@code /status}, {@code /metrics} and {@code /thread/dump} endpoints. When an
 * Elasticsearch index name and host address are supplied, it additionally
 * proxies {@code /_search} requests to Elasticsearch and registers the
 * {@code /labels} endpoints backed by a {@link LabelsManager}.
 *
 * <p>Instances are obtained through the static {@code create(...)} factories.
 */
public class RestServer {

    /**
     * Implementation version of the package containing {@link Main}. May be
     * {@code null} when the version is not known (see
     * {@link Package#getImplementationVersion()}).
     */
    public static final String VERSION = Main.class.getPackage().getImplementationVersion();

    private static final Logger logger = LoggerFactory.getLogger(RestServer.class);

    /** Elasticsearch type name used when the caller does not provide one. */
    private static final String DEFAULT_ES_TYPE_NAME = "page";

    /** Number of consecutive ports probed, starting at the configured one. */
    private static final int MAX_PORT_OFFSET = 100;

    private RestConfig restConfig;
    private MetricRegistry metricsRegistry;
    private Service server;

    private boolean isSearchEnabled = false;
    private String esHostAddress;
    private String esIndexName;
    private String esTypeName;

    /** HTTP client used by the Elasticsearch proxy; {@code null} when search is disabled. */
    private CloseableHttpClient httpclient;

    /** Backs the {@code /labels} endpoints; {@code null} when search is disabled. */
    private LabelsManager labelsManager;

    /** Creates a server without the Elasticsearch-backed search and labeling features. */
    private RestServer(String dataPath, RestConfig restConfig, MetricRegistry metricsRegistry) {
        this(dataPath, restConfig, metricsRegistry, null, null, null);
    }

    /**
     * Creates a server. The search features are enabled only when both
     * {@code esIndexName} and {@code esHostAddress} are non-null.
     *
     * @param dataPath        path handed to the {@link LabelsManager}
     * @param restConfig      host, port and CORS settings
     * @param metricsRegistry registry returned by the {@code /metrics} endpoint
     * @param esIndexName     Elasticsearch index name, or {@code null}
     * @param esTypeName      Elasticsearch type name; defaults to {@code "page"} when null or empty
     * @param esHostAddress   base URL of the Elasticsearch REST API, or {@code null}
     */
    private RestServer(String dataPath, RestConfig restConfig, MetricRegistry metricsRegistry,
                       String esIndexName, String esTypeName, String esHostAddress) {
        this.restConfig = restConfig;
        this.metricsRegistry = metricsRegistry;
        if (esIndexName != null && esHostAddress != null) {
            this.esIndexName = esIndexName;
            this.esHostAddress = esHostAddress;
            this.isSearchEnabled = true;
            if (esTypeName != null && !esTypeName.isEmpty()) {
                this.esTypeName = esTypeName;
            } else {
                this.esTypeName = DEFAULT_ES_TYPE_NAME;
            }
            this.httpclient = HttpClients.createDefault();
            this.labelsManager = new LabelsManager(dataPath);
        }
    }

    /**
     * Starts the HTTP server and registers all routes. If the configured port
     * is busy, up to {@value #MAX_PORT_OFFSET} following ports are probed.
     * Returns once the Spark service has finished initializing.
     */
    public void start() {
        String host = restConfig.getHost();
        int port = restConfig.getPort();
        // NOTE(review): if no probed port is free, the loop exits with
        // port == configured + 100 and the server is started on it unverified.
        while (!portIsAvailable(port) && port < restConfig.getPort() + MAX_PORT_OFFSET) {
            logger.error("Port {} not available. Trying port {}.", port, port + 1);
            port++;
        }

        server = Service.ignite();
        server.port(port);
        server.ipAddress(host);
        server.staticFiles.location("/public");

        if (restConfig.isEnableCors()) {
            enableCORS("*", "GET");
        }

        /*
         * API endpoints routes
         */
        server.get("/status", Transformers.json(crawlerInfoResource));
        server.get("/metrics", Transformers.json(metricsResource));
        server.get("/thread/dump", Transformers.text(threadDumpResource));

        if (isSearchEnabled) {
            /*
             * Elasticsearch proxy routes
             */
            server.get("/_search", "*/*", elasticsearchApiProxy);
            server.post("/_search", "*/*", elasticsearchApiProxy);
            /*
             * Endpoints for labeling web pages
             */
            server.get("/labels", Transformers.json(labelsManager.getLabelsResource));
            server.put("/labels", Transformers.json(labelsManager.addLabelsResource));
            server.post("/labels", Transformers.json(labelsManager.addLabelsResource));
        }

        /*
         * Routes used by the static web application
         */
        server.get("/search", StaticFileEngine.noopRouter, StaticFileEngine.engine);

        server.awaitInitialization();

        logger.info("---------------------------------------------");
        logger.info("ACHE server available at http://{}:{}", host, port);
        logger.info("---------------------------------------------");
    }

    /**
     * Closes the HTTP client used by the Elasticsearch proxy, if one exists.
     * The Spark service itself is left untouched; see {@link #shutdown()}.
     */
    public void stop() {
        closeHttpClient();
    }

    /** Returns basic information about the crawler as a map (served at {@code /status}). */
    private Route crawlerInfoResource = (request, response) -> {
        // ImmutableMap rejects null values and VERSION may be null, so fall
        // back to a placeholder instead of failing the whole request.
        String version = VERSION != null ? VERSION : "unknown";
        Map<?, ?> crawlerInfo = ImmutableMap.of(
            "status", 200,
            "name", "ACHE Crawler",
            "version", version,
            "searchEnabled", isSearchEnabled
        );
        return crawlerInfo;
    };

    /**
     * Forwards the incoming request to the Elasticsearch {@code _search}
     * endpoint of the configured index/type as a POST carrying the same body
     * and query string, then copies the upstream headers and body into the
     * response.
     */
    private Route elasticsearchApiProxy = (request, response) -> {
        try {
            String url = String.format("%s/%s/%s/_search", esHostAddress, esIndexName, esTypeName);
            // Forward the raw query string as received so that multiple
            // parameters stay '&'-separated and values stay percent-encoded.
            String query = request.queryString();
            if (query != null && !query.isEmpty()) {
                url += "?" + query;
            }

            HttpPost post = new HttpPost(url);
            post.setEntity(new StringEntity(request.body(), "UTF-8"));

            try (CloseableHttpResponse apiResponse = httpclient.execute(post)) {
                // NOTE(review): every upstream header is copied verbatim and the
                // upstream status code is not forwarded; confirm this is intended.
                for (Header header : apiResponse.getAllHeaders()) {
                    response.header(header.getName(), header.getValue());
                }
                HttpEntity entity = apiResponse.getEntity();
                // UTF-8 is only the fallback used when the upstream response
                // does not declare a charset of its own.
                String body = entity != null ? EntityUtils.toString(entity, "UTF-8") : "";
                response.body(body);
                return body;
            }
        } catch (Exception e) {
            logger.error("Failed to forward request to ElasticSearch.", e);
            throw e;
        }
    };

    /** Returns the metrics registry itself (served at {@code /metrics}). */
    private Route metricsResource = (request, response) -> {
        return metricsRegistry;
    };

    private ThreadDump threadDump = new ThreadDump(ManagementFactory.getThreadMXBean());

    /** Returns a textual dump of the JVM threads (served at {@code /thread/dump}). */
    private Route threadDumpResource = (request, response) -> {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        threadDump.dump(baos);
        // Decode explicitly instead of relying on the platform default charset.
        // ThreadDump is understood to write UTF-8 - confirm for the metrics
        // version in use.
        return baos.toString("UTF-8");
    };

    /**
     * Checks whether a TCP server socket and a UDP socket can both be bound to
     * the given port.
     *
     * @param port the port to probe
     * @return {@code true} if both sockets could be opened, {@code false} if
     *         opening either one failed with an {@link IOException}
     */
    private boolean portIsAvailable(int port) {
        ServerSocket ss = null;
        DatagramSocket ds = null;
        try {
            ss = new ServerSocket(port);
            ss.setReuseAddress(true);
            ds = new DatagramSocket(port);
            ds.setReuseAddress(true);
            return true;
        } catch (IOException e) {
            return false;
        } finally {
            if (ds != null) {
                ds.close();
            }
            if (ss != null) {
                try {
                    ss.close();
                } catch (IOException ignored) {
                    // Best-effort cleanup of a probe socket; the probe result
                    // has already been decided.
                }
            }
        }
    }

    /**
     * Stops the Spark service (if it was started) and closes the HTTP client
     * used by the Elasticsearch proxy (if one exists).
     */
    public void shutdown() {
        if (server != null) {
            server.stop();
        }
        closeHttpClient();
    }

    /** Closes the proxy HTTP client when present, logging any failure. */
    private void closeHttpClient() {
        // The client only exists when search is enabled.
        if (httpclient == null) {
            return;
        }
        try {
            httpclient.close();
        } catch (IOException e) {
            logger.error("Failed to close http client.", e);
        }
    }

    /**
     * Registers an OPTIONS route that echoes the requested CORS headers and
     * method back to the client, and a filter that adds the CORS origin
     * header to every response.
     *
     * @param origin  value for {@code Access-Control-Allow-Origin}
     * @param methods value sent in the {@code Access-Control-Request-Method} header
     */
    private void enableCORS(final String origin, final String methods) {
        server.options("/*", (request, response) -> {
            String accessControlRequestHeaders = request.headers("Access-Control-Request-Headers");
            if (accessControlRequestHeaders != null) {
                response.header("Access-Control-Allow-Headers", accessControlRequestHeaders);
            }
            String accessControlRequestMethod = request.headers("Access-Control-Request-Method");
            if (accessControlRequestMethod != null) {
                response.header("Access-Control-Allow-Methods", accessControlRequestMethod);
            }
            return "OK";
        });
        server.before((request, response) -> {
            response.header("Access-Control-Allow-Origin", origin);
            // NOTE(review): "Access-Control-Request-Method" is a request header in
            // the CORS protocol; "Access-Control-Allow-Methods" was probably
            // intended here. Left unchanged pending confirmation.
            response.header("Access-Control-Request-Method", methods);
        });
    }

    /**
     * Creates a server without the Elasticsearch-backed search features.
     *
     * @param dataPath        path handed to the server constructor
     * @param restConfig      host, port and CORS settings
     * @param metricsRegistry registry returned by the {@code /metrics} endpoint
     * @return a new, not yet started server
     */
    public static RestServer create(String dataPath, RestConfig restConfig, MetricRegistry metricsRegistry) {
        return new RestServer(dataPath, restConfig, metricsRegistry);
    }

    /**
     * Creates a server from the crawler configuration. Search features are
     * enabled when the target storage data format is {@code "ELASTICSEARCH"};
     * in that case the first configured REST API host is used.
     *
     * @param dataPath        path handed to the server constructor
     * @param metricsRegistry registry returned by the {@code /metrics} endpoint
     * @param config          crawler configuration
     * @param esIndexName     Elasticsearch index name; required for the ELASTICSEARCH format
     * @param esTypeName      Elasticsearch type name; defaults to {@code "page"} when null or empty
     * @return a new, not yet started server
     * @throws NullPointerException     if {@code metricsRegistry} or {@code config} is null, or if
     *                                  {@code esIndexName} is null for the ELASTICSEARCH format
     * @throws IllegalArgumentException if the ELASTICSEARCH format is used without any REST API host
     */
    public static RestServer create(String dataPath, MetricRegistry metricsRegistry,
                                    ConfigService config, String esIndexName, String esTypeName) {
        requireNonNull(metricsRegistry, "A metrics registry must be provided.");
        requireNonNull(config, "A configuration must be provided.");

        TargetStorageConfig targetStorageConfig = config.getTargetStorageConfig();
        if (!"ELASTICSEARCH".equals(targetStorageConfig.getDataFormat())) {
            return new RestServer(dataPath, config.getRestConfig(), metricsRegistry);
        }

        ElasticSearchConfig esConfig = targetStorageConfig.getElasticSearchConfig();
        List<String> hosts = esConfig.getRestApiHosts();
        if (hosts == null || hosts.isEmpty()) {
            throw new IllegalArgumentException("Elasticsearch host addresses (REST API) can not be empty");
        }
        requireNonNull(esIndexName, "Elasticsearch index name should be provided when using ELASTICSEARCH data format.");
        if (esTypeName == null || esTypeName.isEmpty()) {
            esTypeName = DEFAULT_ES_TYPE_NAME;
        }
        String esHostAddress = hosts.get(0);
        return new RestServer(dataPath, config.getRestConfig(), metricsRegistry,
                esIndexName, esTypeName, esHostAddress);
    }
}