/*******************************************************************************
* Copyright French Prime minister Office/SGMAP/DINSIC/Vitam Program (2015-2019)
*
* contact.vitam@culture.gouv.fr
*
* This software is a computer program whose purpose is to implement a digital archiving back-office system managing
* high volumetry securely and efficiently.
*
* This software is governed by the CeCILL 2.1 license under French law and abiding by the rules of distribution of free
* software. You can use, modify and/ or redistribute the software under the terms of the CeCILL 2.1 license as
* circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
*
* As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license,
* users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the
* successive licensors have only limited liability.
*
* In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or
* developing or reproducing the software by the user in light of its specific status of free software, that may mean
* that it is complicated to manipulate, and that also therefore means that it is reserved for developers and
* experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the
* software's suitability as regards their requirements in conditions enabling the security of their systems and/or data
* to be ensured and, more generally, to use and operate it in the same conditions as regards security.
*
* The fact that you are presently reading this means that you have had knowledge of the CeCILL 2.1 license and that you
* accept its terms.
*******************************************************************************/
package fr.gouv.vitam.common.database.server.elasticsearch;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.List;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import fr.gouv.vitam.common.ParametersChecker;
import fr.gouv.vitam.common.VitamConfiguration;
import fr.gouv.vitam.common.exception.VitamException;
import fr.gouv.vitam.common.logging.SysErrLogger;
import fr.gouv.vitam.common.logging.VitamLogger;
import fr.gouv.vitam.common.logging.VitamLoggerFactory;
import fr.gouv.vitam.common.server.application.configuration.DatabaseConnection;
/**
 * Elasticsearch access: holds a {@link TransportClient} connected to the configured cluster nodes and exposes it,
 * together with the cluster name and the node list, to callers.
 * <br>
 * The client is created once in the constructor and released by {@link #close()}.
 */
public class ElasticsearchAccess implements DatabaseConnection {

    /** Divisor applied to the Vitam read timeout before it is written as an "s" (seconds) setting. */
    private static final int TOSECOND = 1000;

    private static final VitamLogger LOGGER = VitamLoggerFactory.getInstance(ElasticsearchAccess.class);

    /** Stop-word value used by {@link #DEFAULT_INDEX_CONFIGURATION}: the "_french_" list. */
    private static final String DEFAULT_FRENCH_STOP_WORDS = "\"_french_\"";

    /** Explicit French stop-word list (JSON array); not referenced anywhere in this class at the moment. */
    private static final String FRENCH_STOP_WORDS =
        "[\"vers\", \"a\", \"à\", \"afin\", \"ai\", \"ainsi\", \"après\", \"au\", \"auquel\", \"aussi\", " +
        "\"autre\", \"autres\", \"aux\", \"auxquelles\", \"auxquels\", \"avait\", \"c\", \"ça\", \"ce\", \"ceci\", \"cela\", \"celle\", \"celles\", " +
        "\"celui\", \"cependant\", \"certain\", \"certaine\", \"certaines\", \"certains\", \"ces\", \"cet\", \"cette\", \"ceux\", \"chez\", \"ci\", " +
        "\"combien\", \"comme\", \"comment\", \"concernant\", \"contre\", \"d\", \"dans\", \"de\", \"dedans\", \"dehors\", \"delà\", \"depuis\", " +
        "\"des\", \"dès\", \"désormais\", \"desquelles\", \"desquels\", \"dessous\", \"dessus\", \"devant\", \"devers\", \"devra\", \"divers\", " +
        "\"diverse\", \"diverses\", \"doit\", \"donc\", \"dont\", \"du\", \"duquel\", \"durant\", \"elle\", \"elles\", \"en\", \"entre\", " +
        "\"environ\", \"et\", \"etc\", \"etre\", \"être\", \"eu\", \"eux\", \"hélas\", \"hormis\", \"hors\", \"hui\", \"il\", \"ils\", \"j\", \"je\", " +
        "\"jusqu\", \"jusque\", \"l\", \"la\", \"là\", \"laquelle\", \"le\", \"lequel\", \"les\", \"lesquelles\", \"lesquels\", \"leur\", \"leurs\", " +
        "\"lorsque\", \"lui\", \"ma\", \"mais\", \"malgré\", \"me\", \"même\", \"mêmes\", \"mes\", \"mien\", \"mienne\", \"miennes\", \"miens\", " +
        "\"moi\", \"moins\", \"mon\", \"moyennant\", \"n\", \"ne\", \"néanmoins\", \"ni\", \"non\", \"nos\", \"notre\", \"nôtre\", \"nôtres\", " +
        "\"nous\", \"ô\", \"on\", \"ont\", \"ou\", \"où\", \"outre\", \"par\", \"parmi\", \"pas\", \"pendant\", \"plein\", \"plus\", \"plusieurs\", " +
        "\"pour\", \"pourquoi\", \"près\", \"proche\", \"puisque\", \"qu\", \"quand\", \"que\", \"quel\", \"quelle\", \"quelles\", \"quels\", \"qui\", " +
        "\"quoi\", \"quoique\", \"revoici\", \"revoilà\", \"s\", \"sa\", \"sauf\", \"se\", \"selon\", \"seront\", \"ses\", \"si\", \"sien\", \"sienne\", " +
        "\"siennes\", \"siens\", \"sinon\", \"soi\", \"soit\", \"son\", \"sont\", \"sous\", \"suivant\", \"sur\", \"ta\", \"te\", \"tes\", \"tien\", " +
        "\"tienne\", \"tiennes\", \"tiens\", \"toi\", \"ton\", \"tous\", \"tout\", \"toute\", \"toutes\", \"tu\", \"un\", \"une\", \"va\", \"voici\", " +
        "\"voilà\", \"vos\", \"votre\", \"vôtre\", \"vôtres\", \"vous\", \"vu\", \"y\"]";

    /**
     * Default Index Configuration: JSON analysis settings declaring the "default" and "default_search" analyzers
     * (letter tokenizer, html_strip char filter, and the stopwords / asciifolding / lowercase / snowball / elision /
     * worddelimiter filter chain).
     */
    public static final String DEFAULT_INDEX_CONFIGURATION = "{\"analysis\":{" +
        "\"analyzer\": {" +
        "\"default\": {\"type\":\"custom\",\"tokenizer\":\"letter\"," +
        "\"filter\":[\"stopwords\",\"asciifolding\",\"lowercase\",\"snowball\",\"elision\",\"worddelimiter\"]," +
        "\"char_filter\": [\"html_strip\"]}," +
        "\"default_search\":{\"type\":\"custom\",\"tokenizer\":\"letter\"," +
        "\"filter\":[\"stopwords\",\"asciifolding\",\"lowercase\",\"snowball\",\"elision\",\"worddelimiter\"]," +
        "\"char_filter\": [\"html_strip\"]}}," +
        "\"tokenizer\":{\"letter\":{\"type\":\"letter\"}}," +
        "\"filter\":{" +
        "\"snowball\":{\"type\":\"snowball\",\"language\":\"French\"}," +
        "\"elision\":{\"type\":\"elision\",\"articles\":[\"l\",\"m\",\"t\",\"qu\",\"n\",\"s\",\"j\",\"d\",\"jusqu\",\"quoiqu\",\"lorsqu\",\"puisqu\"]}," +
        "\"stopwords\":{\"type\":\"stop\",\"stopwords\":" + DEFAULT_FRENCH_STOP_WORDS + ",\"ignore_case\":true}," +
        "\"worddelimiter\":{\"type\":\"word_delimiter\"}}}}";

    protected final TransportClient client;
    protected final String clusterName;
    protected final List<ElasticsearchNode> nodes;

    /**
     * Create an ElasticSearch access
     *
     * @param clusterName the name of the Cluster
     * @param nodes the elasticsearch nodes; the list is stored as given (no copy is made)
     * @throws VitamException if the nodes list is empty or if a node host name cannot be resolved
     */
    public ElasticsearchAccess(final String clusterName, List<ElasticsearchNode> nodes) throws VitamException {
        ParametersChecker.checkParameter("clusterName, elasticsearch nodes list are a mandatory parameters",
            clusterName, nodes);
        if (nodes.isEmpty()) {
            throw new VitamException("elasticsearch nodes list is empty");
        }
        this.clusterName = clusterName;
        this.nodes = nodes;
        final Settings settings = getSettings();
        client = getClient(settings);
    }

    /**
     * Production settings, see Elasticsearch production settings
     * https://www.elastic.co/guide/en/elasticsearch/guide/current/deploy.html.<br>
     * <br>
     * Additional on server side:<br>
     * in sysctl "vm.swappiness = 1", "vm.max_map_count=262144"<br>
     * in elasticsearch.yml "bootstrap.mlockall: true"
     *
     * @return Settings for Elasticsearch client
     */
    private Settings getSettings() {
        return Settings.settingsBuilder().put("cluster.name", clusterName)
            .put("client.transport.sniff", true)
            .put("client.transport.ping_timeout", "2s")
            .put("transport.tcp.connect_timeout", "1s")
            .put("transport.profiles.client.connect_timeout", "1s")
            .put("transport.profiles.tcp.connect_timeout", "1s")
            // NOTE(review): presumably getReadTimeout() is in milliseconds, hence the division - confirm
            .put("watcher.http.default_read_timeout", VitamConfiguration.getReadTimeout() / TOSECOND + "s")
            .build();
    }

    /**
     * Build a new transport client and register every configured node as a transport address.
     *
     * @param settings the client settings
     * @return a new TransportClient; the caller is responsible for closing it
     * @throws VitamException if a node host name cannot be resolved
     */
    private TransportClient getClient(Settings settings) throws VitamException {
        final TransportClient clientNew = TransportClient.builder().settings(settings).build();
        try {
            for (final ElasticsearchNode node : nodes) {
                clientNew.addTransportAddress(
                    new InetSocketTransportAddress(InetAddress.getByName(node.getHostName()), node.getTcpPort()));
            }
            return clientNew;
        } catch (final UnknownHostException e) {
            // The client was already built: release it so a failed resolution does not leak it
            clientNew.close();
            LOGGER.error(e.getMessage(), e);
            // Keep the original exception as cause so the stack trace is not lost
            throw new VitamException(e.getMessage(), e);
        } catch (final RuntimeException e) {
            // Same cleanup for unexpected failures; the exception itself is propagated untouched
            clientNew.close();
            throw e;
        }
    }

    /**
     * Close the ElasticSearch connection
     */
    public void close() {
        client.close();
    }

    /**
     * @return the Cluster Name
     */
    public String getClusterName() {
        return clusterName;
    }

    /**
     * @return the client created by the constructor
     */
    public Client getClient() {
        return client;
    }

    /**
     * @return the nodes, i.e. the same list instance that was given to the constructor
     */
    public List<ElasticsearchNode> getNodes() {
        return nodes;
    }

    /**
     * Check the connection using a dedicated, short-lived client built with the same settings and nodes; that client
     * is closed before returning.
     *
     * @return true if the temporary client reports at least one connected node, false if it reports none or if the
     *         client could not be created
     */
    @Override
    public boolean checkConnection() {
        try (TransportClient clientCheck = getClient(getSettings())) {
            return !clientCheck.connectedNodes().isEmpty();
        } catch (final VitamException e) {
            LOGGER.warn(e);
            return false;
        }
    }

    /**
     * @return the cluster name
     */
    @Override
    public String toString() {
        return clusterName;
    }
}