/* * Copyright 2012-2017 CodeLibs Project and the Others. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.codelibs.fess.es.config.exentity; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.apache.http.auth.AuthScheme; import org.apache.http.auth.AuthScope; import org.apache.http.auth.Credentials; import org.apache.http.auth.NTCredentials; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.impl.auth.BasicScheme; import org.apache.http.impl.auth.DigestScheme; import org.apache.http.impl.auth.NTLMScheme; import org.codelibs.core.lang.StringUtil; import org.codelibs.fess.Constants; import org.codelibs.fess.crawler.client.CrawlerClientFactory; import org.codelibs.fess.crawler.client.ftp.FtpAuthentication; import org.codelibs.fess.crawler.client.ftp.FtpClient; import org.codelibs.fess.crawler.client.http.Authentication; import org.codelibs.fess.crawler.client.http.HcHttpClient; import org.codelibs.fess.crawler.client.http.impl.AuthenticationImpl; import org.codelibs.fess.crawler.client.http.ntlm.JcifsEngine; import org.codelibs.fess.crawler.client.smb.SmbAuthentication; import org.codelibs.fess.crawler.client.smb.SmbClient; import org.codelibs.fess.es.config.bsentity.BsDataConfig; import org.codelibs.fess.es.config.exbhv.DataConfigToLabelBhv; import org.codelibs.fess.es.config.exbhv.LabelTypeBhv; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; import org.dbflute.cbean.result.ListResultBean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author FreeGen */ public class DataConfig extends BsDataConfig implements CrawlingConfig { private static final long serialVersionUID = 1L; private static final Logger logger = LoggerFactory.getLogger(DataConfig.class); private static final String CRAWLER_WEB_HEADER_PREFIX = "crawler.web.header."; private static final String CRAWLER_WEB_AUTH = "crawler.web.auth"; private static final String CRAWLER_USERAGENT = "crawler.useragent"; private static final String CRAWLER_PARAM_PREFIX = "crawler.param."; private static final Object CRAWLER_FILE_AUTH = "crawler.file.auth"; private String[] labelTypeIds; protected Pattern[] includedDocPathPatterns; protected Pattern[] excludedDocPathPatterns; private Map<String, String> handlerParameterMap; private Map<String, String> handlerScriptMap; private volatile List<LabelType> labelTypeList; public DataConfig() { super(); setBoost(1.0f); } public String[] getLabelTypeIds() { if (labelTypeIds == null) { return StringUtil.EMPTY_STRINGS; } return labelTypeIds; } public void setLabelTypeIds(final String[] labelTypeIds) { this.labelTypeIds = labelTypeIds; } public List<LabelType> getLabelTypeList() { if (labelTypeList == null) { synchronized (this) { if (labelTypeList == null) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); final DataConfigToLabelBhv dataConfigToLabelBhv = ComponentUtil.getComponent(DataConfigToLabelBhv.class); final ListResultBean<DataConfigToLabel> mappingList = dataConfigToLabelBhv.selectList(cb -> { cb.query().setDataConfigId_Equal(getId()); cb.specify().columnLabelTypeId(); cb.paging(fessConfig.getPageLabeltypeMaxFetchSizeAsInteger().intValue(), 1); }); final List<String> labelIdList = new ArrayList<>(); for (final DataConfigToLabel mapping : mappingList) { labelIdList.add(mapping.getLabelTypeId()); } final LabelTypeBhv labelTypeBhv = ComponentUtil.getComponent(LabelTypeBhv.class); labelTypeList = labelIdList.isEmpty() ? Collections.emptyList() : labelTypeBhv.selectList(cb -> { cb.query().setId_InScope(labelIdList); cb.query().addOrderBy_SortOrder_Asc(); cb.fetchFirst(fessConfig.getPageLabeltypeMaxFetchSizeAsInteger()); }); } } } return labelTypeList; } @Override public String[] getLabelTypeValues() { final List<LabelType> list = getLabelTypeList(); final List<String> labelValueList = new ArrayList<>(list.size()); for (final LabelType labelType : list) { labelValueList.add(labelType.getValue()); } return labelValueList.toArray(new String[labelValueList.size()]); } @Override public String getDocumentBoost() { return Float.valueOf(getBoost().floatValue()).toString(); } public String getBoostValue() { if (boost != null) { return boost.toString(); } return null; } public void setBoostValue(final String value) { if (value != null) { try { boost = Float.parseFloat(value); } catch (final Exception e) {} } } @Override public String getIndexingTarget(final String input) { // always return true return Constants.TRUE; } @Override public String getConfigId() { return ConfigType.DATA.getConfigId(getId()); } public Map<String, String> getHandlerParameterMap() { if (handlerParameterMap == null) { handlerParameterMap = ParameterUtil.parse(getHandlerParameter()); } return handlerParameterMap; } public Map<String, String> getHandlerScriptMap() { if (handlerScriptMap == null) { handlerScriptMap = ParameterUtil.parse(getHandlerScript()); } return handlerScriptMap; } @Override public Map<String, Object> initializeClientFactory(final CrawlerClientFactory crawlerClientFactory) { final Map<String, String> paramMap = getHandlerParameterMap(); final Map<String, Object> factoryParamMap = new HashMap<>(); crawlerClientFactory.setInitParameterMap(factoryParamMap); // parameters for (final Map.Entry<String, String> entry : paramMap.entrySet()) { final String key = entry.getKey(); if (key.startsWith(CRAWLER_PARAM_PREFIX)) { factoryParamMap.put(key.substring(CRAWLER_PARAM_PREFIX.length()), entry.getValue()); } } // user agent final String userAgent = paramMap.get(CRAWLER_USERAGENT); if (StringUtil.isNotBlank(userAgent)) { factoryParamMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent); } // web auth final String webAuthStr = paramMap.get(CRAWLER_WEB_AUTH); if (StringUtil.isNotBlank(webAuthStr)) { final String[] webAuthNames = webAuthStr.split(","); final List<Authentication> basicAuthList = new ArrayList<>(); for (final String webAuthName : webAuthNames) { final String scheme = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".scheme"); final String hostname = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".host"); final String port = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".port"); final String realm = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".realm"); final String username = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".username"); final String password = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".password"); if (StringUtil.isEmpty(username)) { logger.warn("username is empty. webAuth:" + webAuthName); continue; } AuthScheme authScheme = null; if (Constants.BASIC.equals(scheme)) { authScheme = new BasicScheme(); } else if (Constants.DIGEST.equals(scheme)) { authScheme = new DigestScheme(); } else if (Constants.NTLM.equals(scheme)) { authScheme = new NTLMScheme(new JcifsEngine()); } // TODO FORM AuthScope authScope; if (StringUtil.isBlank(hostname)) { authScope = AuthScope.ANY; } else { int p = AuthScope.ANY_PORT; if (StringUtil.isNotBlank(port)) { try { p = Integer.parseInt(port); } catch (final NumberFormatException e) { logger.warn("Failed to parse " + port, e); } } String r = realm; if (StringUtil.isBlank(realm)) { r = AuthScope.ANY_REALM; } String s = scheme; if (StringUtil.isBlank(scheme) || Constants.NTLM.equals(scheme)) { s = AuthScope.ANY_SCHEME; } authScope = new AuthScope(hostname, p, r, s); } Credentials credentials; if (Constants.NTLM.equals(scheme)) { final String workstation = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".workstation"); final String domain = paramMap.get(CRAWLER_WEB_AUTH + "." + webAuthName + ".domain"); credentials = new NTCredentials(username, password == null ? StringUtil.EMPTY : password, workstation == null ? StringUtil.EMPTY : workstation, domain == null ? StringUtil.EMPTY : domain); } else { credentials = new UsernamePasswordCredentials(username, password == null ? StringUtil.EMPTY : password); } basicAuthList.add(new AuthenticationImpl(authScope, credentials, authScheme)); } factoryParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY, basicAuthList.toArray(new Authentication[basicAuthList.size()])); } // request header final List<org.codelibs.fess.crawler.client.http.RequestHeader> rhList = new ArrayList<>(); int count = 1; String headerName = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".name"); while (StringUtil.isNotBlank(headerName)) { final String headerValue = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".value"); rhList.add(new org.codelibs.fess.crawler.client.http.RequestHeader(headerName, headerValue)); count++; headerName = paramMap.get(CRAWLER_WEB_HEADER_PREFIX + count + ".name"); } if (!rhList.isEmpty()) { factoryParamMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, rhList.toArray(new org.codelibs.fess.crawler.client.http.RequestHeader[rhList.size()])); } // file auth final String fileAuthStr = paramMap.get(CRAWLER_FILE_AUTH); if (StringUtil.isNotBlank(fileAuthStr)) { final String[] fileAuthNames = fileAuthStr.split(","); final List<SmbAuthentication> smbAuthList = new ArrayList<>(); final List<FtpAuthentication> ftpAuthList = new ArrayList<>(); for (final String fileAuthName : fileAuthNames) { final String scheme = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".scheme"); if (Constants.SAMBA.equals(scheme)) { final String domain = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".domain"); final String hostname = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".host"); final String port = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".port"); final String username = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".username"); final String password = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".password"); if (StringUtil.isEmpty(username)) { logger.warn("username is empty. fileAuth:" + fileAuthName); continue; } final SmbAuthentication smbAuth = new SmbAuthentication(); smbAuth.setDomain(domain == null ? StringUtil.EMPTY : domain); smbAuth.setServer(hostname); if (StringUtil.isNotBlank(port)) { try { smbAuth.setPort(Integer.parseInt(port)); } catch (final NumberFormatException e) { logger.warn("Failed to parse " + port, e); } } smbAuth.setUsername(username); smbAuth.setPassword(password == null ? StringUtil.EMPTY : password); smbAuthList.add(smbAuth); } else if (Constants.FTP.equals(scheme)) { final String hostname = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".host"); final String port = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".port"); final String username = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".username"); final String password = paramMap.get(CRAWLER_FILE_AUTH + "." + fileAuthName + ".password"); if (StringUtil.isEmpty(username)) { logger.warn("username is empty. fileAuth:" + fileAuthName); continue; } final FtpAuthentication ftpAuth = new FtpAuthentication(); ftpAuth.setServer(hostname); if (StringUtil.isNotBlank(port)) { try { ftpAuth.setPort(Integer.parseInt(port)); } catch (final NumberFormatException e) { logger.warn("Failed to parse " + port, e); } } ftpAuth.setUsername(username); ftpAuth.setPassword(password == null ? StringUtil.EMPTY : password); ftpAuthList.add(ftpAuth); } } if (!smbAuthList.isEmpty()) { factoryParamMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY, smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()])); } if (!ftpAuthList.isEmpty()) { factoryParamMap.put(FtpClient.FTP_AUTHENTICATIONS_PROPERTY, ftpAuthList.toArray(new FtpAuthentication[ftpAuthList.size()])); } } return factoryParamMap; } @Override public Map<String, String> getConfigParameterMap(final ConfigName name) { return Collections.emptyMap(); } @Override public String getId() { return asDocMeta().id(); } public void setId(final String id) { asDocMeta().id(id); } public Long getVersionNo() { return asDocMeta().version(); } public void setVersionNo(final Long version) { asDocMeta().version(version); } @Override public Integer getTimeToLive() { final String value = getHandlerParameterMap().get("timeToLive"); if (StringUtil.isBlank(value)) { return null; } try { return Integer.parseInt(value); } catch (final NumberFormatException e) { if (logger.isDebugEnabled()) { logger.debug("Invalid format: " + value, e); } } return null; } @Override public String toString() { return "DataConfig [available=" + available + ", boost=" + boost + ", createdBy=" + createdBy + ", createdTime=" + createdTime + ", handlerName=" + handlerName + ", handlerParameter=" + handlerParameter + ", handlerScript=" + handlerScript + ", name=" + name + ", permissions=" + Arrays.toString(permissions) + ", sortOrder=" + sortOrder + ", updatedBy=" + updatedBy + ", updatedTime=" + updatedTime + "]"; } }