/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.ingest.useragent;

import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses user agent strings against three ordered lists of regular expressions
 * (user agent, operating system, device) that are loaded from a YAML stream.
 * Results are looked up in, and stored into, the supplied {@link UserAgentCache}
 * keyed by this parser's name and the agent string.
 */
final class UserAgentParser {

    private final UserAgentCache cache;
    private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
    private final String name;

    /**
     * Creates a parser and eagerly loads all patterns from {@code regexStream}.
     *
     * @param name        name of this parser; also used as part of the cache key
     * @param regexStream YAML stream containing the parser definitions
     * @param cache       cache consulted by {@link #parse(String)}
     * @throws ElasticsearchParseException if the stream cannot be read or does not
     *                                     contain any parser definitions
     */
    UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
        this.name = name;
        this.cache = cache;

        try {
            init(regexStream);
        } catch (IOException e) {
            throw new ElasticsearchParseException("error parsing regular expression file", e);
        }
    }

    /**
     * Reads the top-level sections {@code user_agent_parsers}, {@code os_parsers} and
     * {@code device_parsers} from the YAML stream and fills the matching pattern lists.
     *
     * @throws IOException                 if reading the stream fails
     * @throws ElasticsearchParseException if none of the three sections yielded a pattern
     */
    private void init(InputStream regexStream) throws IOException {
        // EMPTY is safe here because we don't use namedObject
        // The parser is closed once loading is done so that it does not leak.
        // NOTE(review): closing the parser may also close regexStream; the loop below reads
        // until no token is left, so callers are not expected to reuse the stream - confirm.
        try (XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML)
                .createParser(NamedXContentRegistry.EMPTY, regexStream)) {

            XContentParser.Token token = yamlParser.nextToken();

            if (token == XContentParser.Token.START_OBJECT) {
                token = yamlParser.nextToken();

                for (; token != null; token = yamlParser.nextToken()) {
                    if (token != XContentParser.Token.FIELD_NAME) {
                        continue;
                    }

                    String section = yamlParser.currentName();
                    if ("user_agent_parsers".equals(section)) {
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            uaPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("family_replacement"), map.get("v1_replacement"), map.get("v2_replacement"),
                                    map.get("v3_replacement"), map.get("v4_replacement")));
                        }
                    } else if ("os_parsers".equals(section)) {
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            osPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("os_replacement"), map.get("os_v1_replacement"), map.get("os_v2_replacement"),
                                    map.get("os_v3_replacement"), map.get("os_v4_replacement")));
                        }
                    } else if ("device_parsers".equals(section)) {
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            // Device entries only carry a name replacement, no version parts.
                            devicePatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("device_replacement"), null, null, null, null));
                        }
                    }
                }
            }
        }

        if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
            throw new ElasticsearchParseException("not a valid regular expression file");
        }
    }

    /**
     * Compiles {@code regex}, case-insensitively when {@code regexFlag} is {@code "i"}.
     *
     * @throws ElasticsearchParseException if {@code regex} is {@code null}, i.e. the parser
     *                                     configuration had no {@code regex} entry
     */
    private Pattern compilePattern(String regex, String regexFlag) {
        if (regex == null) {
            // Fail with a parse error instead of a bare NullPointerException from Pattern.compile.
            throw new ElasticsearchParseException("malformed regular expression file, parser configuration is missing 'regex'");
        }
        // Only flag present in the current default regexes.yaml
        if ("i".equals(regexFlag)) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        }
        return Pattern.compile(regex);
    }

    /**
     * Reads an array of flat objects from the parser, which must be positioned on the field name
     * that precedes the array. Each object becomes a map from field name to the text of its value.
     *
     * @throws ElasticsearchParseException if the next tokens are not an array whose first element
     *                                     is an object that starts with a field name
     */
    private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
        List<Map<String, String>> patternList = new ArrayList<>();

        XContentParser.Token token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_ARRAY) {
            throw new ElasticsearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
        }

        token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new ElasticsearchParseException("malformed regular expression file, expecting 'object'");
        }

        while (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();
            if (token != XContentParser.Token.FIELD_NAME) {
                throw new ElasticsearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
            }

            Map<String, String> regexMap = new HashMap<>();
            while (token == XContentParser.Token.FIELD_NAME) {
                String fieldName = yamlParser.currentName();
                // Advance to the value token and take its text.
                yamlParser.nextToken();
                regexMap.put(fieldName, yamlParser.text());
                token = yamlParser.nextToken();
            }

            patternList.add(regexMap);
            // Move past the end of the current object to the next element (or the end of the array).
            token = yamlParser.nextToken();
        }

        return patternList;
    }

    List<UserAgentSubpattern> getUaPatterns() {
        return uaPatterns;
    }

    List<UserAgentSubpattern> getOsPatterns() {
        return osPatterns;
    }

    List<UserAgentSubpattern> getDevicePatterns() {
        return devicePatterns;
    }

    String getName() {
        return name;
    }

    /**
     * Returns the details for {@code agentString}, using the cache when it already holds an entry
     * for this parser name and agent string, and populating the cache otherwise.
     *
     * @param agentString the raw user agent string
     * @return the details; individual parts are {@code null} when no pattern matched
     */
    public Details parse(String agentString) {
        Details details = cache.get(name, agentString);

        if (details == null) {
            VersionedName userAgent = findMatch(uaPatterns, agentString);
            VersionedName operatingSystem = findMatch(osPatterns, agentString);
            VersionedName device = findMatch(devicePatterns, agentString);

            details = new Details(userAgent, operatingSystem, device);

            cache.put(name, agentString, details);
        }

        return details;
    }

    /**
     * Returns the result of the first pattern, in list order, that yields a non-null match,
     * or {@code null} if none does.
     */
    private VersionedName findMatch(List<UserAgentSubpattern> possiblePatterns, String agentString) {
        for (UserAgentSubpattern pattern : possiblePatterns) {
            VersionedName match = pattern.match(agentString);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /**
     * Result of parsing one agent string: user agent, operating system and device.
     */
    static final class Details {
        public final VersionedName userAgent;
        public final VersionedName operatingSystem;
        public final VersionedName device;

        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
            this.userAgent = userAgent;
            this.operatingSystem = operatingSystem;
            this.device = device;
        }
    }

    /**
     * A name together with up to four version parts.
     */
    static final class VersionedName {
        public final String name;
        public final String major;
        public final String minor;
        public final String patch;
        public final String build;

        VersionedName(String name, String major, String minor, String patch, String build) {
            this.name = name;
            this.major = major;
            this.minor = minor;
            this.patch = patch;
            this.build = build;
        }
    }

    /**
     * One of: user agent, operating system, device
     */
    static final class UserAgentSubpattern {
        private final Pattern pattern;
        private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;

        UserAgentSubpattern(Pattern pattern, String nameReplacement,
                String v1Replacement, String v2Replacement, String v3Replacement, String v4Replacement) {
            this.pattern = pattern;
            this.nameReplacement = nameReplacement;
            this.v1Replacement = v1Replacement;
            this.v2Replacement = v2Replacement;
            this.v3Replacement = v3Replacement;
            this.v4Replacement = v4Replacement;
        }

        /**
         * Applies the pattern to {@code agentString}.
         * <p>
         * The name is the name replacement (with its first {@code $1} substituted by capture group 1
         * when that group participated in the match) or, without a replacement, capture group 1.
         * The version parts are the corresponding replacement or, without one, capture groups 2 to 5.
         *
         * @return the match, or {@code null} if the pattern is not found or no name could be determined
         */
        public VersionedName match(String agentString) {
            Matcher matcher = pattern.matcher(agentString);
            if (!matcher.find()) {
                return null;
            }

            int groupCount = matcher.groupCount();

            String name = null;
            if (nameReplacement != null) {
                if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
                    // quoteReplacement keeps '$' and '\' in the captured text literal.
                    name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
                } else {
                    name = nameReplacement;
                }
            } else if (groupCount >= 1) {
                name = matcher.group(1);
            }

            if (name == null) {
                return null;
            }

            String major = replacementOrGroup(v1Replacement, matcher, 2);
            String minor = replacementOrGroup(v2Replacement, matcher, 3);
            String patch = replacementOrGroup(v3Replacement, matcher, 4);
            String build = replacementOrGroup(v4Replacement, matcher, 5);

            return new VersionedName(name, major, minor, patch, build);
        }

        /**
         * Returns {@code replacement} when it is set; otherwise the given capture group of the
         * current match, or {@code null} when the pattern has fewer groups than {@code group}.
         */
        private static String replacementOrGroup(String replacement, Matcher matcher, int group) {
            if (replacement != null) {
                return replacement;
            }
            return matcher.groupCount() >= group ? matcher.group(group) : null;
        }
    }
}