/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.reference.regexswap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.http.client.HttpClient;
import org.datacleaner.util.http.HttpXmlUtils;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
 * Client class for the RegexSwap, which allows for easy retrieval of shared
 * regular expressions.
 *
 * <p>
 * Categories and regexes that have been retrieved are kept in two in-memory
 * maps keyed by name. The lookup methods consult these maps first and only
 * call the remote service (through {@link HttpXmlUtils}) when the requested
 * entry is missing.
 * </p>
 *
 * <p>
 * The maps are plain {@link HashMap}s and this class performs no
 * synchronization of its own.
 * </p>
 *
 * @author Kasper Sørensen
 */
public final class RegexSwapClient {

    public static final String CATEGORIES_URL = "https://datacleaner.org/ws/categories";
    public static final String REGEXES_URL = "https://datacleaner.org/ws/regexes";

    /** Categories retrieved so far, keyed by category name. */
    private final Map<String, Category> _categories = new HashMap<>();

    /** Regexes retrieved so far, keyed by regex name. */
    private final Map<String, Regex> _regexes = new HashMap<>();

    private final HttpClient _httpClient;

    /**
     * Creates a client that issues its requests through the given HTTP client.
     *
     * @param httpClient
     *            the HTTP client handed to {@link HttpXmlUtils} for every
     *            request
     */
    public RegexSwapClient(final HttpClient httpClient) {
        _httpClient = httpClient;
    }

    /**
     * Looks up a category by name. If the category is not cached, the
     * categories are refreshed once and the lookup is retried.
     *
     * @param name
     *            the category name
     * @return the category, or null if it is still unknown after the refresh
     */
    public Category getCategoryByName(final String name) {
        Category category = _categories.get(name);
        if (category == null) {
            refreshCategories();
            category = _categories.get(name);
        }
        return category;
    }

    /**
     * Looks up a regex by name. If the regex is not cached, the regexes are
     * refreshed once and the lookup is retried.
     *
     * @param name
     *            the regex name
     * @return the regex, or null if it is still unknown after the refresh
     */
    public Regex getRegexByName(final String name) {
        Regex regex = _regexes.get(name);
        if (regex == null) {
            refreshRegexes();
            regex = _regexes.get(name);
        }
        return regex;
    }

    /**
     * Requests {@link #REGEXES_URL} and (re)registers every "regex" child
     * node of the returned root node. Entries already cached under other
     * names are left in place.
     */
    public void refreshRegexes() {
        final Element rootNode = HttpXmlUtils.getRootNode(_httpClient, REGEXES_URL);
        final List<Node> regexNodes = HttpXmlUtils.getChildNodesByName(rootNode, "regex");
        for (final Node node : regexNodes) {
            createRegex((Element) node);
        }
    }

    /**
     * Gets all known categories, refreshing them first if none are cached.
     *
     * @return the value view of the internal category map (not a copy), so
     *         later refreshes are reflected in the returned collection
     */
    public Collection<Category> getCategories() {
        if (_categories.isEmpty()) {
            refreshCategories();
        }
        return _categories.values();
    }

    /**
     * Requests {@link #CATEGORIES_URL} and (re)registers every "category"
     * child node of the returned root node. Entries already cached under
     * other names are left in place.
     */
    public void refreshCategories() {
        final Element rootNode = HttpXmlUtils.getRootNode(_httpClient, CATEGORIES_URL);
        final List<Node> categoryNodes = HttpXmlUtils.getChildNodesByName(rootNode, "category");
        for (final Node categoryNode : categoryNodes) {
            final String name = HttpXmlUtils.getChildNodeText(categoryNode, "name");
            final String description = HttpXmlUtils.getChildNodeText(categoryNode, "description");
            final String detailsUrl = HttpXmlUtils.getChildNodeText(categoryNode, "detailsUrl");
            final Category category = new Category(name, description, detailsUrl);
            _categories.put(name, category);
        }
    }

    /**
     * Builds a {@link Regex} from a "regex" element and stores it in the
     * cache under its name, replacing any previous entry of that name.
     *
     * @param regexNode
     *            the element describing the regex
     * @return the newly created regex
     * @throws NumberFormatException
     *             if the timestamp or vote texts cannot be parsed as numbers
     */
    private Regex createRegex(final Element regexNode) {
        final String name = HttpXmlUtils.getChildNodeText(regexNode, "name");
        final String description = HttpXmlUtils.getChildNodeText(regexNode, "description");
        final String expression = HttpXmlUtils.getChildNodeText(regexNode, "expression");
        final String author = HttpXmlUtils.getChildNodeText(regexNode, "author");
        final long timestamp = Long.parseLong(HttpXmlUtils.getChildNodeText(regexNode, "timestamp"));
        final int positiveVotes = Integer.parseInt(HttpXmlUtils.getChildNodeText(regexNode, "positiveVotes"));
        final int negativeVotes = Integer.parseInt(HttpXmlUtils.getChildNodeText(regexNode, "negativeVotes"));
        final String detailsUrl = HttpXmlUtils.getChildNodeText(regexNode, "detailsUrl");

        final List<Category> categories = new ArrayList<>();
        final List<Node> categoriesNodes = HttpXmlUtils.getChildNodesByName(regexNode, "categories");
        if (!categoriesNodes.isEmpty()) {
            // Only the first "categories" container is read.
            final Node categoriesNode = categoriesNodes.get(0);
            final List<Node> categoryNodes = HttpXmlUtils.getChildNodesByName(categoriesNode, "category");
            for (final Node categoryNode : categoryNodes) {
                final String categoryName = HttpXmlUtils.getText(categoryNode);
                // May trigger a category refresh; names that stay unknown
                // are skipped.
                final Category category = getCategoryByName(categoryName);
                if (category != null) {
                    categories.add(category);
                }
            }
        }

        final Regex regex = new Regex(name, description, expression, author, timestamp, positiveVotes,
                negativeVotes, detailsUrl, categories);
        _regexes.put(name, regex);
        return regex;
    }

    /**
     * Re-reads a single regex from its details URL and replaces the cached
     * entry of the same name.
     *
     * @param regex
     *            the regex whose details URL is requested
     * @return a new regex instance built from the response
     */
    public Regex refreshRegex(final Regex regex) {
        final String detailsUrl = regex.getDetailsUrl();
        final Element regexNode = HttpXmlUtils.getRootNode(_httpClient, detailsUrl);
        return createRegex(regexNode);
    }

    /**
     * Requests the details URL of the given category and returns the regexes
     * listed there. Regexes that are already cached are reused (their cached
     * field values are kept); unknown ones are created and cached. In both
     * cases the regex is associated with the given category.
     *
     * @param category
     *            the category whose regexes are requested
     * @return the regexes listed for the category, in document order
     * @throws NumberFormatException
     *             if a timestamp or vote text cannot be parsed as a number
     */
    public List<Regex> getRegexes(final Category category) {
        final List<Regex> regexes = new ArrayList<>();
        final Node rootNode = HttpXmlUtils.getRootNode(_httpClient, category.getDetailsUrl());
        final List<Node> regexNodes = HttpXmlUtils.getChildNodesByName(rootNode, "regex");
        for (final Node regexNode : regexNodes) {
            final String name = HttpXmlUtils.getChildNodeText(regexNode, "name");
            final String description = HttpXmlUtils.getChildNodeText(regexNode, "description");
            final String expression = HttpXmlUtils.getChildNodeText(regexNode, "expression");
            final String author = HttpXmlUtils.getChildNodeText(regexNode, "author");
            final long timestamp = Long.parseLong(HttpXmlUtils.getChildNodeText(regexNode, "timestamp"));
            final int positiveVotes = Integer.parseInt(HttpXmlUtils.getChildNodeText(regexNode, "positiveVotes"));
            final int negativeVotes = Integer.parseInt(HttpXmlUtils.getChildNodeText(regexNode, "negativeVotes"));
            final String detailsUrl = HttpXmlUtils.getChildNodeText(regexNode, "detailsUrl");

            Regex regex = _regexes.get(name);
            if (regex == null) {
                // A regex seen for the first time must already carry the
                // category it was listed under; the list is populated before
                // construction so this holds even if Regex copies its input.
                final List<Category> categories = new ArrayList<>();
                categories.add(category);
                regex = new Regex(name, description, expression, author, timestamp, positiveVotes, negativeVotes,
                        detailsUrl, categories);
            } else {
                // Assumes getCategories() returns a modifiable list, as the
                // original code did - TODO confirm against Regex.
                final List<Category> categories = regex.getCategories();
                if (!categories.contains(category)) {
                    categories.add(category);
                }
            }
            _regexes.put(name, regex);
            regexes.add(regex);
        }
        return regexes;
    }
}