/*
* WPCleaner: A tool to help on Wikipedia maintenance tasks.
* Copyright (C) 2014 Nicolas Vervelle
*
* See README.txt file for licensing information.
*/
package org.wikipediacleaner.api.data;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.wikipediacleaner.i18n.GT;
/**
* Utility class to manage ISBN ranges.
*
* Values are extracted from RangeMessage.xml.
* This file can be generated at https://www.isbn-international.org/range_file_generation.
*/
public class ISBNRange {
/** Global flag for knowing when ranges are loaded */
private static boolean rangesLoaded = false;
/** Lock for loading ranges */
private static final Object rangesLock = new Object();
/** EAN prefixes */
private static List<Range> eanPrefixes = null;
/** Registration groups */
private static List<Range> registrationGroups = null;
/**
* Utility class initialization.
*/
public static void initialize() {
loadRanges();
}
/**
* @param isbn ISBN
* @return EAN prefix for the given ISBN.
*/
public static Range getEANPrefix(String isbn) {
return getRange(isbn, eanPrefixes);
}
/**
* @param isbn ISBN
* @return Registration group for the given ISBN.
*/
public static Range getRegistrationGroup(String isbn) {
return getRange(isbn, registrationGroups);
}
/**
* @param isbn ISBN
* @param ranges List of ranges.
* @return Range for the given ISBN.
*/
private static Range getRange(String isbn, List<Range> ranges) {
if ((isbn == null) || (ranges == null)) {
return null;
}
isbn = PageElementISBN.cleanISBN(isbn);
if (isbn.length() == 10) {
isbn = "978" + isbn;
}
for (Range range : ranges) {
String cleanPrefix = range.getCleanPrefix();
if ((cleanPrefix != null) && (isbn.startsWith(cleanPrefix))) {
return range;
}
}
return null;
}
/**
* Bean for holding information about an ISBN.
*/
public static class ISBNInformation {
List<String> texts;
boolean unknownRange;
boolean reservedRange;
ISBNInformation() {
this.texts = new ArrayList<String>();
}
public List<String> getTexts() {
return texts;
}
public boolean isInUnknownRange() {
return unknownRange;
}
public boolean isInReservedRange() {
return reservedRange;
}
}
/**
* @param isbn ISBN.
* @return Information about ISBN.
*/
public static ISBNInformation getInformation(String isbn) {
if (isbn == null) {
return null;
}
// Retrieve information about ISBN
isbn = PageElementISBN.cleanISBN(isbn);
boolean isbn10 = false;
if (isbn.length() == 10) {
isbn = "978" + isbn;
isbn10 = true;
}
Range eanPrefix = getEANPrefix(isbn);
Range registrationGroup = getRegistrationGroup(isbn);
// Build information
ISBNInformation isbnInfo = new ISBNInformation();
if ((eanPrefix != null) && !isbn10) {
isbnInfo.texts.add(eanPrefix.getPrefix() + " - " + eanPrefix.getAgency());
}
String prefix = null;
String cleanPrefix = null;
if (registrationGroup != null) {
prefix = registrationGroup.getPrefix();
cleanPrefix = registrationGroup.getCleanPrefix();
if (isbn10 && (prefix != null) && (prefix.length() > 4)) {
// Remove "978-"
prefix = prefix.substring(4);
}
if ((prefix != null) && (prefix.length() > 0)) {
isbnInfo.texts.add(prefix + "-" + registrationGroup.getAgency());
}
}
// Suggest a formatted ISBN
if ((registrationGroup != null) && (cleanPrefix != null)) {
String suffix = isbn.substring(cleanPrefix.length());
Rule rule = registrationGroup.getRule(suffix);
if (rule == null) {
isbnInfo.texts.add(GT._("No range found for ISBN"));
isbnInfo.unknownRange = true;
} else {
int nextLength = rule.getLength();
if (nextLength > 0) {
if (suffix.length() > nextLength + 1) {
String suggestedISBN = prefix + "-" +
suffix.substring(0, nextLength) + "-" +
suffix.substring(nextLength, suffix.length() - 1) + "-" +
suffix.substring(suffix.length() - 1);
isbnInfo.texts.add(GT._("Suggested format: {0}", suggestedISBN));
} else {
isbnInfo.texts.add(GT._("ISBN length incoherent with range found"));
}
} else {
isbnInfo.texts.add(GT._(
"ISBN is in a reserved range {0}",
prefix + "-(" + rule.getFrom() + "-" + rule.getTo() + ")"));
isbnInfo.reservedRange = true;
}
}
} else {
isbnInfo.texts.add(GT._("No range found for ISBN"));
isbnInfo.unknownRange = true;
}
return isbnInfo;
}
/**
* Load ISBN ranges
*/
private static void loadRanges() {
if (rangesLoaded == true) {
return;
}
synchronized (rangesLock) {
if (rangesLoaded == true) {
return;
}
InputStream is = ISBNRange.class.getClassLoader().getResourceAsStream(
"org/wikipediacleaner/api/data/RangeMessage.xml");
if (is != null) {
analyzeRangeMessage(is);
}
rangesLoaded = true;
}
}
/**
* Analyze RangeMessage.xml file.
*
* @param is Contents of RangeMessage.xml file.
*/
private static void analyzeRangeMessage(InputStream is) {
try {
SAXBuilder sxb = new SAXBuilder();
Document document = sxb.build(is);
Element root = document.getRootElement();
if (root == null) {
return;
}
analyzeEANPrefixes(root);
analyzeRegistrationGroups(root);
} catch (IOException e) {
// Nothing to do
} catch (JDOMException e) {
// Nothing to do
}
}
/**
* Analyze RangeMessage.xml file for EAN Prefixes.
*
* @param root Root of RangeMessage.xml file.
* @throws JDOMException
*/
private static void analyzeEANPrefixes(Element root) throws JDOMException {
eanPrefixes = new ArrayList<Range>();
analyzeRanges(root, eanPrefixes, "/ISBNRangeMessage/EAN.UCCPrefixes/EAN.UCC");
}
/**
* Analyze RangeMessage.xml file for Registration Groups.
*
* @param root Root of RangeMessage.xml file.
* @throws JDOMException
*/
private static void analyzeRegistrationGroups(Element root) throws JDOMException {
registrationGroups = new ArrayList<Range>();
analyzeRanges(root, registrationGroups, "/ISBNRangeMessage/RegistrationGroups/Group");
}
/**
* Analyze RangeMessage.xml file for Ranges.
*
* @param root Root of RangeMessage.xml file.
* @param ranges Current list of ranges.
* @param xpath XPath selector.
* @throws JDOMException
*/
private static void analyzeRanges(Element root, List<Range> ranges, String xpath) throws JDOMException {
XPathExpression<Element> xpa = XPathFactory.instance().compile(xpath, Filters.element());
List<Element> results = xpa.evaluate(root);
Iterator<Element> iter = results.iterator();
while (iter.hasNext()) {
Element node = iter.next();
Element prefixNode = node.getChild("Prefix");
String prefix = (prefixNode != null) ? prefixNode.getValue() : null;
Element agencyNode = node.getChild("Agency");
String agency = (agencyNode != null) ? agencyNode.getValue() : null;
Range range = new Range(prefix, agency);
analyzeRules(node, range);
ranges.add(range);
}
}
/**
* Analyze RangeMessage.xml file Rules.
*
* @param node Current node.
* @param rangeElement Range element.
* @throws JDOMException
*/
private static void analyzeRules(Element node, Range rangeElement) throws JDOMException {
XPathExpression<Element> xpa = XPathFactory.instance().compile(
"./Rules/Rule", Filters.element());
List<Element> results = xpa.evaluate(node);
Iterator<Element> iter = results.iterator();
while (iter.hasNext()) {
Element ruleNode = iter.next();
Element rangeNode = ruleNode.getChild("Range");
String range = (rangeNode != null) ? rangeNode.getValue() : null;
Element lengthNode = ruleNode.getChild("Length");
String length = (lengthNode != null) ? lengthNode.getValue() : null;
if ((range != null) && (length != null)) {
String[] rangeElements = range.split("\\-");
if ((rangeElements != null) && (rangeElements.length == 2)) {
Rule rule = new Rule(rangeElements[0], rangeElements[1], Integer.parseInt(length));
rangeElement.addRule(rule);
}
}
}
}
/**
* Bean for memorizing information about ranges.
*/
public static class Range {
/** ISBN prefix */
private final String prefix;
/** ISBN prefix */
private final String cleanPrefix;
/** Agency */
private final String agency;
/** Rules */
private final List<Rule> rules;
/**
* @param prefix ISBN prefix.
* @param agency Agency.
*/
Range(String prefix, String agency) {
this.prefix = prefix;
this.cleanPrefix = (prefix != null) ? prefix.replaceAll("\\-", "") : null;
this.agency = agency;
this.rules = new ArrayList<ISBNRange.Rule>();
}
/**
* @return ISBN prefix.
*/
public String getPrefix() {
return prefix;
}
/**
* @return ISBN clean prefix.
*/
public String getCleanPrefix() {
return cleanPrefix;
}
/**
* @return Agency.
*/
public String getAgency() {
return agency;
}
/**
* Add a rule.
*
* @param rule Rule.
*/
void addRule(Rule rule) {
rules.add(rule);
}
/**
* @param suffix Suffix.
* @return Rule for the next element according to the suffix.
*/
Rule getRule(String suffix) {
for (Rule rule : rules) {
if (suffix.compareTo(rule.getFrom()) >= 0) {
String to = rule.getTo();
if (suffix.length() > to.length()) {
suffix = suffix.substring(0, to.length());
}
if (suffix.compareTo(to) <= 0) {
return rule;
}
}
}
return null;
}
/**
* @return Description of the EAN Prefix.
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append(prefix);
buffer.append(" - ");
buffer.append(agency);
for (Rule rule : rules) {
buffer.append("\n ");
buffer.append(rule);
}
return buffer.toString();
}
}
/**
* Bean for memorizing information about rules.
*/
public static class Rule {
/** Beginning of the range */
private final String from;
/** End of the range */
private final String to;
/** Length of the next element */
private final int length;
Rule(String from, String to, int length) {
this.from = from;
this.to = to;
this.length = length;
}
/**
* @return Beginning of the range.
*/
public String getFrom() {
return from;
}
/**
* @return End of the range.
*/
public String getTo() {
return to;
}
/**
* @return Length of the next element.
*/
public int getLength() {
return length;
}
/**
* @return Description of the EAN Prefix.
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append(from);
buffer.append(" - ");
buffer.append(to);
buffer.append(" - ");
buffer.append(length);
return buffer.toString();
}
}
}