/*
* Copyright (C) 2014 Jörg Prante
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses
* or write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*
* The interactive user interfaces in modified source and object code
* versions of this program must display Appropriate Legal Notices,
* as required under Section 5 of the GNU Affero General Public License.
*
*/
package org.xbib.elasticsearch.index.analysis.standardnumber;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.collect.Sets;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.xbib.standardnumber.ARK;
import org.xbib.standardnumber.DOI;
import org.xbib.standardnumber.EAN;
import org.xbib.standardnumber.GTIN;
import org.xbib.standardnumber.IBAN;
import org.xbib.standardnumber.ISAN;
import org.xbib.standardnumber.ISBN;
import org.xbib.standardnumber.ISMN;
import org.xbib.standardnumber.ISNI;
import org.xbib.standardnumber.ISSN;
import org.xbib.standardnumber.ISTC;
import org.xbib.standardnumber.ISWC;
import org.xbib.standardnumber.ORCID;
import org.xbib.standardnumber.PPN;
import org.xbib.standardnumber.StandardNumber;
import org.xbib.standardnumber.UPC;
import org.xbib.standardnumber.ZDB;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import static org.elasticsearch.common.collect.Lists.newLinkedList;
/**
 * Lifecycle component that detects standard numbers (ISBN, ISSN, DOI, ...) in
 * character content and produces their variants.
 *
 * <p>The number types to use are read from the {@code number_types} array setting.
 * When the setting is absent, the default set returned by {@link #create()} is used.
 *
 * <p>{@link StandardNumber} instances are reset and re-populated on every call, so
 * each thread works on its own set of instances.
 */
public class StandardNumberService extends AbstractLifecycleComponent<StandardNumberService> {

    /**
     * Per-thread standard number instances. This is an instance field (not static)
     * because the contents are derived from this service's own settings.
     */
    private final ThreadLocal<Set<StandardNumber>> stdnums = new ThreadLocal<Set<StandardNumber>>();

    /**
     * Creates the service.
     *
     * @param settings the settings; {@code number_types} is read lazily on first use
     */
    @Inject
    public StandardNumberService(Settings settings) {
        super(settings);
    }

    /** Nothing to do on start. */
    @Override
    protected void doStart() throws ElasticsearchException {
    }

    /** Nothing to do on stop. */
    @Override
    protected void doStop() throws ElasticsearchException {
    }

    /** Nothing to do on close. */
    @Override
    protected void doClose() throws ElasticsearchException {
    }

    /**
     * Returns the standard number instances for the calling thread, creating them
     * on first access from the {@code number_types} setting.
     *
     * @return the standard numbers of the current thread, never {@code null}
     */
    protected Collection<StandardNumber> getStdNums() {
        Set<StandardNumber> cached = stdnums.get();
        if (cached == null) {
            String[] configured = settings.getAsArray("number_types", null);
            cached = Sets.newLinkedHashSet();
            if (configured == null) {
                cached.addAll(create());
            } else {
                // the tree set removes duplicates and orders the type names
                Set<String> types = Sets.newTreeSet(Arrays.asList(configured));
                cached.addAll(create(types));
            }
            stdnums.set(cached);
        }
        return cached;
    }

    /**
     * Tries every configured standard number against the content and collects
     * those that pass set/normalize/verify.
     *
     * @param content the content to examine
     * @return the standard numbers that accepted the content; numbers that raised
     *         a {@link NumberFormatException} are skipped
     */
    public Collection<StandardNumber> detect(CharSequence content) {
        Collection<StandardNumber> candidates = newLinkedList();
        for (StandardNumber stdnum : getStdNums()) {
            stdnum.reset();
            try {
                candidates.add(stdnum.set(content).normalize().verify());
            } catch (NumberFormatException ignored) {
                // content is not a number of this type, skip it
            }
        }
        return candidates;
    }

    /**
     * Collects the variants of every configured standard number that considers
     * the content valid. ISBNs get special treatment, see
     * {@link #handleISBN(ISBN, CharSequence, Collection)}.
     *
     * @param content the content to examine
     * @return the non-null typed variants, possibly empty
     */
    public Collection<CharSequence> lookup(CharSequence content) {
        Collection<CharSequence> variants = newLinkedList();
        for (StandardNumber stdnum : getStdNums()) {
            stdnum.reset();
            if (stdnum instanceof ISBN) {
                handleISBN((ISBN) stdnum, content, variants);
                continue;
            }
            StandardNumber normalized = stdnum.set(content).normalize();
            if (!normalized.isValid()) {
                continue;
            }
            for (String variant : normalized.getTypedVariants()) {
                if (variant != null) {
                    variants.add(variant);
                }
            }
        }
        return variants;
    }

    /**
     * Creates a standard number for a type name.
     *
     * @param type the type name, matched case-insensitively (e.g. {@code "isbn"})
     * @return a new standard number, or {@code null} if the type is {@code null} or unknown
     */
    public static StandardNumber create(String type) {
        if (type == null) {
            return null;
        }
        // Locale.ROOT keeps the match independent of the default locale
        // (e.g. Turkish lower-cases 'I' to a dotless 'ı', breaking "ISBN")
        switch (type.toLowerCase(Locale.ROOT)) {
            case "ark":
                return new ARK();
            case "doi":
                return new DOI();
            case "ean":
                return new EAN();
            case "gtin":
                return new GTIN();
            case "iban":
                return new IBAN();
            case "isan":
                return new ISAN();
            case "isbn":
                return new ISBN();
            case "ismn":
                return new ISMN();
            case "isni":
                return new ISNI();
            case "issn":
                return new ISSN();
            case "istc":
                return new ISTC();
            case "iswc":
                return new ISWC();
            case "orcid":
                return new ORCID();
            case "ppn":
                return new PPN();
            case "upc":
                return new UPC();
            case "zdb":
                return new ZDB();
            default:
                return null;
        }
    }

    /**
     * Creates standard numbers for a collection of type names.
     *
     * @param types the type names
     * @return the standard numbers in iteration order of {@code types}; unknown
     *         type names are skipped so that the result never contains {@code null}
     */
    public static Collection<StandardNumber> create(Collection<String> types) {
        List<StandardNumber> stdnums = newLinkedList();
        for (String type : types) {
            StandardNumber stdnum = create(type);
            // a null element would fail later when the number is reset
            if (stdnum != null) {
                stdnums.add(stdnum);
            }
        }
        return stdnums;
    }

    /**
     * Creates the default standard numbers.
     *
     * <p>ISTC and SICI are not included by default: their character pattern
     * filter is too broad and mangles everything.
     *
     * @return a fixed-size collection of new standard number instances
     */
    public static Collection<StandardNumber> create() {
        StandardNumber[] array = new StandardNumber[] {
            new ARK(),
            new DOI(),
            new EAN(),
            new GTIN(),
            new IBAN(),
            new ISAN(),
            new ISBN(),
            new ISMN(),
            new ISNI(),
            new ISSN(),
            new ISWC(),
            new ORCID(),
            new PPN(),
            new UPC(),
            new ZDB()
        };
        return Arrays.asList(array);
    }

    /**
     * Adds the ISBN variants of the content to {@code variants} if the content
     * is a valid ISBN.
     *
     * @param stdnum   the ISBN instance to use
     * @param content  the content to examine
     * @param variants the collection that receives the variants
     * @throws NumberFormatException declared for callers; propagated from the ISBN calls if raised
     */
    private void handleISBN(ISBN stdnum, CharSequence content, Collection<CharSequence> variants) throws NumberFormatException {
        ISBN isbn = stdnum.set(content).normalize();
        if (isbn.isValid()) {
            if (!isbn.isEAN()) {
                // create variants: ISBN, ISBN normalized, ISBN-13, ISBN-13 normalized
                variants.add(isbn.ean(false).format());
                variants.add(isbn.ean(false).normalizedValue());
                isbn = isbn.ean(true).set(content).normalize();
                if (isbn.isValid()) {
                    variants.add(isbn.format());
                    variants.add(isbn.normalizedValue());
                }
            } else {
                // 2 variants, do not create ISBN-10 for an ISBN-13
                variants.add(isbn.ean(true).format());
                variants.add(isbn.ean(true).normalizedValue());
            }
        }
    }
}