package mediawiki.tool; import java.net.MalformedURLException; import java.net.URL; import java.text.NumberFormat; import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; import mediawiki.MediaWikiConnection; import mediawiki.MediaWikiRequest; import mediawiki.WikidataQuery; import mediawiki.info.Article; import mediawiki.info.wikibase.Claim; import mediawiki.info.wikibase.MonolingualText; import mediawiki.info.wikibase.Property; import mediawiki.info.wikibase.Reference; import mediawiki.info.wikibase.ValueSnak; import mediawiki.info.wikibase.Statement; import mediawiki.info.wikibase.WikibaseDate; import mediawiki.info.wikibase.WikibaseQuantity; import mediawiki.info.wikibase.snaks.CommonsSnak; import mediawiki.info.wikibase.snaks.DateSnak; import mediawiki.info.wikibase.snaks.ItemSnak; import mediawiki.info.wikibase.snaks.MonolingualTextSnak; import mediawiki.info.wikibase.snaks.QuantitySnak; import mediawiki.info.wikibase.snaks.StringSnak; import mediawiki.info.wikibase.snaks.URLSnak; import mediawiki.request.GetTemplatesValuesRequest; import mediawiki.request.PageExistsRequest; import mediawiki.request.TemplateEmbeddedInRequest; import mediawiki.request.WikiBaseItemRequest; import mediawiki.request.wikibase.CreateClaimRequest; import mediawiki.request.wikibase.GetSpecificStatementRequest; import mediawiki.request.wikibase.SetReferenceRequest; public class Importer { private MediaWikiConnection wikidata; private MediaWikiConnection wikipedia; private Reference reference; public Importer(MediaWikiConnection wikidata, MediaWikiConnection wikipedia, Reference reference) { this.wikidata = wikidata; this.wikipedia = wikipedia; this.reference = reference; } public abstract class ImportHelper { abstract public void doImport(Article a, String base, Map<String, String> template) throws Exception; public Importer getImporter() { return Importer.this; } public MediaWikiConnection getWikidataConnection() { return getImporter().getWikidataConnection(); } public MediaWikiConnection getWikipediaConnection() { return getImporter().getWikipediaConnection(); } } public void each(MediaWikiRequest<List<Article>> r, String template, ImportHelper ih) throws Exception{ List<Article> articles = wikipedia.request(r); Collections.shuffle(articles); for(Article a : articles) { try{ String base = wikipedia.request(new WikiBaseItemRequest(a)); if(base == null) continue; List<Map<String,String>> t = wikipedia.request(new GetTemplatesValuesRequest(a, template)); if(t.size() != 1) continue; ih.doImport(a, base, t.get(0)); }catch(Exception e){ e.printStackTrace(); } } } public void each(String template, ImportHelper ih) throws Exception { each(template, ih, 0); } public void each(String template, ImportHelper ih, Integer...ns) throws Exception { each(new TemplateEmbeddedInRequest("Template:"+template, ns), template, ih); } public Statement importValue(String base, Property p, String value, ImportHandler<?> ih) throws Exception{ if(base == null) return null; if(value == null || value.trim().length() == 0) return null; if(wikidata.request(new GetSpecificStatementRequest(base, p)).size() > 0) return null; ValueSnak<?> sn = ih.handleImport(value); if(sn == null) return null; Statement s = wikidata.request(new CreateClaimRequest(base, new Claim(p,sn))); wikidata.request(new SetReferenceRequest(s, getReference())); return s; } public MediaWikiConnection getWikipediaConnection() { return wikipedia; } public void setWikipediaConnection(MediaWikiConnection wikipedia) { this.wikipedia = wikipedia; } public MediaWikiConnection getWikidataConnection() { return wikidata; } public void setWikidataConnection(MediaWikiConnection wikidata) { this.wikidata = wikidata; } public Reference getReference() { return reference; } public void setReference(Reference reference) { this.reference = reference; } public interface ImportHandler<T extends ValueSnak<?>> { public T handleImport(String value) throws Exception; } public class SimpleItemHandler implements ImportHandler<ItemSnak> { private WikidataQuery l = null; public SimpleItemHandler(WikidataQuery w) { l = w; } public SimpleItemHandler() { } @Override public ItemSnak handleImport(String value) throws Exception { int v = Integer.parseInt(value.substring(1)); if(l != null && ! l.getResult().contains(v)) return null; return new ItemSnak(v); } } public class ItemHandler extends SimpleItemHandler { public ItemHandler(WikidataQuery w ) { super(w); } public ItemHandler() { super(); } @Override public ItemSnak handleImport(String value) throws Exception { if(value.matches("^Q\\d+$")) return super.handleImport(value); if(value.matches("\\[\\[([^\\|\\{\\]]*)\\|?.*\\]\\]")) value = value.replaceAll("\\[\\[([^\\|\\{\\]]*)\\|?.*\\]\\]", "$1"); return super.handleImport(Importer.this.getWikipediaConnection().request(new WikiBaseItemRequest(value))); } } public class MediaHandler implements ImportHandler<CommonsSnak> { private MediaWikiConnection commons; public MediaHandler(MediaWikiConnection commons) { this.commons = commons; } @Override public CommonsSnak handleImport(String value) throws Exception { if(value.startsWith("[[")) value = value.replaceAll("^\\[\\[\\w+\\:([^\\|\\]\\[]+)[\\|\\]].*", "$1"); value = value.trim(); if(! commons.request(new PageExistsRequest("File:"+value))) return null; return new CommonsSnak(value); } } public class URLHandler implements ImportHandler<URLSnak> { @Override public URLSnak handleImport(String url) throws Exception { if(url.matches("^\\[.*")) url = url.replaceAll("\\[([^\\s]+)\\s.*\\]", "$1"); URL u = null; try{ u = new URL(url); }catch(MalformedURLException m){ url = "http://"+url; try{ u = new URL(url); }catch(MalformedURLException m2){ m2.printStackTrace(); } } if(u == null) return null; return new URLSnak(u); } } public class DateHandler implements ImportHandler<DateSnak> { @Override public DateSnak handleImport(String value) throws Exception { return new DateSnak(WikibaseDate.parseWikipediaDate(value)); } } public class RegexHandler implements ImportHandler<StringSnak> { private String regex; public RegexHandler(String regex) { this.regex = regex; } @Override public StringSnak handleImport(String value) throws Exception { if(! value.matches(regex)) return null; return new StringSnak(value); } } public class QuantityHandler implements ImportHandler<QuantitySnak> { private NumberFormat format; private String unit = "1"; public QuantityHandler(NumberFormat f) { format = f; } public QuantityHandler(Locale l) { this(NumberFormat.getInstance(l)); } public QuantityHandler(NumberFormat f, String unit) { format = f; this.unit = unit; } public QuantityHandler(Locale l, String unit) { this(NumberFormat.getInstance(l), unit); } @Override public QuantitySnak handleImport(String value) throws Exception { if(! value.matches("^[\\,\\.0-9\\+\\-]+$")) return null; return new QuantitySnak(new WikibaseQuantity(format.parse(value).doubleValue(), unit)); } } public class MonolingualTextHandler implements ImportHandler<MonolingualTextSnak> { private String language = null; public MonolingualTextHandler(String l) { language = l; } public MonolingualTextHandler() throws MalformedURLException { this(Importer.this.getWikipediaConnection().getProject().getLanguage()); } @Override public MonolingualTextSnak handleImport(String value) throws Exception { return new MonolingualTextSnak(new MonolingualText(value, language)); } } }