package mediawiki.task; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; import java.text.ParseException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import mediawiki.MediaWikiConnection; import mediawiki.MediaWikiException; import mediawiki.info.Article; import mediawiki.info.Language; import mediawiki.info.wikibase.Claim; import mediawiki.info.wikibase.Entity; import mediawiki.info.wikibase.Property; import mediawiki.info.wikibase.Statement; import mediawiki.info.wikibase.WikibaseDate; import mediawiki.info.wikibase.snaks.DateSnak; import mediawiki.info.wikibase.snaks.ItemSnak; import mediawiki.request.GetTemplatesValuesRequest; import mediawiki.request.TemplateEmbeddedInRequest; import mediawiki.request.WikiBaseItemRequest; import mediawiki.request.wikibase.GetEntityRequest; import mediawiki.request.wikibase.ParseValueRequest; import mediawiki.request.wikibase.SetStatementValueRequest; import mediawiki.task.config.pdata.PersondataTracker; /** * PersondataTask for enwiki */ public class PersondataTask extends WikipediaWikidataTask { private static final Language language = new Language("en"); private HashSet<PersondataTracker> trackers = new HashSet<>(); public PersondataTask(MediaWikiConnection wikidata, MediaWikiConnection wikipedia) throws FileNotFoundException { super(wikidata, wikipedia); } @Override public void run() { try{ TemplateEmbeddedInRequest t = new TemplateEmbeddedInRequest("Template:Persondata", 0); List<Article> l = getWikipediaConnection().request(t); for(Article a : l) { try{ Entity entity = getWikidataConnection().request(new GetEntityRequest(getWikipediaConnection().getProject(), a.getTitle())); if(entity == null){ // CREATE ENTITY } GetTemplatesValuesRequest gtvr = new GetTemplatesValuesRequest(a, "Persondata"); gtvr.setUppercaseMode(true); List<Map<String, String>> ms = getWikipediaConnection().request(gtvr); if(ms.size() != 1) continue; Map<String, String> template = ms.get(0); System.out.println(a.getTitle()); System.out.println(entity != null ? entity.getID() : ""); System.out.println(template); if(template.containsKey("NAME")) { template.put("ALTERNATIVE NAMES", (template.containsKey("ALTERNATIVE NAMES") && template.get("ALTERNATIVE NAMES").trim().length() > 0 ? template.get("ALTERNATIVE NAMES")+";" : "")+template.get("NAME")); template.remove("NAME"); } for(Entry<String, String> info : template.entrySet()) { String value = info.getValue(); if(info.getKey().equalsIgnoreCase("SHORT DESCRIPTION") || info.getKey().equalsIgnoreCase("NAME") || info.getKey().equalsIgnoreCase("DATE OF BIRTH") || info.getKey().equalsIgnoreCase("DATE OF DEATH")) { value = value.replaceAll("[\\{\\}\\[\\]\\|]*", ""); } value = value.trim(); if(value.length() == 0) continue; switch(info.getKey().toUpperCase()){ /* case "NAME" : value = value.contains(",") ? value.replaceAll("^([^\\,]+)\\ ?\\,\\ ?([^\\,]+)$", "$2 $1") : value; value = value.replaceAll("\\ {1,}", " "); value = value.trim(); if(entity == null || ! entity.hasLabel(language)) { trackLabel(a, entity, value, false); }else if(! entity.getLabel(language).equalsIgnoreCase(value)) { trackLabel(a, entity, value, true); } break; */ case "ALTERNATIVE NAMES" : String[] values = value.contains(";") ? value.split("\\;") : new String[]{value}; for(String vs : values){ vs = vs.contains(",") ? vs.replaceAll("^([^\\,]+)\\ ?\\,\\ ?([^\\,]+)$", "$2 $1") : vs; vs = vs.replaceAll("\\ {1,}", " "); vs = vs.trim(); if(entity != null && entity.getLabel(language) != null && entity.getLabel(language).equalsIgnoreCase(vs)) continue; if(entity == null || ! entity.hasAlias(language, vs)) { trackAlias(a, entity, vs); } } break; case "SHORT DESCRIPTION" : if(entity == null || ! entity.hasDescription(language)) { trackDescription(a, entity, value, false); }else if(! entity.getDescription(language).equalsIgnoreCase(value)) { trackDescription(a, entity, value, true); } break; case "PLACE OF BIRTH" : case "PLACE OF DEATH" : { Property p = new Property(info.getKey().equals("PLACE OF BIRTH") ? 19 : 20); String fallback = null; if(value.contains("[[")) value = value.replaceAll("^\\[\\[([^\\[\\]\\|]+)\\|?[^\\[\\]\\|]*\\]\\].*$", "$1"); if(value.contains(",")) fallback = value.substring(0, value.indexOf(',')).trim(); String base = getWikipediaConnection().request(new WikiBaseItemRequest(value)); if(base == null){ if(fallback != null){ base = getWikipediaConnection().request(new WikiBaseItemRequest(fallback)); } if(base == null){ trackUnparsable(a, entity, p, value); continue; } } ItemSnak snak = new ItemSnak(Integer.parseInt(base.substring(1))); List<Statement> statements = (entity == null ? new ArrayList<Statement>() : entity.getStatements(p)); if(statements.size() == 0) { trackPlace(a, entity, p, snak, false); System.out.println("add "+p+" statement"); }else{ boolean wdhasvalue = false; for(Statement statement : statements) { if(statement.getClaim().getSnak().equals(snak)) { wdhasvalue = true; System.out.println("has "+p+" statement"); break; } } if(! wdhasvalue) { trackPlace(a, entity, p, snak, true); System.out.println("different "+p+" statement"); } } } break; case "DATE OF BIRTH" : case "DATE OF DEATH" : { Property p = new Property(info.getKey().equals("DATE OF BIRTH") ? 569 : 570); DateSnak snak = null; try{ snak = (DateSnak) getWikidataConnection().request(new ParseValueRequest(value, "time")); }catch(ParseException e) { trackUnparsable(a, entity, p, value); break; } List<Statement> statements = (entity == null ? new ArrayList<Statement>() : entity.getStatements(p)); if(statements.size() == 0) { trackDate(a, entity, p, snak, false); System.out.println("add "+p+" statement"); }else{ boolean wdhasvalue = false; for(Statement statement : statements) { if(statement.getClaim().getSnak().equals(snak)) { wdhasvalue = true; System.out.println("has "+p+" statement"); break; }else if( ((WikibaseDate)statement.getClaim().getSnak().getValue()).getCalendarmodel().equals(snak.getValue().getCalendarmodel()) == false && ((WikibaseDate)statement.getClaim().getSnak().getValue()).getDate().equals(snak.getValue().getDate()) && statement.hasReference(new Claim(new Property(143), new ItemSnak(328)))) { getWikidataConnection().requestConcurrently(new SetStatementValueRequest(statement, snak, "reimport of [[en:Wikipedia:Persondata|Persondata]], fixing calendar model")); wdhasvalue = true; System.out.println("has "+p+" statement (bypass of wrong calendarmodel)"); break; } } if(! wdhasvalue) { trackDate(a, entity, p, snak, true); System.out.println("different "+p+" statement"); } } break; } default: throw new MediaWikiException("unknown field "+info.getKey()); } } try{ System.out.println("Wikipedia is going to be edited"); }catch(Exception e){ e.printStackTrace(); } System.out.println("\n\n"); }catch(Exception e2) { e2.printStackTrace(); } } }catch(Exception e){ e.printStackTrace(); } } private void trackAlias(Article article, Entity item, String alias) throws Exception { for(PersondataTracker t : trackers) { t.trackAlias(article, item, language, alias); } } private void trackDescription(Article article, Entity item, String description, boolean conflict) throws Exception { for(PersondataTracker t : trackers) { t.trackDescription(article, item, language, description, conflict); } } private void trackPlace(Article article, Entity item, Property property, ItemSnak snak, boolean conflict) throws Exception { for(PersondataTracker t : trackers) { t.trackPlace(article, item, property, snak, conflict); } } private void trackDate(Article article, Entity item, Property property, DateSnak snak, boolean conflict) throws Exception { for(PersondataTracker t : trackers) { t.trackDate(article, item, property, snak, conflict); } } private void trackUnparsable(Article article, Entity item, Property property, String value) throws Exception { for(PersondataTracker t : trackers) { t.trackUnparsable(article, item, property, value); } } public void addPersondataTracker(PersondataTracker pt) { trackers.add(pt); } }