package mediawiki.task;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import org.json.JSONException;
import javat.xml.Element;
import datasets.in.GND;
import datasets.in.MARC;
import mediawiki.WikidataQuery;
import mediawiki.MediaWikiConnection;
import mediawiki.MediaWikiTask;
import mediawiki.MediaWikiUtil;
import mediawiki.info.wikibase.Claim;
import mediawiki.info.wikibase.Property;
import mediawiki.info.wikibase.Qualifier;
import mediawiki.info.wikibase.Reference;
import mediawiki.info.wikibase.Statement;
import mediawiki.info.wikibase.WikibaseDate;
import mediawiki.info.wikibase.snaks.DateSnak;
import mediawiki.info.wikibase.snaks.ItemSnak;
import mediawiki.request.wikibase.AddQualifierRequest;
import mediawiki.request.wikibase.CreateClaimRequest;
import mediawiki.request.wikibase.GetLabelRequest;
import mediawiki.request.wikibase.GetSpecificStatementRequest;
import mediawiki.request.wikibase.HasClaimRequest;
import mediawiki.request.wikibase.SetLabelRequest;
import mediawiki.request.wikibase.SetReferenceRequest;
public class GNDSetInformationTask extends MediaWikiTask {
public GNDSetInformationTask(MediaWikiConnection con) {
super(con);
}
@Override
public void run() {
try{
MediaWikiConnection wikidata = getConnection();
SimpleDateFormat log = new SimpleDateFormat("HH:mm:ss");
WikidataQuery wdq = new WikidataQuery("CLAIM[31:5] AND CLAIM[227]");
List<Integer> l = wdq.request();
Collections.shuffle(l);
System.out.println(l.size()+" Einträge geladen");
for(Integer b : l){
if(isStopped())
return;
String wikibase = "Q"+b;
try{
if(getConnection().request(new GetSpecificStatementRequest(wikibase, new Claim(31, new ItemSnak(5)))).size() == 0){
System.err.println(wikibase+" isn't a human!");
increaseDone();
continue;
}
String gnd = (String) getConnection().request(new GetSpecificStatementRequest(wikibase, new Property(227))).get(0).getClaim().getSnak().getValue();
Reference ref = new Reference();
ref.addClaim(new Claim(248, 36578));
ref.addClaim(new Claim(new Property(813), new DateSnak(new WikibaseDate(WikibaseDate.ONE_DAY))));
// ref.addClaim(new Claim(new Property(227), new StringSnak(gnd)));
Element e = null;
try{
e = GND.getGNDEntry(gnd);
}catch(Exception e2){
continue;
}
System.out.println(Thread.currentThread().getName()+"\t["+log.format(new Date())+"]\t"+wikibase+"\t"+gnd);
MARC marc = null;
if(e.getChildren("dateOfBirth").size() > 0){
marc = GND.getMARCEntry(gnd);
Statement s = MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(new Property(569), new DateSnak(GND.parseWikibaseDate(e.getChildren("dateOfBirth").get(0).getText()))), ref);
if(isGeborenCa(marc) && s != null){
getConnection().request(new AddQualifierRequest(s, new Qualifier(new Property(1480),new ItemSnak(5727902))));
System.out.println(Thread.currentThread().getName()+"\t["+log.format(new Date())+"]\tCirca-Qualifier created");
}
}
if(e.getChildren("dateOfDeath").size() > 0){
marc = (marc != null ? marc : GND.getMARCEntry(gnd));
Statement s = MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(new Property(570), new DateSnak(GND.parseWikibaseDate(e.getChildren("dateOfDeath").get(0).getText()))), ref);
if(isGestorbenCa(marc) && s != null) {
getConnection().request(new AddQualifierRequest(s, new Qualifier(new Property(1480),new ItemSnak(5727902))));
System.out.println(Thread.currentThread().getName()+"\t["+log.format(new Date())+"]\tCirca-Qualifier created");
}
}
if(e.getChildren("gender").size() > 0){
ItemSnak i = null;
switch(e.getChildren("gender").get(0).getAttribute("resource").getValue()){
case "http://d-nb.info/standards/vocab/gnd/Gender#male" :
i = new ItemSnak(6581097);
break;
case "http://d-nb.info/standards/vocab/gnd/Gender#female" :
i = new ItemSnak(6581072);
break;
}
if(i != null){
Claim claim = new Claim(new Property(21),i);
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, claim, ref);
}
}
/* VIAF disabled!
* if(e.getChildren("sameAs").size() > 0){
for(Element viaf : e.getChildren("sameAs")){
if(viaf.getAttribute("resource").getValue().startsWith("http://viaf.org/viaf/")){
addClaim(wikidata, wikibase, new Claim(new Property(214),new StringSnak(viaf.getAttribute("resource").getValue().substring("http://viaf.org/viaf/".length()))), ref);
}
}
} */
if(e.getChildren("preferredNameEntityForThePerson").size() > 0 && e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("forename").size() == 1 && e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("surname").size() == 1){
String name = e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("forename").get(0).getText()+" ";
if(e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("prefix").size() > 0){
name += e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("prefix").get(0).getText()+" ";
}
name += e.getChildren("preferredNameEntityForThePerson").get(0).getChildren("surname").get(0).getText();
name = name.replaceAll("\\- ", "-");
name = name.replaceAll(" ", " ");
if(name.indexOf(".") == -1){
for(String lang : new String[]{"de","en","fr","es","nl"}){
if(wikidata.request(new GetLabelRequest(lang, wikibase)) == null){
wikidata.request(new SetLabelRequest(wikibase, lang, name));
System.out.println("Label für "+lang+" erstellt: "+name);
}
}
}
}
try{
handleComplexImport(wikidata, wikibase, e, "placeOfBirth", 19, ref, "place", "CLAIM[131]");
}catch(Exception exception){exception.printStackTrace(); continue;}
try{
handleComplexImport(wikidata, wikibase, e, "placeOfDeath", 20, ref, "place", "CLAIM[131]");
}catch(Exception exception){exception.printStackTrace(); continue;}
try{
handleComplexImport(wikidata, wikibase, e, "placeOfActivity", 937, ref, "place", "CLAIM[131]");
}catch(Exception exception){exception.printStackTrace(); continue;}
try{
handleComplexImport(wikidata, wikibase, e, "professionOrOccupation", 106, ref, "occupation", "claim[31:(TREE[13516667][][279])]");
}catch(Exception exception){exception.printStackTrace(); continue;}
if(e.getChildren("academicDegree").size() > 0){
switch(e.getChildren("academicDegree").get(0).getText()){
case "Doktor":
case "Dr." :
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(512,849697), ref);
break;
case "Dr. iur.":
case "Dr. jur.":
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(512,959320), ref);
break;
case "Dr. med":
case "Dr. med.":
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(512,913404), ref);
break;
case "Prof.":
case "Prof":
case "Professor":
case "Professorin":
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(106,121594), ref);
break;
case "Prof. Dr.":
case "Prof., Dr.":
case "Dr., Professor":
case "Prof.Dr.":
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(106,121594), ref);
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(512,849697), ref);
break;
case "Graf":
MediaWikiUtil.addTrustedStatement(wikidata, wikibase, new Claim(97,28989), ref);
break;
default:
System.out.println(e.getChildren("academicDegree").get(0).getText());
}
}
/* if(e.getChildren("variantNameEntityForThePerson").size() > 0){
ArrayList<String> aliases = ((TranslatedContent<ArrayList<String>>) wikidata.request(new GetAliasesRequest(wikibase, "de"))).get("de");
aliases = (aliases == null ? new ArrayList<String>() : aliases);
String title = (String)wikidata.request(new GetLabelRequest("de", wikibase));
aliases.add(title);
if(title != null){
String title2 = title;
title2 = title2.replaceAll("ä", "ae");
title2 = title2.replaceAll("ö", "oe");
title2 = title2.replaceAll("ü", "ue");
title2 = title2.replaceAll("ß", "ss");
aliases.add(title2);
}
ArrayList<String> newaliases = new ArrayList<>();
for(Element name : e.getChildren("variantNameEntityForThePerson")){
if(name.getAttribute("parseType") == null || !name.getAttribute("parseType").getValue().equals("Resource"))
continue;
String n = null;
if(name.getChildren("forename").size() > 0 || name.getChildren("surname").size() > 0){
n = name.getChildren("forename").get(0).getText()+" "+
name.getChildren("surname").get(0).getText();
}else if(name.getChildren("personalName").size() > 0){
n = name.getChildren("personalName").get(0).getText();
}
if(n == null)
continue;
n = n.replaceAll("\\- ", "-");
if(n.indexOf(".") != -1)
continue;
if(! aliases.contains(n) && ! newaliases.contains(n)){
newaliases.add(n);
String title2 = n;
title2 = title2.replaceAll("ä", "ae");
title2 = title2.replaceAll("ö", "oe");
title2 = title2.replaceAll("ü", "ue");
title2 = title2.replaceAll("ß", "ss");
aliases.add(title2);
}
}
if(newaliases.size() > 0){
wikidata.request(new AddAliasesRequest(wikibase, "de", "processed by KasparBot based on GND", newaliases.toArray(new String[newaliases.size()])));
System.out.println("Aliases für "+title+" erstellt: "+newaliases.toString());
}
} */
}catch(Exception e){
e.printStackTrace();
continue;
}
}
}catch(Exception e){
e.printStackTrace();
}
}
private static boolean isGeborenCa(MARC m){
for(HashMap<String, ArrayList<String>> f : m.getDatafield("548")){
if(! (f.get("i").get(0).equals("Lebensdaten") || f.get("i").get(0).equals("Exakte Lebensdaten"))){
continue;
}
if(f.get("9").size() <= 1)
continue;
String angabe = f.get("9").get(1);
angabe = angabe.substring("v:".length());
switch(angabe){
case "Geburts- u. Todesjahr ca." :
case "ca." : return true;
case "genaues Todesdatum unbekannt":
case "Geburtsjahr ca." : return true;
}
if(angabe.indexOf("Geburtsjahr ca.") >= 0 || angabe.indexOf("Geburts- u. Todesjahr ca.") >= 0)
return true;
if(f.get("a").get(0).matches("ca\\.[\\s\\/\\w.]+\\-[\\s\\/\\w.]+"))
return true;
}
return false;
}
private static boolean isGestorbenCa(MARC m){
for(HashMap<String, ArrayList<String>> f : m.getDatafield("548")){
if(! (f.get("i").get(0).equals("Lebensdaten") || f.get("i").get(0).equals("Exakte Lebensdaten"))){
continue;
}
if(f.get("9").size() <= 1)
continue;
String angabe = f.get("9").get(1);
angabe = angabe.substring("v:".length());
angabe = angabe.replaceAll("Todessjahr", "Todesjahr");
switch(angabe){
case "Geburts- u. Todesjahr ca." :
case "ca." : return true;
case "genaues Todesdatum unbekannt":
case "Todesjahr ca." :
case "Sterbejahr ca." : return true;
}
if(angabe.indexOf("Sterbejahr ca.") >= 0 || angabe.indexOf("genaues Todesdatum unbekannt") >= 0 || angabe.indexOf("Todesjahr ca.") >= 0 || angabe.indexOf("Geburts- u. Todesjahr ca.") >= 0)
return true;
if(f.get("a").get(0).matches("[\\s\\/\\w.]+\\-ca\\.[\\s\\/\\w.]+"))
return true;
}
return false;
}
private static void handleComplexImport(MediaWikiConnection wikidata, String base, Element e, String tag, int prop, Reference ref, String referer, String condition) throws IOException, JSONException, Exception{
if(! (e.getChildren(tag).size() > 0 && e.getChildren(tag).get(0).getChildren("Description").size() > 0))
return;
ArrayList<Claim> cs = new ArrayList<>();
for(Element e2 : e.getChildren(tag)){
String gnd = e2.getChildren("Description").get(0).getAttribute("about").getValue().substring("http://d-nb.info/gnd/".length());
WikidataQuery q = new WikidataQuery("STRING[227:\""+gnd+"\"] AND ("+condition+")");
List<Integer> wqresult = q.request();
if(wqresult.size() == 1){
int place = wqresult.get(0);
cs.add(new Claim(prop,place));
}
}
if(wikidata.request(new HasClaimRequest(base, new Property(prop))) ){
for(Claim c : cs){
MediaWikiUtil.addTrustedStatement(wikidata, base, c, ref);
System.out.println("created or referenced: "+c);
}
}else{
for(Claim c : cs){
Statement s = wikidata.request(new CreateClaimRequest(base, c));
wikidata.request(new SetReferenceRequest(s, ref));
System.out.println("created and referenced: "+c);
}
}
}
}