package mediawiki; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import mediawiki.info.HasProperty; import mediawiki.info.wikibase.Claim; import mediawiki.info.wikibase.Property; import mediawiki.info.wikibase.Reference; import mediawiki.info.wikibase.Statement; import mediawiki.info.wikibase.WikibaseDate; import mediawiki.info.wikibase.snaks.DateSnak; import mediawiki.request.wikibase.CreateClaimRequest; import mediawiki.request.wikibase.GetSpecificStatementRequest; import mediawiki.request.wikibase.HasClaimRequest; import mediawiki.request.wikibase.SetReferenceRequest; public class MediaWikiUtil { public static String formatLCCN(String lccn){ String[] parts = lccn.split("\\/"); if(parts.length != 3) return null; // throw new IllegalArgumentException("Malformed LCCN identifier: "+lccn); String flccn = parts[0]+parts[1]; for(int i = 0; i < 6-parts[2].length(); i++){ flccn += "0"; } flccn += parts[2]; return flccn; } public static String[] splitLCCN(String lccn) { if(lccn.matches("^(|n|nb|nr|no|ns|sh|sj|sn)(\\d+)(\\d{6})$")) return lccn.replaceAll("^(|n|nb|nr|no|ns|sh|sj|sn)(\\d+)(\\d{6})$", "$1/$2/$3").split("\\/"); if(lccn.matches("^(|n|nb|nr|no|ns|sh|sj|sn)\\/\\d{2,4}\\/\\d+$")) return lccn.split("\\/"); return null; } public static String convertBnF8toBnF9(String value) { if(value.matches("\\d{8}[0-9bcdfghjkmnpqrstvwxz]")) return value; value = value.replaceAll("^cb", ""); value = "cb" + value.replaceAll("[^\\d]" , "" ) ; String bnf_xdigits = "0123456789bcdfghjkmnpqrstvwxz"; int bnf_check_digit = 0; for (int i=0; i < value.length(); i++){ bnf_check_digit += bnf_xdigits.indexOf(value.charAt(i)) * (i+1); } value = value.substring(2)+bnf_xdigits.charAt(bnf_check_digit % bnf_xdigits.length()); return value; } public static Statement addTrustedStatement(MediaWikiConnection wikidata, String base, Claim c, Reference ref) throws Exception { ArrayList<Statement> s = wikidata.request(new GetSpecificStatementRequest(base, c.getProperty())); if(s.size() == 0) { Statement s1 = wikidata.request(new CreateClaimRequest(base, c)); wikidata.request(new SetReferenceRequest(s1, ref)); return s1; }else{ for(Statement s1 : s) { if(s1.getClaim().equals(c)){ if(! s1.hasReference(new Property(248) )) wikidata.request(new SetReferenceRequest(s1, ref)); return null; // sonst würden eventuell Qualifiers erstellt werden } } /** * mehrere zu der Property wurden gefunden, keiner mit diesem Wert */ return null; } } private static final String persianDigits = "۰۱۲۳۴۵۶۷۸۹"; private static final String arabicDigits = "0123456789"; public static String parsePersianNumber(String persian) { for(int i = 0; i < persianDigits.length() && i < arabicDigits.length(); i++) persian = persian.replaceAll(""+persianDigits.charAt(i), ""+arabicDigits.charAt(i)); return persian; } public static boolean containsPersianDigits(String s) { for(char c : persianDigits.toCharArray()) if(s.indexOf(c) >= 0) return true; return false; } /** * Detects whether a page can be edited using a bot. * @see https://en.wikipedia.org/wiki/Template:Bots#Java * @param text * @param user * @return */ public static boolean allowBots(String text, String user) { return !text.matches("(?si).*\\{\\{(nobots|bots\\|(allow=none|deny=(.*?" + Pattern.quote(user) + ".*?|all)|optout=all))\\}\\}.*"); } public static <T extends HasProperty> Map<Property, List<T>> groupByProperty(List<T> lc){ HashMap<Property, List<T>> h = new HashMap<>(); for(T c : lc) { if(! h.containsKey(c.getProperty())) h.put(c.getProperty(), new ArrayList<T>()); h.get(c.getProperty()).add(c); } return h; } public static <T extends HasProperty> List<Property> listByProperty(List<T> lc){ ArrayList<Property> a = new ArrayList<>(); for(T c : lc) if(! a.contains(c.getProperty())) a.add(c.getProperty()); return a; } public static mediawiki.info.wikibase.Statement importDate(MediaWikiConnection wikidata, String date, String base, Property p, Reference source) throws Exception{ WikibaseDate wbd = null; if(date.matches("\\d\\d\\d\\d")){ SimpleDateFormat sdf = new SimpleDateFormat("yyyy"); wbd = new WikibaseDate(sdf.parse(date), 0, 0, 0, WikibaseDate.ONE_YEAR); }else if(date.matches("\\d\\d\\d\\d-\\d\\d-\\d\\d")){ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); wbd = new WikibaseDate(sdf.parse(date), 0, 0, 0, WikibaseDate.ONE_DAY); }else{ return null; } return addClaim(wikidata, base, new Claim(p, new DateSnak(wbd)), source); } @Deprecated public static mediawiki.info.wikibase.Statement addClaim(MediaWikiConnection wikidata, String entity, Claim c, Reference source) throws Exception{ return addClaim(wikidata,entity,c,source,"processed by KasparBot based on GND"); } public static mediawiki.info.wikibase.Statement addClaim(MediaWikiConnection wikidata, String entity, Claim c, Reference source, String summary) throws Exception{ Boolean b = (Boolean) wikidata.request(new HasClaimRequest(entity,c.getProperty())); SimpleDateFormat log = new SimpleDateFormat("HH:mm:ss"); if(!b){ mediawiki.info.wikibase.Statement s = addWikidataProperty(wikidata, entity, c, source, summary); System.out.println(Thread.currentThread().getName()+"\t["+log.format(new Date())+"]\t"+entity+" has now "+c.getProperty()+": "+c.getSnak()); return s; } return null;} public static Statement addWikidataProperty(MediaWikiConnection wikidata, String entity, Claim attr, Reference source, String message) throws Exception{ CreateClaimRequest req = new CreateClaimRequest(entity, attr); req.setProperty("summary", message); Statement claim = (Statement) wikidata.request(req); if(source != null){ MediaWikiRequest req2 = new SetReferenceRequest(claim, source); req2.setProperty("summary", message); wikidata.request(req2); } return claim; } public static void addWikidataProperty(MediaWikiConnection wikidata, String entity, Claim attr, Claim source, String message) throws Exception{ addWikidataProperty(wikidata, entity, attr, new Reference(source), message); } }