/*
* (C) Copyright 2014 Milinda Pathirage.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.pathirage.freshet.helpers;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ParseWikipediaActivity {
public static Map<String, Object> parse(String line) {
System.out.println(line);
Pattern p = Pattern.compile("\\[\\[(.*)\\]\\]\\s(.*)\\s(.*)\\s\\*\\s(.*)\\s\\*\\s\\(\\+?(.\\d*)\\)\\s(.*)");
Matcher m = p.matcher(line);
if (m.find() && m.groupCount() == 6) {
String title = m.group(1);
String flags = m.group(2);
String diffUrl = m.group(3);
String user = m.group(4);
int byteDiff = Integer.parseInt(m.group(5));
String summary = m.group(6);
Map<String, Boolean> flagMap = new HashMap<String, Boolean>();
flagMap.put("is-minor", flags.contains("M"));
flagMap.put("is-new", flags.contains("N"));
flagMap.put("is-unpatrolled", flags.contains("!"));
flagMap.put("is-bot-edit", flags.contains("B"));
flagMap.put("is-special", title.startsWith("Special:"));
flagMap.put("is-talk", title.startsWith("Talk:"));
Map<String, Object> root = new HashMap<String, Object>();
root.put("title", title);
root.put("user", user);
root.put("unparsed-flags", flags);
root.put("diff-bytes", byteDiff);
root.put("diff-url", diffUrl);
root.put("summary", summary);
root.put("flags", flagMap);
return root;
} else {
return null;
}
}
}