/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.wikiedits;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Immutable description of a single Wikipedia edit.
 *
 * <p>Instances are either built directly through the constructor or parsed from a raw
 * event line with {@link #fromRawEvent(long, String, String)}. The six boolean
 * attributes of an edit are packed into a single bit field internally and exposed
 * through the {@code isXxx()} accessors.
 */
public class WikipediaEditEvent {

    // - Flags ----------------------------------------------------------------

    /** Bit set when the edit is marked as minor. */
    private static final int IS_MINOR = 0B000001;

    /** Bit set when the edit is marked as new. */
    private static final int IS_NEW = 0B000010;

    /** Bit set when the edit is marked as unpatrolled. */
    private static final int IS_UNPATROLLED = 0B000100;

    /** Bit set when the edit is marked as a bot edit. */
    private static final int IS_BOT_EDIT = 0B001000;

    /** Bit set when the title starts with {@code "Special:"}. */
    private static final int IS_SPECIAL = 0B010000;

    /** Bit set when the title starts with {@code "Talk:"}. */
    private static final int IS_TALK = 0B100000;

    // - Parser ---------------------------------------------------------------

    /**
     * Expected pattern of raw events. The six capturing groups are, in order: title,
     * flag characters, diff URL, user, byte difference and summary.
     */
    private static final Pattern RAW_EVENT_PATTERN = Pattern.compile("\\[\\[(.*)\\]\\]\\s(.*)\\s(.*)\\s\\*\\s(.*)\\s\\*\\s\\(\\+?(.\\d*)\\)\\s(.*)");

    // Metadata
    private final long timestamp;
    private final String channel;

    // Edit attributes
    private final String title;
    private final String diffUrl;
    private final String user;
    private final int byteDiff;
    private final String summary;

    /** Bitwise OR of the {@code IS_*} constants that apply to this edit. */
    private final int flags;

    /**
     * Creates a new edit event.
     *
     * @param timestamp timestamp assigned to the event
     * @param channel channel of the event, must not be {@code null}
     * @param title title of the edited page, must not be {@code null}
     * @param diffUrl URL of the diff, must not be {@code null}
     * @param user user who made the edit, must not be {@code null}
     * @param byteDiff byte difference of the edit
     * @param summary summary of the edit, must not be {@code null}
     * @param isMinor whether the edit is a minor edit
     * @param isNew whether the edit is new
     * @param isUnpatrolled whether the edit is unpatrolled
     * @param isBotEdit whether the edit was made by a bot
     * @param isSpecial whether the edit concerns a special page
     * @param isTalk whether the edit concerns a talk page
     * @throws NullPointerException if any of the {@code String} arguments is {@code null};
     *         the exception message names the offending argument
     */
    public WikipediaEditEvent(
            long timestamp,
            String channel,
            String title,
            String diffUrl,
            String user,
            int byteDiff,
            String summary,
            boolean isMinor,
            boolean isNew,
            boolean isUnpatrolled,
            boolean isBotEdit,
            boolean isSpecial,
            boolean isTalk) {
        this.timestamp = timestamp;
        this.channel = Objects.requireNonNull(channel, "channel");
        this.title = Objects.requireNonNull(title, "title");
        this.diffUrl = Objects.requireNonNull(diffUrl, "diffUrl");
        this.user = Objects.requireNonNull(user, "user");
        this.byteDiff = byteDiff;
        this.summary = Objects.requireNonNull(summary, "summary");
        this.flags = encodeFlags(isMinor, isNew, isUnpatrolled, isBotEdit, isSpecial, isTalk);
    }

    /**
     * Returns the timestamp when this event arrived at the source.
     *
     * @return The timestamp assigned at the source.
     */
    public long getTimestamp() {
        return timestamp;
    }

    /**
     * Returns the channel of this event.
     *
     * @return The channel, never {@code null}.
     */
    public String getChannel() {
        return channel;
    }

    /**
     * Returns the title of the edited page.
     *
     * @return The title, never {@code null}.
     */
    public String getTitle() {
        return title;
    }

    /**
     * Returns the URL of the diff for this edit.
     *
     * @return The diff URL, never {@code null}.
     */
    public String getDiffUrl() {
        return diffUrl;
    }

    /**
     * Returns the user who made the edit.
     *
     * @return The user, never {@code null}.
     */
    public String getUser() {
        return user;
    }

    /**
     * Returns the byte difference of the edit.
     *
     * @return The byte difference; may be negative.
     */
    public int getByteDiff() {
        return byteDiff;
    }

    /**
     * Returns the summary of the edit.
     *
     * @return The summary, never {@code null}.
     */
    public String getSummary() {
        return summary;
    }

    /**
     * Returns whether this edit is flagged as minor.
     *
     * @return {@code true} if the minor flag is set.
     */
    public boolean isMinor() {
        return (flags & IS_MINOR) != 0;
    }

    /**
     * Returns whether this edit is flagged as new.
     *
     * @return {@code true} if the new flag is set.
     */
    public boolean isNew() {
        return (flags & IS_NEW) != 0;
    }

    /**
     * Returns whether this edit is flagged as unpatrolled.
     *
     * @return {@code true} if the unpatrolled flag is set.
     */
    public boolean isUnpatrolled() {
        return (flags & IS_UNPATROLLED) != 0;
    }

    /**
     * Returns whether this edit is flagged as a bot edit.
     *
     * @return {@code true} if the bot-edit flag is set.
     */
    public boolean isBotEdit() {
        return (flags & IS_BOT_EDIT) != 0;
    }

    /**
     * Returns whether this edit is flagged as concerning a special page.
     *
     * @return {@code true} if the special flag is set.
     */
    public boolean isSpecial() {
        return (flags & IS_SPECIAL) != 0;
    }

    /**
     * Returns whether this edit is flagged as concerning a talk page.
     *
     * @return {@code true} if the talk flag is set.
     */
    public boolean isTalk() {
        return (flags & IS_TALK) != 0;
    }

    @Override
    public String toString() {
        return "WikipediaEditEvent{" +
                "timestamp=" + timestamp +
                ", channel='" + channel + '\'' +
                ", title='" + title + '\'' +
                ", diffUrl='" + diffUrl + '\'' +
                ", user='" + user + '\'' +
                ", byteDiff=" + byteDiff +
                ", summary='" + summary + '\'' +
                ", flags=" + flags +
                '}';
    }

    /**
     * Packs the six boolean edit attributes into one bit field.
     *
     * @return the bitwise OR of the {@code IS_*} constants whose argument is {@code true}
     */
    private static int encodeFlags(
            boolean isMinor,
            boolean isNew,
            boolean isUnpatrolled,
            boolean isBotEdit,
            boolean isSpecial,
            boolean isTalk) {
        int bits = 0;
        if (isMinor) {
            bits |= IS_MINOR;
        }
        if (isNew) {
            bits |= IS_NEW;
        }
        if (isUnpatrolled) {
            bits |= IS_UNPATROLLED;
        }
        if (isBotEdit) {
            bits |= IS_BOT_EDIT;
        }
        if (isSpecial) {
            bits |= IS_SPECIAL;
        }
        if (isTalk) {
            bits |= IS_TALK;
        }
        return bits;
    }

    /**
     * Parses a raw event line into a {@link WikipediaEditEvent}.
     *
     * <p>The flag characters of the raw event are interpreted as follows: {@code M} marks
     * a minor edit, {@code N} a new edit, {@code !} an unpatrolled edit and {@code B} a
     * bot edit. The special and talk attributes are derived from the title prefixes
     * {@code "Special:"} and {@code "Talk:"}.
     *
     * @param timestamp timestamp to assign to the event
     * @param channel channel to assign to the event, must not be {@code null}
     * @param rawEvent raw event text to parse, must not be {@code null}
     * @return the parsed event, or {@code null} if {@code rawEvent} does not contain the
     *         expected pattern or its byte-difference field is not a valid integer
     * @throws NullPointerException if {@code channel} or {@code rawEvent} is {@code null}
     */
    public static WikipediaEditEvent fromRawEvent(
            long timestamp,
            String channel,
            String rawEvent) {
        final Matcher m = RAW_EVENT_PATTERN.matcher(Objects.requireNonNull(rawEvent, "rawEvent"));
        // find() rather than matches(): the pattern may start anywhere in the raw text.
        if (!m.find()) {
            return null;
        }

        final int byteDiff;
        try {
            byteDiff = Integer.parseInt(m.group(5));
        } catch (NumberFormatException ignored) {
            // The pattern accepts any leading character in the byte-difference group
            // (e.g. a lone sign) as well as numbers too large for an int. Such input is
            // malformed and is reported like any other unparseable event.
            return null;
        }

        final String title = m.group(1);
        final String flagChars = m.group(2);

        return new WikipediaEditEvent(
                timestamp,
                channel,
                title,
                m.group(3),
                m.group(4),
                byteDiff,
                m.group(6),
                flagChars.contains("M"),
                flagChars.contains("N"),
                flagChars.contains("!"),
                flagChars.contains("B"),
                title.startsWith("Special:"),
                title.startsWith("Talk:"));
    }
}