package org.krakenapps.logdb.impl;
import static org.krakenapps.bnf.Syntax.k;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Invalidate;
import org.apache.felix.ipojo.annotations.Provides;
import org.apache.felix.ipojo.annotations.Requires;
import org.apache.felix.ipojo.annotations.ServiceProperty;
import org.apache.felix.ipojo.annotations.Validate;
import org.krakenapps.api.Primitive;
import org.krakenapps.bnf.Binding;
import org.krakenapps.bnf.Syntax;
import org.krakenapps.logdb.DataSource;
import org.krakenapps.logdb.DataSourceEventListener;
import org.krakenapps.logdb.DataSourceRegistry;
import org.krakenapps.logdb.LogQuery;
import org.krakenapps.logdb.LogQueryCommand;
import org.krakenapps.logdb.LogQueryCommand.LogMap;
import org.krakenapps.logdb.LogQueryEventListener;
import org.krakenapps.logdb.LogQueryParser;
import org.krakenapps.logdb.LogQueryService;
import org.krakenapps.logdb.LogQueryStatus;
import org.krakenapps.logdb.SyntaxProvider;
import org.krakenapps.logdb.mapreduce.MapQuery;
import org.krakenapps.logdb.mapreduce.MapReduceQueryStatus;
import org.krakenapps.logdb.mapreduce.MapReduceService;
import org.krakenapps.logdb.mapreduce.ReduceQuery;
import org.krakenapps.logdb.mapreduce.RemoteMapQuery;
import org.krakenapps.logdb.mapreduce.RemoteQuery;
import org.krakenapps.logdb.mapreduce.RemoteQueryKey;
import org.krakenapps.logdb.query.LogQueryImpl;
import org.krakenapps.logdb.query.StringPlaceholder;
import org.krakenapps.logdb.query.command.RpcFrom;
import org.krakenapps.logdb.query.command.RpcTo;
import org.krakenapps.rpc.RpcAgent;
import org.krakenapps.rpc.RpcClient;
import org.krakenapps.rpc.RpcConnection;
import org.krakenapps.rpc.RpcConnectionProperties;
import org.krakenapps.rpc.RpcContext;
import org.krakenapps.rpc.RpcException;
import org.krakenapps.rpc.RpcExceptionEvent;
import org.krakenapps.rpc.RpcMethod;
import org.krakenapps.rpc.RpcSession;
import org.krakenapps.rpc.RpcSessionEvent;
import org.krakenapps.rpc.SimpleRpcService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Component(name = "logdb-mapreduce")
@Provides
public class MapReduceRpcService extends SimpleRpcService implements MapReduceService, LogQueryEventListener,
DataSourceEventListener {
// slf4j logger named after this class
private final Logger logger = LoggerFactory.getLogger(MapReduceRpcService.class.getName());
/**
 * mapreduce query guid to mapreduce query status mappings (filled by
 * createQuery, listed by getQueries)
 */
private ConcurrentMap<String, MapReduceQueryStatus> queries;
/**
 * rpc agent; its guid identifies this node when connecting to peers and when
 * building rpcto commands
 */
@Requires
private RpcAgent agent;
/**
 * syntax registry where the rpcfrom/rpcto parsers are added on start and
 * removed on stop
 */
@Requires
private SyntaxProvider syntaxProvider;
/**
 * data source registry; this service listens to it and also feeds it with
 * data sources announced by remote peers
 */
@Requires
private DataSourceRegistry dataSourceRegistry;
/**
 * local log query service used to create and start the map and reduce
 * queries
 */
@Requires
private LogQueryService queryService;
/**
 * "rpc.name" service property; never read in this class, hence the
 * unused warning suppression
 */
@SuppressWarnings("unused")
@ServiceProperty(name = "rpc.name", value = "logdb-mapreduce")
private String name;
/**
 * mapreduce query guid to rpcfrom mappings (registered while parsing an
 * rpcfrom command)
 */
private ConcurrentMap<String, RpcFrom> rpcFromMap;
/**
 * mapreduce query guid to rpcto mappings (registered while parsing an rpcto
 * command)
 */
private ConcurrentMap<String, RpcTo> rpcToMap;
/**
 * rpcfrom command parser
 */
private RpcFromParser rpcFromParser;
/**
 * rpcto command parser
 */
private RpcToParser rpcToParser;
/**
 * collected remote node's recent query statuses, maintained by the
 * onQueryStatusChange rpc method
 */
private ConcurrentMap<RemoteQueryKey, RemoteQuery> remoteQueries;
/**
 * search node connections by peer guid. entries are added by connect() and
 * removed by disconnect() or connectionClosed()
 */
private ConcurrentMap<String, RpcConnection> upstreams;
/**
 * search connected remote nodes by peer guid. they push data source
 * notifications, query status changes, and log data using separate data
 * connection. entries are added by sessionOpened()
 */
private ConcurrentMap<String, RpcConnection> downstreams;
/**
 * mapreduce query guid to map query requests (from remote node)
 */
private ConcurrentMap<String, MapQuery> mapQueries;
/**
 * mapreduce query guid to local waiting reduce queries
 */
private ConcurrentMap<String, ReduceQuery> reduceQueries;
/**
 * remote map query to mapreduce query guid relation
 */
private ConcurrentMap<RemoteQueryKey, String> remoteQueryMappings;
/**
 * does nothing; all tables and parsers are allocated in start()
 */
public MapReduceRpcService() {
}
/**
 * component validate callback. allocates every lookup table and both command
 * parsers first, and only then publishes the parsers and registers this
 * service as a data source / query listener.
 */
@Validate
public void start() {
    // connection tables
    upstreams = new ConcurrentHashMap<String, RpcConnection>();
    downstreams = new ConcurrentHashMap<String, RpcConnection>();

    // query tables
    queries = new ConcurrentHashMap<String, MapReduceQueryStatus>();
    mapQueries = new ConcurrentHashMap<String, MapQuery>();
    reduceQueries = new ConcurrentHashMap<String, ReduceQuery>();
    remoteQueries = new ConcurrentHashMap<RemoteQueryKey, RemoteQuery>();
    remoteQueryMappings = new ConcurrentHashMap<RemoteQueryKey, String>();

    // rpc command tables and their parsers
    rpcFromMap = new ConcurrentHashMap<String, RpcFrom>();
    rpcToMap = new ConcurrentHashMap<String, RpcTo>();
    rpcFromParser = new RpcFromParser();
    rpcToParser = new RpcToParser();

    // everything above must exist before callbacks can arrive
    syntaxProvider.addParsers(Arrays.asList(rpcFromParser, rpcToParser));
    dataSourceRegistry.addListener(this);
    queryService.addListener(this);
}
/**
 * component invalidate callback. undoes every registration made by start():
 * the rpcfrom/rpcto parsers, the data source listener and the query status
 * listener. each dependency is null-checked because it may already be gone
 * when the component is invalidated.
 */
@Invalidate
public void stop() {
    if (syntaxProvider != null)
        syntaxProvider.removeParsers(Arrays.asList(rpcFromParser, rpcToParser));
    if (dataSourceRegistry != null)
        dataSourceRegistry.removeListener(this);
    // start() also registers on the query service; without this the stopped
    // instance keeps receiving query status callbacks
    if (queryService != null)
        queryService.removeListener(this);
}
/**
 * parser for the "rpcfrom" command. the command takes one string argument,
 * the mapreduce query guid, and every parsed command is registered in
 * rpcFromMap under that guid.
 */
private class RpcFromParser implements LogQueryParser {
    /**
     * registers the "rpcfrom" rule (keyword followed by a string) as a root
     * rule of the given syntax
     */
    @Override
    public void addSyntax(Syntax syntax) {
        final String rule = "rpcfrom";
        syntax.add(rule, this, k("rpcfrom"), new StringPlaceholder());
        syntax.addRoot(rule);
    }

    /**
     * builds the RpcFrom command from the second child of the binding (the
     * guid) and remembers it in rpcFromMap
     */
    @Override
    public Object parse(Binding b) {
        Object argument = b.getChildren()[1].getValue();
        String queryGuid = (String) argument;
        RpcFrom command = new RpcFrom(queryGuid);
        rpcFromMap.put(queryGuid, command);
        return command;
    }
}
/**
 * parser for the "rpcto" command. the command takes one string argument, the
 * mapreduce query guid. the target connection is taken from the map query
 * that createMapQuery registered under the same guid, so that registration
 * must happen before the query string is parsed.
 */
private class RpcToParser implements LogQueryParser {
    /**
     * registers the "rpcto" rule (keyword followed by a string) as a root
     * rule of the given syntax
     */
    @Override
    public void addSyntax(Syntax syntax) {
        syntax.add("rpcto", this, k("rpcto"), new StringPlaceholder());
        syntax.addRoot("rpcto");
    }

    /**
     * builds the RpcTo command for the guid found in the second child of the
     * binding and remembers it in rpcToMap
     *
     * @throws IllegalStateException
     *             if no map query is registered for the guid
     */
    @Override
    public Object parse(Binding b) {
        String guid = (String) b.getChildren()[1].getValue();
        MapQuery mq = mapQueries.get(guid);
        // an rpcto for an unknown guid has no connection to push to; fail
        // with a readable message instead of a bare NullPointerException
        if (mq == null)
            throw new IllegalStateException("mapreduce query not found: " + guid);
        RpcTo rpc = new RpcTo(agent.getGuid(), mq.getConnection(), guid);
        rpcToMap.put(guid, rpc);
        return rpc;
    }
}
/**
 * looks up the rpcfrom command registered for a mapreduce query
 *
 * @param guid
 *            the mapreduce query guid
 * @return the registered command, or null if none is registered
 */
@Override
public RpcFrom getRpcFrom(String guid) {
    RpcFrom registered = rpcFromMap.get(guid);
    return registered;
}
/**
 * looks up the rpcto command registered for a mapreduce query
 *
 * @param guid
 *            the mapreduce query guid
 * @return the registered command, or null if none is registered
 */
@Override
public RpcTo getRpcTo(String guid) {
    RpcTo registered = rpcToMap.get(guid);
    return registered;
}
/**
 * rpc method. tags the calling session with the given guid (stored as the
 * "guid" session property) so that later push calls on the same session can
 * find their target query.
 *
 * @param guid
 *            the mapreduce query guid
 */
@RpcMethod(name = "setLogStream")
public void setLogStream(String guid) {
    RpcContext.getSession().setProperty("guid", guid);
}
/**
 * rpc method. feeds one log row into the rpcfrom command of the query that
 * the calling session was bound to by setLogStream. rows for a session that
 * has no guid yet, or whose query is no longer registered, are dropped with
 * a warning (same policy as eof).
 *
 * @param data
 *            the log row
 */
@RpcMethod(name = "push")
public void push(Map<String, Object> data) {
    RpcSession session = RpcContext.getSession();
    String queryGuid = (String) session.getProperty("guid");
    // ConcurrentHashMap rejects null keys, so guard before the lookup
    RpcFrom rpc = queryGuid == null ? null : rpcFromMap.get(queryGuid);
    if (rpc == null) {
        logger.warn("kraken logdb: rpcfrom not found for mapreduce query [{}]", queryGuid);
        return;
    }
    rpc.push(new LogMap(data));
    if (logger.isDebugEnabled()) {
        // stringify only when debug is on
        String s = Primitive.stringify(data);
        logger.debug("kraken logdb: pushed [{}] data [{}]", queryGuid, s);
    }
}
/**
 * rpc method. signals that a mapper finished sending data for the given
 * mapreduce query and forwards the eof to the local rpcfrom command. logs a
 * warning if no rpcfrom is registered for the guid.
 *
 * @param queryGuid
 *            the mapreduce query guid
 */
@RpcMethod(name = "eof")
public void eof(String queryGuid) {
    // TODO: check if all mapper queries are ended
    // for now, send eof if one mapper query is ended
    RpcFrom rpc = rpcFromMap.get(queryGuid);
    if (rpc != null)
        rpc.eof();
    else
        logger.warn("kraken logdb: rpcfrom not found for mapreduce query [{}]", queryGuid);
}
/**
 * rpc method. creates (but does not start) a local map query on behalf of
 * the calling peer.
 *
 * @param queryGuid
 *            the mapreduce query guid
 * @param query
 *            the map query string; it is expected to contain an rpcto command
 *            for the same guid
 * @return the id of the created local query
 * @throws RpcException
 *             if the query cannot be created; the partially registered map
 *             query is rolled back first
 */
@RpcMethod(name = "createMapQuery")
public int createMapQuery(String queryGuid, String query) {
    MapQuery mq = null;
    try {
        RpcSession session = RpcContext.getSession();
        String guid = session.getConnection().getPeerGuid();
        // map query should be set before rpc command parsing
        mq = new MapQuery(guid, session.getConnection());
        mapQueries.put(queryGuid, mq);
        mq.setQuery(queryService.createQuery(query));
        logger.info("kraken logdb: created map query [{}]", queryGuid);
        return mq.getQuery().getId();
    } catch (Exception e) {
        // do not leave a map query without a backing query behind: a later
        // startMapQuery would find it and fail on the missing query
        if (mq != null && queryGuid != null) {
            mapQueries.remove(queryGuid, mq);
            rpcToMap.remove(queryGuid);
        }
        logger.error("kraken logdb: cannot create map query", e);
        throw new RpcException(e.getMessage());
    }
}
/**
 * rpc method. starts the local map query that was created for the given
 * mapreduce query guid.
 *
 * @param queryGuid
 *            the mapreduce query guid
 * @throws RpcException
 *             if no map query is registered for the guid
 */
@RpcMethod(name = "startMapQuery")
public void startMapQuery(String queryGuid) {
    MapQuery registered = mapQueries.get(queryGuid);
    if (registered != null) {
        int localId = registered.getQuery().getId();
        queryService.startQuery(localId);
        logger.info("kraken logdb: started map query [{}]", queryGuid);
        return;
    }
    throw new RpcException("mapreduce query not found: " + queryGuid);
}
/**
 * rpc method. forgets the map query registered for the given mapreduce query
 * guid.
 *
 * @param queryGuid
 *            the mapreduce query guid
 * @throws RpcException
 *             if no map query is registered for the guid
 */
@RpcMethod(name = "removeMapQuery")
public void removeMapQuery(String queryGuid) {
    boolean existed = mapQueries.remove(queryGuid) != null;
    if (!existed)
        throw new RpcException("mapreduce query not found: " + queryGuid);

    logger.info("kraken logdb: removed map query [{}]", queryGuid);
}
/**
 * rpc method. mirrors a data source change announced by the calling peer
 * into the local data source registry. "add" and "update" both update the
 * registry, "remove" removes the entry, any other action is ignored.
 *
 * @param name
 *            the data source name on the peer
 * @param action
 *            "add", "update" or "remove"
 * @param metadata
 *            data source metadata (used for add/update only)
 */
@RpcMethod(name = "onDataSourceChange")
public void onDataSourceChange(String name, String action, Map<String, Object> metadata) {
    String peerGuid = RpcContext.getSession().getConnection().getPeerGuid();

    boolean upsert = action.equals("add") || action.equals("update");
    if (upsert) {
        logger.info("kraken logdb: on update data source [guid={}, name={}]", peerGuid, name);
        dataSourceRegistry.update(new RpcDataSource(peerGuid, name, metadata));
        return;
    }

    if (action.equals("remove")) {
        logger.info("kraken logdb: on remove data source [guid={}, name={}]", peerGuid, name);
        dataSourceRegistry.remove(new RpcDataSource(peerGuid, name));
    }
}
/**
 * rpc method. tracks the status of a query running on the calling peer in
 * the remoteQueries table, keyed by (peer guid, query id).
 *
 * @param queryId
 *            the query id on the peer
 * @param action
 *            name of a LogQueryStatus constant
 * @param queryString
 *            the query string (stored when the query is created)
 */
@RpcMethod(name = "onQueryStatusChange")
public void onQueryStatusChange(int queryId, String action, String queryString) {
    String peerGuid = RpcContext.getSession().getConnection().getPeerGuid();
    RemoteQueryKey key = new RemoteQueryKey(peerGuid, queryId);
    LogQueryStatus status = LogQueryStatus.valueOf(action);
    logger.info("kraken logdb: query status change [{}, status={}]", key, status);

    // lifecycle events change the table itself
    if (status == LogQueryStatus.Created) {
        remoteQueries.put(key, new RemoteQuery(peerGuid, queryId, queryString));
        return;
    }
    if (status == LogQueryStatus.Removed) {
        remoteQueries.remove(key);
        return;
    }

    // the remaining handled events only flip flags on a tracked query
    boolean started = status == LogQueryStatus.Started;
    boolean stopped = status == LogQueryStatus.Stopped;
    boolean ended = status == LogQueryStatus.Eof;
    if (!started && !stopped && !ended)
        return;

    RemoteQuery tracked = remoteQueries.get(key);
    if (tracked == null)
        return;

    if (started)
        tracked.setRunning(true);
    else if (stopped)
        tracked.setRunning(false);
    else
        tracked.setEnd(true);
}
/**
 * session close callback. if the closed session carries a "guid" property,
 * disconnect() is called with that value.
 *
 * NOTE(review): the only writer of the "guid" property in this class is
 * setLogStream, which stores a mapreduce query guid, while disconnect()
 * looks up upstreams by peer guid - confirm this is intended.
 */
@Override
public void sessionClosed(RpcSessionEvent e) {
    Object tagged = e.getSession().getProperty("guid");
    if (tagged == null)
        return;

    disconnect((String) tagged);
}
/**
 * @return a new list holding the status of every registered mapreduce query;
 *         changing the list does not affect this service
 */
@Override
public List<MapReduceQueryStatus> getQueries() {
    Collection<MapReduceQueryStatus> current = queries.values();
    return new ArrayList<MapReduceQueryStatus>(current);
}
/**
 * creates a mapreduce query. the query string is split at the first reducer
 * command: everything before it becomes the map query (terminated by
 * "rpcto &lt;guid&gt;") that is created on every downstream node, and the
 * rest becomes the local reduce query (prefixed by "rpcfrom &lt;guid&gt;").
 * nothing is started here; see startQuery.
 *
 * a downstream node that fails to create its map query is logged and
 * skipped.
 *
 * @param queryString
 *            the original query string
 * @return the status object registered for the new query
 */
@Override
public MapReduceQueryStatus createQuery(String queryString) {
    String queryGuid = UUID.randomUUID().toString();
    LogQuery lq = new LogQueryImpl(syntaxProvider, queryString);

    // the first reducer and everything after it run locally
    boolean foundReducer = false;
    List<LogQueryCommand> mapCommands = new ArrayList<LogQueryCommand>();
    List<LogQueryCommand> reduceCommands = new ArrayList<LogQueryCommand>();
    for (LogQueryCommand c : lq.getCommands()) {
        if (c.isReducer())
            foundReducer = true;
        if (foundReducer)
            reduceCommands.add(c);
        else
            mapCommands.add(c);
    }

    // join with "|" only when there is something to join; an empty half
    // must not leave a dangling pipe in the generated query string
    String mapPart = buildQueryString(mapCommands);
    String reducePart = buildQueryString(reduceCommands);
    String rpcTo = "rpcto " + queryGuid;
    String rpcFrom = "rpcfrom " + queryGuid;
    String mapQueryString = mapPart.length() == 0 ? rpcTo : mapPart + "|" + rpcTo;
    String reduceQueryString = reducePart.length() == 0 ? rpcFrom : rpcFrom + "|" + reducePart;
    logger.trace("kraken logdb: map query [{}]", mapQueryString);
    logger.trace("kraken logdb: reduce query [{}]", reduceQueryString);

    // create map queries (named so it does not shadow the mapQueries field)
    List<RemoteMapQuery> remoteMapQueries = new ArrayList<RemoteMapQuery>();
    for (RpcConnection c : downstreams.values()) {
        RpcSession session = null;
        try {
            session = c.createSession("logdb-mapreduce");
            int id = (Integer) session.call("createMapQuery", queryGuid, mapQueryString);
            remoteMapQueries.add(new RemoteMapQuery(queryGuid, c.getPeerGuid(), id));
            remoteQueryMappings.put(new RemoteQueryKey(c.getPeerGuid(), id), queryGuid);
        } catch (Exception e) {
            logger.error("kraken logdb: cannot create mapquery", e);
        } finally {
            if (session != null)
                session.close();
        }
    }

    // create reduce query
    LogQuery q = queryService.createQuery(reduceQueryString);
    ReduceQuery r = new ReduceQuery(queryGuid, q);
    reduceQueries.put(queryGuid, r);

    // add to mapreduce query table
    MapReduceQueryStatus status = new MapReduceQueryStatus(queryGuid, queryString, remoteMapQueries, r);
    queries.put(queryGuid, status);
    return status;
}
/**
 * joins the query strings of the given commands with "|", in list order
 *
 * @param commands
 *            the commands to join
 * @return the joined query string, empty if there are no commands
 */
private String buildQueryString(List<LogQueryCommand> commands) {
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (LogQueryCommand command : commands) {
        joined.append(separator);
        joined.append(command.getQueryString());
        // every command after the first is preceded by a pipe
        separator = "|";
    }
    return joined.toString();
}
/**
 * starts a mapreduce query: first the local reduce query, then the map query
 * on every downstream node. a downstream node that fails to start is logged
 * and skipped.
 *
 * @param guid
 *            the mapreduce query guid
 * @throws IllegalStateException
 *             if no reduce query is registered for the guid
 */
@Override
public void startQuery(String guid) {
    // start reduce query
    ReduceQuery r = reduceQueries.get(guid);
    if (r == null)
        throw new IllegalStateException("mapreduce query not found: " + guid);
    queryService.startQuery(r.getQuery().getId());

    // start map queries (pumping)
    for (RpcConnection c : downstreams.values()) {
        RpcSession session = null;
        try {
            session = c.createSession("logdb-mapreduce");
            session.call("startMapQuery", guid);
        } catch (Exception e) {
            logger.error("kraken logdb: cannot start mapquery", e);
        } finally {
            if (session != null)
                session.close();
        }
    }
}
/**
 * forgets everything this service tracks for a mapreduce query: its status
 * entry, its reduce query, its rpcfrom/rpcto commands and the remote map
 * query mappings that point to it. unknown guids are ignored.
 *
 * @param guid
 *            the mapreduce query guid
 */
@Override
public void removeQuery(String guid) {
    rpcFromMap.remove(guid);
    rpcToMap.remove(guid);
    // createQuery also registers the query in these tables; drop them too so
    // a removed query no longer shows up in getQueries() or leaks
    queries.remove(guid);
    reduceQueries.remove(guid);
    remoteQueryMappings.values().removeAll(Collections.singleton(guid));
}
/**
 * @return a new list holding every remote query currently tracked; changing
 *         the list does not affect this service
 */
@Override
public List<RemoteQuery> getRemoteQueries() {
    Collection<RemoteQuery> tracked = remoteQueries.values();
    return new ArrayList<RemoteQuery>(tracked);
}
/**
 * @return a read-only view of the upstream connections
 */
@Override
public Collection<RpcConnection> getUpstreamConnections() {
    Collection<RpcConnection> connections = upstreams.values();
    return Collections.unmodifiableCollection(connections);
}
/**
 * @return a read-only view of the downstream connections
 */
@Override
public Collection<RpcConnection> getDownstreamConnections() {
    Collection<RpcConnection> connections = downstreams.values();
    return Collections.unmodifiableCollection(connections);
}
/**
 * register downstream connection when any logdb session opened. a connection
 * whose peer is already known, either as upstream or as downstream, is left
 * alone.
 */
@Override
public void sessionOpened(RpcSessionEvent e) {
    RpcConnection conn = e.getSession().getConnection();
    String peerGuid = conn.getPeerGuid();

    boolean known = upstreams.containsKey(peerGuid) || downstreams.containsKey(peerGuid);
    if (known)
        return;

    downstreams.put(peerGuid, conn);
    logger.info("kraken logdb: downstream connection [{}] opened", conn);
}
/**
 * connection close callback. drops the connection's peer from the upstream
 * and downstream tables and logs which of them it was removed from.
 *
 * @param conn
 *            the closed connection
 */
@Override
public void connectionClosed(RpcConnection conn) {
    String peerGuid = conn.getPeerGuid();
    // a connection closed before peering completed has no peer guid and was
    // never registered; ConcurrentHashMap would reject the null key
    if (peerGuid == null)
        return;

    if (upstreams.remove(peerGuid) != null)
        logger.info("kraken logdb: upstream connection [{}] closed", conn);
    if (downstreams.remove(peerGuid) != null)
        logger.info("kraken logdb: downstream connection [{}] closed", conn);
}
/**
 * opens an upstream connection, waits for peering to complete, registers the
 * connection under the peer guid, binds this service to it and announces
 * every local (non-rpc) data source to the peer.
 *
 * @param props
 *            connection properties
 * @return the connection, or null if the client returned no connection
 * @throws IllegalStateException
 *             if the peer guid is still unknown after the wait, or if the
 *             calling thread is interrupted while waiting; the connection is
 *             closed in both cases
 */
@Override
public RpcConnection connect(RpcConnectionProperties props) {
    RpcConnection conn = null;
    try {
        RpcClient client = new RpcClient(agent.getGuid());
        conn = client.connect(props);
        if (conn == null)
            return null;

        // wait until peering completed (polls every 100ms, 51 times at most)
        for (int i = 0; conn.getPeerGuid() == null && i <= 50; i++)
            Thread.sleep(100);

        String peerGuid = conn.getPeerGuid();
        if (peerGuid == null) {
            // never peered: registering would fail on the null key, so give
            // the connection back instead of leaking it
            if (conn.isOpen())
                conn.close();
            throw new IllegalStateException("connection timeout");
        }

        upstreams.put(peerGuid, conn);
        conn.bind("logdb-mapreduce", this);
        for (DataSource ds : dataSourceRegistry.getAll())
            if (!ds.getType().equals("rpc"))
                notifyDataSourceChange(ds, "update", conn);
        return conn;
    } catch (InterruptedException e) {
        // keep the interrupt visible to the caller and release the
        // half-established connection
        Thread.currentThread().interrupt();
        if (conn != null && conn.isOpen())
            conn.close();
        throw new IllegalStateException("connection interrupted", e);
    }
}
/**
 * unregisters the upstream connection of the given peer and closes it if it
 * is still open. unknown guids are ignored.
 *
 * @param guid
 *            the peer guid
 */
@Override
public void disconnect(String guid) {
    RpcConnection upstream = upstreams.remove(guid);
    if (upstream == null)
        return;

    if (upstream.isOpen())
        upstream.close();
}
//
// DataSourceEventListener callbacks
//
/**
 * data source callback. announces the updated data source to every upstream
 * connection with action "update".
 */
@Override
public void onUpdate(DataSource ds) {
    Collection<RpcConnection> targets = upstreams.values();
    for (RpcConnection upstream : targets)
        notifyDataSourceChange(ds, "update", upstream);
}
/**
 * data source callback. announces the removed data source to every upstream
 * connection with action "remove".
 */
@Override
public void onRemove(DataSource ds) {
    Collection<RpcConnection> targets = upstreams.values();
    for (RpcConnection upstream : targets)
        notifyDataSourceChange(ds, "remove", upstream);
}
/**
 * posts one data source change to a peer over a short-lived
 * "logdb-mapreduce" session. failures are logged and swallowed; the session
 * is always closed.
 *
 * @param ds
 *            the data source that changed
 * @param action
 *            the action name sent to the peer
 * @param conn
 *            the connection to notify
 */
private void notifyDataSourceChange(DataSource ds, String action, RpcConnection conn) {
    RpcSession channel = null;
    try {
        channel = conn.createSession("logdb-mapreduce");
        channel.post("onDataSourceChange", ds.getName(), action, ds.getMetadata());

        Object[] logArgs = new Object[] { ds.getType(), ds.getName(), action };
        logger.info("kraken logdb: notified data source [type={}, name={}, action={}] ", logArgs);
    } catch (Exception e) {
        logger.warn("kraken logdb: cannot update datasource info", e);
    } finally {
        if (channel != null)
            channel.close();
    }
}
//
// LogQueryEventListener
//
/**
 * query callback. forwards the id, new status and query string of a local
 * query to every upstream connection.
 */
@Override
public void onQueryStatusChange(LogQuery query, LogQueryStatus status) {
    int queryId = query.getId();
    String queryString = query.getQueryString();
    notifyQueryStatus(queryId, status, queryString);
}
/**
 * posts a local query status change to every upstream connection, each over
 * a short-lived "logdb-mapreduce" session. a failing peer is logged and
 * skipped; sessions are always closed.
 *
 * @param id
 *            the local query id
 * @param status
 *            the new status; its constant name is what gets sent
 * @param queryString
 *            the query string
 */
private void notifyQueryStatus(int id, LogQueryStatus status, String queryString) {
    for (RpcConnection conn : upstreams.values()) {
        RpcSession session = null;
        try {
            session = conn.createSession("logdb-mapreduce");
            session.post("onQueryStatusChange", id, status.name(), queryString);
            logger.info("kraken logdb: notified query status [id={}, status={}] to peer [{}]", new Object[] { id, status,
                    conn.getPeerGuid() });
        } catch (Exception e) {
            // this is a query status notification, not a data source one
            logger.warn("kraken logdb: cannot notify query status", e);
        } finally {
            if (session != null)
                session.close();
        }
    }
}
/**
 * rpc exception callback. logs the event.
 *
 * NOTE(review): assumes RpcExceptionEvent is not a Throwable, in which case
 * slf4j treats it as a format argument and needs a placeholder to print it -
 * confirm against the rpc api.
 */
@Override
public void exceptionCaught(RpcExceptionEvent e) {
    logger.error("kraken logdb: mapreduce rpc fail [{}]", e);
}
}