package ch.usi.da.dmap.server;
/*
* Copyright (c) 2017 Università della Svizzera italiana (USI)
*
* This file is part of URingPaxos.
*
* URingPaxos is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* URingPaxos is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with URingPaxos. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.log4j.Logger;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;
import ch.usi.da.dmap.thrift.gen.Dmap;
import ch.usi.da.dmap.thrift.gen.MapError;
import ch.usi.da.dmap.thrift.gen.Partition;
import ch.usi.da.dmap.thrift.gen.RangeCommand;
import ch.usi.da.dmap.thrift.gen.RangeResponse;
import ch.usi.da.dmap.thrift.gen.RangeType;
import ch.usi.da.dmap.thrift.gen.Replica;
import ch.usi.da.dmap.thrift.gen.WrongPartition;
import ch.usi.da.dmap.utils.Pair;
import ch.usi.da.dmap.utils.Utils;
import ch.usi.da.paxos.lab.DummyWatcher;
/**
* Name: RecoveryClient<br>
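* Description: Thrift client that locates dmap replicas through ZooKeeper and reads snapshots of a distributed map partition by partition.<br>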
*
* Creation date: Apr 11, 2017<br>
* $Id$
*
*
* @author Samuel Benz benz@geoid.ch
*/
public class RecoveryClient<K,V> {
// log4j logger shared by this class and its inner classes.
private final static Logger logger = Logger.getLogger(RecoveryClient.class);
// Randomness for picking replicas/partitions and for generating command ids.
private final Random rand = new Random();
// ZooKeeper session; opened in the constructor and used there to discover replicas.
private ZooKeeper zoo;
// Width of the id window (toid - fromid) requested by QueueFiller per GETRANGE command.
private final int get_range_size = 5000;
// Version of the cached partition map; initially 0 and updated by readPartitions().
private long partition_version = 0;
// Cached partition map (partition token -> replicas). Lookups use tailMap(hash) and
// fall back to the first key when no token is >= hash.
// NOTE(review): read from QueueFiller threads without synchronization - confirm this is intended.
private SortedMap<Integer,Set<Replica>> partitions = new TreeMap<Integer,Set<Replica>>();
// Open Thrift clients per partition token; filled lazily by getClient(int).
private Map<Integer,List<Dmap.Client>> clients = new HashMap<Integer,List<Dmap.Client>>();
// Identifier of the map; also the last element of the ZooKeeper path "/dmap/<mapID>".
public final String mapID;
public RecoveryClient(String mapID, String zookeeper_host) {
this.mapID = mapID;
final String path = "/dmap/" + mapID;
try {
zoo = new ZooKeeper(zookeeper_host,3000,new DummyWatcher());
while(partitions.isEmpty()){
// lookup one replica to initialize the partitions map
List<String> replicas = zoo.getChildren(path,false);
if(replicas.isEmpty()){
logger.warn(this + " can not locate any replica!");
Thread.sleep(1000);
}else{
int pos = rand.nextInt(replicas.size());
byte[] a = zoo.getData(path + "/" + replicas.get(pos),false,null);
String[] as = new String(a).split(";");
String ip = as[0];
int port = Integer.parseInt(as[1]);
TTransport transport = new TSocket(ip,port);
TProtocol protocol = new TBinaryProtocol(transport);
Dmap.Client client = new Dmap.Client(protocol);
try {
transport.open();
} catch (TTransportException e) {
}
readPartitions(client);
}
}
} catch (IOException | KeeperException | InterruptedException e) {
logger.error(this + " ZooKeeper init error!",e);
}
}
/**
 * Returns a client for a randomly chosen position on the partition ring.
 *
 * @return a client as returned by {@code getClient(int)} for a random hash
 */
private Dmap.Client getClient(){
	// same as asking for the client of a null key: pick a random hash
	final int randomHash = rand.nextInt();
	return getClient(randomHash);
}
/**
 * Returns a client for the partition responsible for the given key.
 *
 * @param key the key whose {@code hashCode()} selects the partition;
 *            {@code null} selects a random position instead
 * @return a client as returned by {@code getClient(int)}
 */
private Dmap.Client getClient(Object key){
	final int hash = (key != null) ? key.hashCode() : rand.nextInt();
	return getClient(hash);
}
/**
 * Returns a client connected to a replica of the partition that owns
 * the given hash.
 *
 * The owning partition is the first token greater than or equal to
 * {@code hash}; if there is none the lookup wraps around to the first token.
 * When no client is cached for that partition, a connection is opened to
 * every replica of it and the last one that connected is returned.
 * Otherwise a random cached client is returned.
 *
 * @param hash position on the partition ring
 * @return a connected client, or {@code null} if no replica of the
 *         partition could be reached
 * @throws NoSuchElementException if the partition map is empty
 */
private Dmap.Client getClient(int hash){
	SortedMap<Integer,Set<Replica>> tailMap = partitions.tailMap(hash);
	int partition = tailMap.isEmpty() ? partitions.firstKey() : tailMap.firstKey();
	List<Dmap.Client> c = clients.get(partition);
	if(c == null){
		c = new ArrayList<Dmap.Client>();
		clients.put(partition,c);
	}
	if(!c.isEmpty()){
		return c.get(rand.nextInt(c.size()));
	}
	Dmap.Client client = null;
	for(Replica r : partitions.get(partition)){
		try {
			client = createClient(r.address);
			c.add(client);
		} catch (TTransportException e) {
			// keep the cause so the reason for the failed connection is visible
			logger.warn(this + " server connection error to " + r.address,e);
		}
	}
	return client;
}
/**
 * Opens a new Thrift connection to the replica at the given address.
 *
 * @param addr replica address in the form "ip;port"
 * @return a client whose transport has been opened
 * @throws TTransportException if the connection can not be opened
 */
private Dmap.Client createClient(String addr) throws TTransportException{
	final String[] parts = addr.split(";");
	final TSocket socket = new TSocket(parts[0],Integer.parseInt(parts[1]));
	final Dmap.Client connected = new Dmap.Client(new TBinaryProtocol(socket));
	socket.open();
	return connected;
}
/**
 * Discards a client whose connection failed.
 *
 * The client's transports are closed so the socket is released, and the
 * client is removed from the per-partition cache if it is in there. When
 * the affected partition has no client left, the partition map is re-read
 * through another client.
 *
 * @param client the failed client; {@code null} is ignored
 */
private void removeClient(Dmap.Client client){
	if(client == null){
		return;
	}
	// release the socket; closing an already closed transport is harmless
	client.getInputProtocol().getTransport().close();
	client.getOutputProtocol().getTransport().close();
	for(Entry<Integer, List<Dmap.Client>> e : clients.entrySet()){
		if(e.getValue().remove(client)){
			logger.warn(this + " server connection error. Remove this client.");
			if(e.getValue().isEmpty()){
				Dmap.Client other = getClient();
				if(other != null){
					readPartitions(other);
				}
			}
			// getClient() may have modified the map; do not continue iterating
			break;
		}
	}
}
/**
 * Generates the id for a new command.
 *
 * @return a random long
 */
private long getCmdID(){
	final long id = rand.nextLong();
	return id;
}
/**
 * Fetches the partition map through the given client and replaces the
 * cached map if nothing is cached yet or the version differs.
 *
 * Thrift errors are logged and leave the cached map untouched.
 *
 * @param client the client to ask; {@code null} is logged and ignored
 */
private void readPartitions(Dmap.Client client){
	if(client == null){
		logger.warn(this + " no client available to read the partition map!");
		return;
	}
	try {
		Partition p = client.partition(getCmdID());
		if(p.getPartitions() == null){
			// keep the cached map rather than clearing it and failing in putAll
			logger.warn(this + " received a partition response without a partition map!");
			return;
		}
		// also load when the cache is empty: the remote version may equal the initial local version
		if(partitions.isEmpty() || p.getVersion() != partition_version){
			partitions.clear();
			partitions.putAll(p.getPartitions());
			partition_version = p.getVersion();
		}
	} catch (TException e) {
		logger.error(this,e);
	}
}
/**
 * @return the version of the currently cached partition map
 */
public long getPartitionVersion(){
	final long version = this.partition_version;
	return version;
}
/**
 * @return the cached partition map (partition token to replicas); this is
 *         the internal instance, not a copy
 */
public SortedMap<Integer,Set<Replica>> getPartitions(){
	final SortedMap<Integer,Set<Replica>> current = this.partitions;
	return current;
}
/**
 * Asks a randomly chosen partition to create a snapshot (CREATERANGE).
 *
 * The request is repeated after a {@code WrongPartition} answer (the
 * partition map is re-read first) and after a transport failure (the
 * failed client is discarded first). Retries run in a loop, so a long
 * outage can not overflow the stack.
 *
 * @return the snapshot id, or 0 if the response carries none or the
 *         request failed with a {@code MapError} or another Thrift error
 */
public long snapshot() {
	while(true){
		Dmap.Client client = getClient();
		try {
			RangeCommand cmd = new RangeCommand();
			cmd.setId(getCmdID());
			cmd.setType(RangeType.CREATERANGE);
			cmd.setPartition_version(partition_version);
			RangeResponse ret = client.range(cmd);
			return ret.isSetSnapshot() ? ret.getSnapshot() : 0;
		} catch (MapError e){
			logger.error(this + " " + e.errorMsg);
			return 0;
		} catch (WrongPartition p){
			// stale partition map: refresh and try again
			readPartitions(getClient());
		} catch (TTransportException e){
			// broken connection: drop the client and try again
			removeClient(client);
		} catch (TException e) {
			logger.error(this,e);
			return 0;
		}
	}
}
/**
 * Asks a randomly chosen partition to release a snapshot (DELETERANGE).
 *
 * The request is repeated after a {@code WrongPartition} answer (the
 * partition map is re-read first) and after a transport failure (the
 * failed client is discarded first). Retries run in a loop, so a long
 * outage can not overflow the stack. A {@code MapError} or any other
 * Thrift error is logged and ends the attempt.
 *
 * @param snapshotID id of the snapshot to release; must not be {@code null}
 */
public void removeSnapshot(Long snapshotID){
	while(true){
		RangeCommand cmd = new RangeCommand();
		Dmap.Client client = getClient();
		cmd.setId(getCmdID());
		cmd.setType(RangeType.DELETERANGE);
		cmd.setSnapshot(snapshotID);
		cmd.setPartition_version(partition_version);
		try {
			client.range(cmd);
			logger.debug(this + " released snapshot " + snapshotID);
			return;
		} catch (MapError e) {
			logger.error(this + " error!",e);
			return;
		} catch (WrongPartition p){
			// stale partition map: refresh and try again
			readPartitions(getClient());
		} catch (TTransportException e){
			// broken connection: drop the client and try again
			removeClient(client);
		} catch (TException e) {
			logger.error(this + " error!",e);
			return;
		}
	}
}
/**
 * Asks the partition that owns {@code token} for the number of entries
 * it holds in the given snapshot (PARTITIONSIZE).
 *
 * The request is repeated after a {@code MapError} (the snapshot must
 * exist eventually), after a {@code WrongPartition} answer (the partition
 * map is re-read first) and after a transport failure (the failed client
 * is discarded first). Retries run in a loop, so they can not overflow
 * the stack.
 *
 * @param token position on the partition ring selecting the partition
 * @param snapshotID id of the snapshot to measure
 * @return the count reported by the partition, or 0 after any other
 *         Thrift error
 */
public long partitionSize(int token,long snapshotID){
	while(true){
		Dmap.Client client = getClient(token);
		RangeCommand s = new RangeCommand();
		s.setId(getCmdID());
		s.setType(RangeType.PARTITIONSIZE);
		s.setPartition_version(partition_version);
		s.setSnapshot(snapshotID);
		try{
			RangeResponse r = client.range(s);
			return r.getCount();
		} catch (MapError e) {
			// must exist eventually: log and ask again
			logger.error(this + " error!",e);
		} catch (WrongPartition p){
			// stale partition map: refresh and try again
			readPartitions(getClient());
		} catch (TTransportException e){
			// broken connection: drop the client and try again
			removeClient(client);
		} catch (TException e) {
			logger.error(this + " error!",e);
			return 0;
		}
	}
}
/**
 * Creates an iterator over the entries of one partition in a snapshot.
 *
 * The partition size is queried first; a background thread running a
 * {@code QueueFiller} then fetches the entries into a queue from which the
 * returned iterator takes them.
 *
 * @param token position on the partition ring selecting the partition
 * @param snapshotID id of the snapshot to read
 * @return an iterator delivering as many entries as the partition reported
 */
public Iterator<Map.Entry<K,V>> iterator(int token,long snapshotID) {
	final long expected = partitionSize(token,snapshotID);
	final LinkedBlockingQueue<Entry<K, V>> buffer = new LinkedBlockingQueue<Map.Entry<K,V>>();
	new Thread(new QueueFiller(token,snapshotID,buffer,expected)).start();
	return new EntryIterator<Map.Entry<K,V>>(expected,buffer);
}
/**
 * Iterator that delivers a fixed number of elements taken from a blocking
 * queue which is filled by another thread.
 */
class EntryIterator<T> implements Iterator<T> {
	/** Number of elements this iterator will deliver in total. */
	private final long size;
	/** Number of calls to next() that have been counted so far. */
	private long delivered = 0;
	private final BlockingQueue<T> queue;
	T last = null;
	/**
	 * @param size number of elements to deliver
	 * @param queue queue the elements are taken from
	 */
	EntryIterator(Long size, BlockingQueue<T> queue) {
		this.size = size;
		this.queue = queue;
	}
	@Override
	public final boolean hasNext() {
		return delivered < size;
	}
	/**
	 * Removal is not supported.
	 *
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void remove() {
		throw new UnsupportedOperationException();
	}
	/**
	 * Takes the next element from the queue, blocking until one is available.
	 *
	 * @return the next element, or {@code null} if the thread was interrupted
	 *         while waiting (the interrupt flag is restored in that case)
	 * @throws NoSuchElementException if all elements have been delivered
	 */
	@Override
	public T next() {
		if(delivered >= size){
			throw new NoSuchElementException();
		}
		// counted before taking so that an interrupted consumer loop still terminates
		delivered++;
		try {
			return queue.take();
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			return null;
		}
	}
}
class QueueFiller implements Runnable {
private final BlockingQueue<Map.Entry<K,V>> queue;
private final long snapshotID;
private final int token;
private final long size;
public QueueFiller(int token,long snapshotID,BlockingQueue<Map.Entry<K,V>> queue,long size){
this.token = token;
this.snapshotID = snapshotID;
this.queue = queue;
this.size = size;
}
@Override
public void run() {
Set<Replica> replicas = partitions.get(token);
Dmap.Client client = null;
while(client == null){
try {
Replica replica = (Replica)replicas.toArray()[rand.nextInt(replicas.size())];
client = createClient(replica.address);
} catch (TTransportException e1) {
}
}
long retreived = 0;
int from = 0;
do{
try {
RangeCommand cmd = new RangeCommand();
cmd.setId(getCmdID());
cmd.setType(RangeType.GETRANGE);
cmd.setSnapshot(snapshotID);
cmd.setFromid(from);
cmd.setToid(from+get_range_size);
cmd.setPartition_version(partition_version);
from = from+get_range_size;
if(client == null){
throw new TTransportException();
}
RangeResponse ret = client.range(cmd); //Idea: ask multiple replicas with different offset
if(ret != null){
if(ret.isSetValues()){
@SuppressWarnings("unchecked")
List<Pair<K,V>> sublist = (List<Pair<K,V>>) Utils.getObject(ret.getValues());
for(Pair<K,V> e : sublist){
queue.put(e);
retreived++;
}
}
}
} catch (MapError e){
/*if(!view.closed){
logger.error(view + " error!",e);
}*/
} catch (WrongPartition p){
} catch (TTransportException e){
removeClient(client);
try {
Replica replica = (Replica)replicas.toArray()[rand.nextInt(replicas.size())];
client = createClient(replica.address);
} catch (TTransportException e1) {
}
} catch (TException | ClassNotFoundException | IOException e) {
logger.error("QueueFiller error!",e);
} catch (InterruptedException e){
Thread.currentThread().interrupt();
break;
}
}while(retreived < size);
client.getInputProtocol().getTransport().close();
client.getOutputProtocol().getTransport().close();
}
}
}