revision 0d397efc4b781ef5b60108708fa1131467d2c3c8
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
* You can obtain a copy of the license at
* trunk/opends/resource/legal-notices/OpenDS.LICENSE
* or
* See the License for the specific language governing permissions
* and limitations under the License.
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at
* trunk/opends/resource/legal-notices/OpenDS.LICENSE. If applicable,
* add the following below this CDDL HEADER, with the fields enclosed
* by brackets "[]" replaced with your own identifying information:
* Portions Copyright [yyyy] [name of copyright owner]
* Copyright 2006-2009 Sun Microsystems, Inc.
package org.opends.server.replication.service;
import static org.opends.messages.ReplicationMessages.*;
import static org.opends.server.loggers.ErrorLogger.logError;
import static org.opends.server.loggers.debug.DebugLogger.getTracer;
import static org.opends.server.util.StaticUtils.stackTraceToSingleLineString;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.opends.messages.Category;
import org.opends.messages.Message;
import org.opends.messages.MessageBuilder;
import org.opends.messages.Severity;
import org.opends.server.api.DirectoryThread;
import org.opends.server.loggers.debug.DebugTracer;
import org.opends.server.replication.common.ChangeNumber;
import org.opends.server.replication.common.DSInfo;
import org.opends.server.replication.common.RSInfo;
import org.opends.server.replication.common.ServerState;
import org.opends.server.replication.common.ServerStatus;
import org.opends.server.replication.protocol.ChangeStatusMsg;
import org.opends.server.replication.protocol.HeartbeatMonitor;
import org.opends.server.replication.protocol.ProtocolSession;
import org.opends.server.replication.protocol.ProtocolVersion;
import org.opends.server.replication.protocol.ReplServerStartMsg;
import org.opends.server.replication.protocol.ReplSessionSecurity;
import org.opends.server.replication.protocol.ReplicationMsg;
import org.opends.server.replication.protocol.ServerStartECLMsg;
import org.opends.server.replication.protocol.ServerStartMsg;
import org.opends.server.replication.protocol.StartECLSessionMsg;
import org.opends.server.replication.protocol.StartSessionMsg;
import org.opends.server.replication.protocol.TopologyMsg;
import org.opends.server.replication.protocol.UpdateMsg;
import org.opends.server.replication.protocol.WindowMsg;
import org.opends.server.replication.protocol.WindowProbeMsg;
import org.opends.server.util.ServerConstants;
import org.opends.server.replication.server.ReplicationServer;
* The broker for Multi-master Replication.
public class ReplicationBroker
* The tracer object for the debug logger.
// Debug tracer used by the debugInfo() calls of this class.
private static final DebugTracer TRACER = getTracer();
// Reset to false by both start() methods.
// NOTE(review): the code that sets it to true is not visible in this excerpt.
private boolean shutdown = false;
// Replication server addresses given to start(Collection); the handshake
// methods split each entry at its last ':' into a host name and a port.
private Collection<String> servers;
// Set to true once the handshake with the chosen RS has succeeded.
private boolean connected = false;
// Readable name of the RS in use; copied from tmpReadableServerName when a
// connection is established.
private String replicationServer = "Not connected";
// Session kept by a phase one handshake called with keepConnection == true.
private ProtocolSession session = null;
// Local server state, sent in the start messages and used to choose the RS.
private final ServerState state;
// Base DN this broker negotiates sessions for.
private final String baseDn;
// Server id this broker presents to the replication servers.
private final int serverId;
// Recreated with maxSendWindow permits each time a connection is established.
private Semaphore sendWindow;
// Window size announced by the RS in its ReplServerStartMsg.
private int maxSendWindow;
// Current receive window; reset to maxRcvWindow on start and on connection.
private int rcvWindow = 100;
// Half of the receive window; the constructor sets it to window / 2.
private int halfRcvWindow = rcvWindow/2;
// Receive window size given to the constructor and sent in start messages.
private int maxRcvWindow = rcvWindow;
// NOTE(review): no use of this field is visible in this excerpt.
private int timeout = 0;
// Protocol version in use: the current version at construction, then the
// result of ProtocolVersion.minWithCurrent(...) after a phase one handshake.
private short protocolVersion;
// Used to create the client sessions on freshly connected sockets.
private ReplSessionSecurity replSessionSecurity;
// My group id
private byte groupId = (byte) -1;
// The group id of the RS we are connected to
private byte rsGroupId = (byte) -1;
// The server id of the RS we are connected to
private Integer rsServerId = -1;
// The server URL of the RS we are connected to
private String rsServerUrl = null;
// Our replication domain
private ReplicationDomain domain = null;
// Trick for avoiding an inner class to return several values from the
// performPhaseOneHandshake method.
private String tmpReadableServerName = null;
* The expected duration in milliseconds between heartbeats received
* from the replication server. Zero means heartbeats are off.
private long heartbeatInterval = 0;
* A thread to monitor heartbeats on the session.
private HeartbeatMonitor heartbeatMonitor = null;
* The number of times the connection was lost.
private int numLostConnections = 0;
* When the broker cannot connect to any replication server
* it logs an error and keeps retrying every second.
* This boolean is set when the first failure happens and is used
* to avoid repeating the error message for further failures to connect
* and to know that it is necessary to print a new message when the broker
* finally succeeds in connecting.
private boolean connectionError = false;
// Monitor taken by connectAsDataServer() while it probes the servers.
private final Object connectPhaseLock = new Object();
// Same group id poller thread
private SameGroupIdPoller sameGroupIdPoller = null;
* The thread that publishes messages to the RS containing the current
* change time of this DS.
private CTHeartbeatPublisherThread ctHeartbeatPublisherThread = null;
* The expected period in milliseconds between these messages are sent
* to the replication server. Zero means heartbeats are off.
private long changeTimeHeartbeatSendInterval = 0;
* Properties for the last topology info received from the network.
// Info for other DSs.
// Warning: does not contain info for us (for our server id)
private List<DSInfo> dsList = new ArrayList<DSInfo>();
// Info for other RSs.
private List<RSInfo> rsList = new ArrayList<RSInfo>();
// Generation id given to the constructor; getGenerationID() returns it only
// when no replication domain is attached.
private long generationID;
// NOTE(review): no use of this counter is visible in this excerpt.
private int updateDoneCount = 0;
/**
 * Creates a new ReplicationServer Broker for a particular ReplicationDomain.
 *
 * @param replicationDomain The replication domain that is creating us.
 * @param state The ServerState that should be used by this broker
 *              when negotiating the session with the replicationServer.
 * @param baseDn The base DN that should be used by this broker
 *               when negotiating the session with the replicationServer.
 * @param serverID2 The server ID that should be used by this broker
 *                  when negotiating the session with the replicationServer.
 * @param window The size of the send and receive window to use.
 * @param generationId The generationId for the server associated to the
 *        provided serverId and for the domain associated to the provided
 *        baseDN.
 * @param heartbeatInterval The interval (in ms) between heartbeats requested
 *        from the replicationServer, or zero if no heartbeats are requested.
 * @param replSessionSecurity The session security configuration.
 * @param groupId The group id of our domain.
 * @param changeTimeHeartbeatInterval The interval (in ms) between Change
 *        time heartbeats are sent to the RS,
 *        or zero if no CN heartbeat should be sent.
 */
public ReplicationBroker(ReplicationDomain replicationDomain,
  ServerState state, String baseDn, int serverID2, int window,
  long generationId, long heartbeatInterval,
  ReplSessionSecurity replSessionSecurity, byte groupId,
  long changeTimeHeartbeatInterval)
{
  this.domain = replicationDomain;
  this.baseDn = baseDn;
  this.serverId = serverID2;
  this.state = state;
  this.protocolVersion = ProtocolVersion.getCurrentVersion();
  this.replSessionSecurity = replSessionSecurity;
  this.groupId = groupId;
  this.generationID = generationId;
  this.heartbeatInterval = heartbeatInterval;
  // Keep the three receive-window fields consistent from construction on.
  // The original assigned maxRcvWindow twice and left rcvWindow at its
  // default of 100 until start() was called.
  this.rcvWindow = window;
  this.maxRcvWindow = window;
  this.halfRcvWindow = window / 2;
  this.changeTimeHeartbeatSendInterval = changeTimeHeartbeatInterval;
}
* Start the ReplicationBroker.
// Starts the broker: clears the shutdown flag and restores the receive
// window to its configured maximum.
// NOTE(review): this excerpt appears to omit lines (braces and possibly a
// call that opens the connection); only the visible statements are described.
public void start()
shutdown = false;
this.rcvWindow = this.maxRcvWindow;
* Start the ReplicationBroker.
* @param servers list of servers used
// Starts the broker with the given replication server addresses: clears the
// shutdown flag, remembers the server list and restores the receive window.
// NOTE(review): this excerpt omits lines (braces and the body of the
// size check below); only the visible statements are described.
public void start(Collection<String> servers)
* Open Socket to the ReplicationServer
* Send the Start message
shutdown = false;
this.servers = servers;
// An empty server list is detected here; the statements executed in that
// case are not visible. The assignment that follows is presumably NOT part
// of this if - confirm against the full source.
if (servers.size() < 1)
this.rcvWindow = this.maxRcvWindow;
* Gets the group id of the RS we are connected to.
* @return The group id of the RS we are connected to
public byte getRsGroupId()
return rsGroupId;
* Gets the server id of the RS we are connected to.
* @return The server id of the RS we are connected to
public Integer getRsServerId()
return rsServerId;
* Gets the server id.
* @return The server id
public int getServerId()
return serverId;
* Gets the server id.
* @return The server id
private long getGenerationID()
if (domain != null)
return domain.getGenerationID();
return generationID;
* Gets the server url of the RS we are connected to.
* @return The server url of the RS we are connected to
public String getRsServerUrl()
return rsServerUrl;
* Bag class for keeping info we get from a server in order to compute the
* best one to connect to.
public static class ServerInfo
private ServerState serverState = null;
private byte groupId = (byte) -1;
* Constructor.
* @param serverState Server state of the RS
* @param groupId Group id of the RS
public ServerInfo(ServerState serverState, byte groupId)
this.serverState = serverState;
this.groupId = groupId;
* Get the server state.
* @return The server state
public ServerState getServerState()
return serverState;
* get the group id.
* @return The group id
public byte getGroupId()
return groupId;
// Entry point for connecting; begins with a case-insensitive comparison of
// the base DN.
// NOTE(review): the comparison argument and both branches are missing from
// this excerpt; presumably it selects between connectAsECL() and
// connectAsDataServer() - confirm against the full source.
private void connect()
if (this.baseDn.compareToIgnoreCase(
* Special aspects of connecting as ECL compared to connecting as data server
* are :
* - 1 single RS configured
* - so no choice of the preferred RS
* - No same groupID polling
* - ?? Heartbeat
* - Start handshake is :
* Broker ---> StartECLMsg ---> RS
* <---- ReplServerStartMsg ---
* ---> StartSessionECLMsg --> RS
// Connects as an ECL client: takes the first configured server and performs
// the ECL phase one handshake with it, keeping the session open.
// NOTE(review): this excerpt omits lines (braces and everything executed
// when the handshake reply is non-null).
private void connectAsECL()
// FIXME:ECL List of RS to connect is for now limited to one RS only
// Throws NoSuchElementException if the configured server list is empty.
String bestServer = this.servers.iterator().next();
ReplServerStartMsg inReplServerStartMsg
= performECLPhaseOneHandshake(bestServer, true);
if (inReplServerStartMsg!=null)
* Connect to a ReplicationServer.
* Handshake sequences between a DS and a RS is divided into 2 logical
* consecutive phases (phase 1 and phase 2). DS always initiates connection
* and always sends first message:
* DS<->RS:
* -------
* phase 1:
* DS --- ServerStartMsg ---> RS
* DS <--- ReplServerStartMsg --- RS
* phase 2:
* DS --- StartSessionMsg ---> RS
* DS <--- TopologyMsg --- RS
* Before performing a full handshake sequence, DS searches for best suitable
* RS by making only phase 1 handshake to every RS he knows then closing
* connection. This allows to gather information on available RSs and then
* decide with which RS the full handshake (phase 1 then phase 2) will be
* finally performed.
* @throws NumberFormatException address was invalid
// Connects this broker as a data server. Visible steps:
//  1. probe every configured RS with a phase one handshake (session not kept)
//     and record its server state in rsInfos;
//  2. let computeBestReplicationServer() pick one of the servers that answered;
//  3. redo phase one with that server, keeping the session, then phase two;
//  4. on success record the RS identity, mark the broker connected and reset
//     the send and receive windows.
// NOTE(review): this excerpt omits many lines (braces, try keywords, log and
// session calls, several expressions). The comments added here describe only
// what is visible; restore the full text before changing any logic.
private void connectAsDataServer()
// Answers collected during the probing pass, keyed by server address.
HashMap<String, ServerInfo> rsInfos = new HashMap<String, ServerInfo>();
// May have created a broker with null replication domain for
// unit test purpose.
if (domain != null)
// If a first connect or a connection failure occur, we go through here.
// force status machine to NOT_CONNECTED_STATUS so that monitoring can
// see that we are not connected.
// Stop any existing poller and heartbeat monitor from a previous session.
// Set when a TopologyMsg reveals an RS with our group id while we are
// connecting to one with another group id; suppresses the error log below.
boolean newServerWithSameGroupId = false;
synchronized (connectPhaseLock)
* Connect to each replication server and get their ServerState then find
* out which one is the best to connect to.
if (debugEnabled())
TRACER.debugInfo("phase 1 : will perform PhaseOneH with each RS in " +
" order to elect the prefered one");
for (String server : servers)
// Connect to server and get reply message
ReplServerStartMsg replServerStartMsg =
performPhaseOneHandshake(server, false);
// Store reply message info in list
if (replServerStartMsg != null)
ServerInfo serverInfo =
new ServerInfo(replServerStartMsg.getServerState(),
rsInfos.put(server, serverInfo);
} // for servers
ReplServerStartMsg replServerStartMsg = null;
if (rsInfos.size() > 0)
// At least one server answered, find the best one.
String bestServer = computeBestReplicationServer(state, rsInfos,
serverId, baseDn, groupId);
// Best found, now initialize connection to this one (handshake phase 1)
if (debugEnabled())
"phase 2 : will perform PhaseOneH with the prefered RS.");
replServerStartMsg = performPhaseOneHandshake(bestServer, true);
if (replServerStartMsg != null) // Handshake phase 1 exchange went well
ServerInfo bestServerInfo = rsInfos.get(bestServer);
// Compute in which status we are starting the session to tell the RS
ServerStatus initStatus =
// Perfom session start (handshake phase 2)
TopologyMsg topologyMsg = performPhaseTwoHandshake(bestServer,
if (topologyMsg != null) // Handshake phase 2 exchange went well
* If we just connected to a RS with a different group id than us
* (because for instance a RS with our group id was unreachable
* while connecting to each RS) but the just received TopologyMsg
* shows that in the same time a RS with our group id connected,
* we must give up the connection to force reconnection that will
* certainly go back to a server with our group id as server with
* our group id have a greater priority for connection (in
* computeBestReplicationServer). In other words, we disconnect to
* connect to a server with our group id. If a server with our
* group id comes back later in the topology, we will be advised
* upon reception of a new TopologyMsg message and we will force
* reconnection at that time to retrieve a server with our group
* id.
byte tmpRsGroupId = bestServerInfo.getGroupId();
boolean someServersWithSameGroupId =
// Really no other server with our group id ?
if ((tmpRsGroupId == groupId) ||
((tmpRsGroupId != groupId) && !someServersWithSameGroupId))
// Accept the connection: remember which RS we are now using.
replicationServer = tmpReadableServerName;
maxSendWindow = replServerStartMsg.getWindowSize();
rsGroupId = replServerStartMsg.getGroupId();
rsServerId = replServerStartMsg.getServerId();
rsServerUrl = bestServer;
// May have created a broker with null replication domain for
// unit test purpose.
if (domain != null)
initStatus, replServerStartMsg.getServerState(),
connected = true;
if (getRsGroupId() != groupId)
// Connected to replication server with wrong group id:
// warn user and start poller to recover when a server with
// right group id arrives...
Message message =
Byte.toString(groupId), Integer.toString(rsServerId),
bestServer, Byte.toString(getRsGroupId()),
baseDn.toString(), Integer.toString(serverId));
if (replServerStartMsg.getVersion()
} else
// Detected new RS with our group id: log disconnection to
// inform administrator
Byte.toString(groupId), baseDn.toString(),
// Do not log connection error
newServerWithSameGroupId = true;
} catch (Exception e)
Message message = ERR_COMPUTING_FAKE_OPS.get(
baseDn, bestServer,
e.getLocalizedMessage() + stackTraceToSingleLineString(e));
} finally
// Whatever happened above, never keep a session we did not connect with.
if (connected == false)
if (session != null)
} catch (IOException e)
// The session was already closed, just ignore.
session = null;
} // Could perform handshake phase 2 with best
} // Could perform handshake phase 1 with best
} // Reached some servers
if (connected)
// Log a message to let the administrator know that the failure was
// resolved.
// Wakeup all the thread that were waiting on the window
// on the previous connection.
connectionError = false;
if (sendWindow != null)
// Fresh windows for the new session.
sendWindow = new Semaphore(maxSendWindow);
rcvWindow = maxRcvWindow;
// A generation id of -1 on the RS side is treated like a matching one when
// choosing which message to log.
if ((replServerStartMsg.getGenerationId() == this.getGenerationID()) ||
(replServerStartMsg.getGenerationId() == -1))
Message message =
} else
Message message =
} else
* This server could not find any replicationServer. It's going to start
* in degraded mode. Log a message.
// Log only the first failure, and not when we disconnected on purpose.
if (!connectionError && !newServerWithSameGroupId)
connectionError = true;
Message message =
* Has the passed RS info list some servers with our group id ?
* @return true if at least one server has the same group id
private boolean hasSomeServerWithSameGroupId(List<RSInfo> rsInfos)
for (RSInfo rsInfo : rsInfos)
if (rsInfo.getGroupId() == this.groupId)
return true;
return false;
* Determines the status we are starting with according to our state and the
* RS state.
* @param rsGenId The generation id of the RS
* @param rsState The server state of the RS
* @param degradedStatusThreshold The degraded status threshold of the RS
* @param dsGenId The local generation id
* @return The initial status
public ServerStatus computeInitialServerStatus(long rsGenId,
ServerState rsState, int degradedStatusThreshold, long dsGenId)
if (rsGenId == -1)
// RS has no generation id
return ServerStatus.NORMAL_STATUS;
} else
if (rsGenId == dsGenId)
// DS and RS have same generation id
// Determine if we are late or not to replay changes. RS uses a
// threshold value for pending changes to be replayed by a DS to
// determine if the DS is in normal status or in degraded status.
// Let's compare the local and remote server state using this threshold
// value to determine if we are late or not
ServerStatus initStatus = ServerStatus.INVALID_STATUS;
int nChanges = ServerState.diffChanges(rsState, state);
if (debugEnabled())
TRACER.debugInfo("RB for dn " + baseDn +
" and with server id " + Integer.toString(serverId) + " computed " +
Integer.toString(nChanges) + " changes late.");
// Check status to know if it is relevant to change the status. Do not
// take RSD lock to test. If we attempt to change the status whereas
// we are in a status that do not allows that, this will be noticed by
// the changeStatusFromStatusAnalyzer method. This allows to take the
// lock roughly only when needed versus every sleep time timeout.
if (degradedStatusThreshold > 0)
if (nChanges >= degradedStatusThreshold)
initStatus = ServerStatus.DEGRADED_STATUS;
} else
initStatus = ServerStatus.NORMAL_STATUS;
} else
// 0 threshold value means no degrading system used (no threshold):
// force normal status
initStatus = ServerStatus.NORMAL_STATUS;
return initStatus;
} else
// DS and RS do not have same generation id
return ServerStatus.BAD_GEN_ID_STATUS;
* Connect to the provided server performing the first phase handshake
* (start messages exchange) and return the reply message from the replication
* server.
* @param server Server to connect to.
* @param keepConnection Do we keep session opened or not after handshake.
* Use true if want to perform handshake phase 2 with the same session
* and keep the session to create as the current one.
* @return The ReplServerStartMsg the server replied. Null if could not
* get an answer.
// Phase one handshake with one replication server: open a socket, create a
// client session on it, build a ServerStartMsg and read the
// ReplServerStartMsg reply. Returns the reply, or null after an error.
// When keepConnection is true the session is stored in the session field;
// otherwise it is dropped once the reply has been read.
// NOTE(review): this excerpt omits lines (braces, the try keyword, the
// publish/close/log calls and several expressions). Comments added here
// describe only what is visible.
private ReplServerStartMsg performPhaseOneHandshake(String server,
boolean keepConnection)
ReplServerStartMsg replServerStartMsg = null;
// Parse server string.
// The last ':' separates host and port. If there is no ':' the whole string
// becomes the port and substring(0, -1) throws.
int separator = server.lastIndexOf(':');
String port = server.substring(separator + 1);
String hostname = server.substring(0, separator);
ProtocolSession localSession = null;
boolean error = false;
* Open a socket connection to the next candidate.
int intPort = Integer.parseInt(port);
InetSocketAddress serverAddr = new InetSocketAddress(
InetAddress.getByName(hostname), intPort);
if (keepConnection)
tmpReadableServerName = serverAddr.toString();
Socket socket = new Socket();
// Connect timeout of 500 ms.
socket.connect(serverAddr, 500);
localSession = replSessionSecurity.createClientSession(server, socket,
boolean isSslEncryption =
* Send our ServerStartMsg.
ServerStartMsg serverStartMsg = new ServerStartMsg(serverId, baseDn,
maxRcvWindow, heartbeatInterval, state,
ProtocolVersion.getCurrentVersion(), this.getGenerationID(),
* Read the ReplServerStartMsg that should come back.
// The cast throws ClassCastException if the server answers with another
// message type; the generic catch below turns that into error = true.
replServerStartMsg = (ReplServerStartMsg) localSession.receive();
if (debugEnabled())
TRACER.debugInfo("In RB for " + baseDn +
"\nRB HANDSHAKE SENT:\n" + serverStartMsg.toString() +
"\nAND RECEIVED:\n" + replServerStartMsg.toString());
// Sanity check
String repDn = replServerStartMsg.getBaseDn();
if (!(this.baseDn.equals(repDn)))
Message message = ERR_DS_DN_DOES_NOT_MATCH.get(repDn.toString(),
error = true;
* We have sent our own protocol version to the replication server.
* The replication server will use the same one (or an older one
* if it is an old replication server).
// NOTE(review): unlike performECLPhaseOneHandshake, no keepConnection guard
// is visible here, so a probing call (keepConnection == false) appears to
// overwrite protocolVersion as well - confirm whether that is intended.
protocolVersion = ProtocolVersion.minWithCurrent(
if (!isSslEncryption)
} catch (ConnectException e)
* There was no server waiting on this host:port
* Log a notice and try the next replicationServer in the list
if (!connectionError)
Message message = NOTE_NO_CHANGELOG_SERVER_LISTENING.get(server);
if (keepConnection) // Log error message only for final connection
// the error message is only logged once to avoid overflowing
// the error log
} else if (debugEnabled())
error = true;
} catch (Exception e)
if ( (e instanceof SocketTimeoutException) && debugEnabled() )
TRACER.debugInfo("Timeout trying to connect to RS " + server +
" for dn: " + baseDn);
baseDn, server, e.getLocalizedMessage() +
if (keepConnection) // Log error message only for final connection
} else if (debugEnabled())
error = true;
// Close session if requested
if (!keepConnection || error)
if (localSession != null)
if (debugEnabled())
TRACER.debugInfo("In RB, closing session after phase 1");
} catch (IOException e)
// The session was already closed, just ignore.
localSession = null;
if (error)
replServerStartMsg = null;
} // Be sure to return null.
// If this connection as the one to use for sending and receiving updates,
// store it.
// After an error localSession is null here, so the session field is cleared.
if (keepConnection)
session = localSession;
return replServerStartMsg;
* Connect to the provided server performing the first phase handshake
* (start messages exchange) and return the reply message from the replication
* server.
* @param server Server to connect to.
* @param keepConnection Do we keep session opened or not after handshake.
* Use true if want to perform handshake phase 2 with the same session
* and keep the session to create as the current one.
* @return The ReplServerStartMsg the server replied. Null if could not
* get an answer.
// ECL variant of the phase one handshake: same sequence as
// performPhaseOneHandshake but a ServerStartECLMsg is built instead of a
// ServerStartMsg. Returns the ReplServerStartMsg reply, or null after an
// error. When keepConnection is true the session is stored in the session
// field.
// NOTE(review): this excerpt omits lines (braces, the try keyword, the
// publish/close/log calls and several expressions). Comments added here
// describe only what is visible.
private ReplServerStartMsg performECLPhaseOneHandshake(String server,
boolean keepConnection)
ReplServerStartMsg replServerStartMsg = null;
// Parse server string.
// The last ':' separates host and port. If there is no ':' the whole string
// becomes the port and substring(0, -1) throws.
int separator = server.lastIndexOf(':');
String port = server.substring(separator + 1);
String hostname = server.substring(0, separator);
ProtocolSession localSession = null;
boolean error = false;
* Open a socket connection to the next candidate.
int intPort = Integer.parseInt(port);
InetSocketAddress serverAddr = new InetSocketAddress(
InetAddress.getByName(hostname), intPort);
if (keepConnection)
tmpReadableServerName = serverAddr.toString();
Socket socket = new Socket();
// Connect timeout of 500 ms.
socket.connect(serverAddr, 500);
localSession = replSessionSecurity.createClientSession(server, socket,
boolean isSslEncryption =
// Send our start msg.
// NOTE(review): the meaning of the four literal 0 arguments is not visible
// here - check the ServerStartECLMsg constructor.
ServerStartECLMsg serverStartECLMsg = new ServerStartECLMsg(
baseDn, 0, 0, 0, 0,
maxRcvWindow, heartbeatInterval, state,
ProtocolVersion.getCurrentVersion(), this.getGenerationID(),
// Read the ReplServerStartMsg that should come back.
replServerStartMsg = (ReplServerStartMsg) localSession.receive();
if (debugEnabled())
TRACER.debugInfo("In RB for " + baseDn +
"\nRB HANDSHAKE SENT:\n" + serverStartECLMsg.toString() +
"\nAND RECEIVED:\n" + replServerStartMsg.toString());
// Sanity check
String repDn = replServerStartMsg.getBaseDn();
if (!(this.baseDn.equals(repDn)))
Message message = ERR_DS_DN_DOES_NOT_MATCH.get(repDn.toString(),
error = true;
* We have sent our own protocol version to the replication server.
* The replication server will use the same one (or an older one
* if it is an old replication server).
// Only the connection we intend to keep updates the negotiated version.
if (keepConnection)
protocolVersion = ProtocolVersion.minWithCurrent(
if (!isSslEncryption)
} catch (ConnectException e)
* There was no server waiting on this host:port
* Log a notice and try the next replicationServer in the list
if (!connectionError)
Message message = NOTE_NO_CHANGELOG_SERVER_LISTENING.get(server);
if (keepConnection) // Log error message only for final connection
// the error message is only logged once to avoid overflowing
// the error log
} else if (debugEnabled())
error = true;
} catch (Exception e)
if ( (e instanceof SocketTimeoutException) && debugEnabled() )
TRACER.debugInfo("Timeout trying to connect to RS " + server +
" for dn: " + baseDn);
baseDn, server, e.getLocalizedMessage() +
if (keepConnection) // Log error message only for final connection
} else if (debugEnabled())
error = true;
// Close session if requested
if (!keepConnection || error)
if (localSession != null)
if (debugEnabled())
TRACER.debugInfo("In RB, closing session after phase 1");
} catch (IOException e)
// The session was already closed, just ignore.
localSession = null;
if (error)
replServerStartMsg = null;
} // Be sure to return null.
// If this connection as the one to use for sending and receiving updates,
// store it.
// After an error localSession is null here, so the session field is cleared.
if (keepConnection)
session = localSession;
return replServerStartMsg;
* Performs the second phase handshake for the ECL (sends a
* StartECLSessionMsg) and returns the reply message from the
* replication server.
* @param server Server we are connecting with.
* @return The TopologyMsg the server replied. Null if could not
* get an answer.
// ECL variant of the phase two handshake: builds a StartECLSessionMsg and,
// when no exception occurs, marks the broker as connected. On any exception
// the session field is cleared and null is returned.
// NOTE(review): this excerpt omits lines (braces, the try keyword, the
// publish/close/log calls). The FIXME block comment below is opened but its
// end is not visible, so the receive() line that follows may be commented
// out in the full source - confirm before relying on the returned value.
private TopologyMsg performECLPhaseTwoHandshake(String server)
TopologyMsg topologyMsg = null;
// Send our Start Session
StartECLSessionMsg startECLSessionMsg = null;
startECLSessionMsg = new StartECLSessionMsg();
/* FIXME:ECL In the handshake phase two, should RS send back a topo msg ?
* Read the TopologyMsg that should come back.
topologyMsg = (TopologyMsg) session.receive();
if (debugEnabled())
TRACER.debugInfo("In RB for " + baseDn +
"\nRB HANDSHAKE SENT:\n" + startECLSessionMsg.toString());
// + "\nAND RECEIVED:\n" + topologyMsg.toString());
// Alright set the timeout to the desired value
connected = true;
} catch (Exception e)
baseDn, server, e.getLocalizedMessage() +
if (session != null)
} catch (IOException ex)
// The session was already closed, just ignore.
session = null;
// Be sure to return null.
topologyMsg = null;
return topologyMsg;
* Performs the second phase handshake (send StartSessionMsg and receive
* TopologyMsg messages exchange) and returns the reply message from the
* replication server.
* @param server Server we are connecting with.
* @param initStatus The status we are starting with
* @return The TopologyMsg the server replied. Null if could not
* get an answer.
// Phase two handshake on the session kept by phase one: builds a
// StartSessionMsg carrying initStatus and reads the TopologyMsg reply.
// Returns the reply; on any exception the session field is cleared and null
// is returned.
// NOTE(review): this excerpt omits lines (braces, the try keyword, the
// publish/close/log calls and the arguments of the first StartSessionMsg).
private TopologyMsg performPhaseTwoHandshake(String server,
ServerStatus initStatus)
TopologyMsg topologyMsg = null;
* Send our StartSessionMsg.
StartSessionMsg startSessionMsg = null;
// May have created a broker with null replication domain for
// unit test purpose.
if (domain != null)
startSessionMsg =
new StartSessionMsg(
} else
// Without a domain the message is built with an empty string list.
startSessionMsg =
new StartSessionMsg(initStatus, new ArrayList<String>());
* Read the TopologyMsg that should come back.
// The cast throws ClassCastException if the RS answers with another message
// type; the catch below converts that into a null return.
topologyMsg = (TopologyMsg) session.receive();
if (debugEnabled())
TRACER.debugInfo("In RB for " + baseDn +
"\nRB HANDSHAKE SENT:\n" + startSessionMsg.toString() +
"\nAND RECEIVED:\n" + topologyMsg.toString());
// Alright set the timeout to the desired value
} catch (Exception e)
baseDn, server, e.getLocalizedMessage() +
if (session != null)
} catch (IOException ex)
// The session was already closed, just ignore.
session = null;
// Be sure to return null.
topologyMsg = null;
return topologyMsg;
* Returns the replication server that best fits our need so that we can
* connect to it.
* This methods performs some filtering on the group id, then call
* the real search for best server algorithm (searchForBestReplicationServer).
* Note: this method put as public static for unit testing purpose.
* @param myState The local server state.
* @param rsInfos The list of available replication servers and their
* associated information (choice will be made among them).
* @param serverId2 The server id for the suffix we are working for.
* @param baseDn The suffix for which we are working for.
* @param groupId The groupId we prefer being connected to if possible
* @return The computed best replication server.
public static String computeBestReplicationServer(ServerState myState,
HashMap<String, ServerInfo> rsInfos, int serverId2, String baseDn,
byte groupId)
* Preference is given to servers with the requested group id:
* If there are some servers with the requested group id in the provided
* server list, then we run the search algorithm only on them. If no server
* with the requested group id, consider all of them.
// Filter for servers with same group id
HashMap<String, ServerInfo> sameGroupIdRsInfos =
new HashMap<String, ServerInfo>();
for (String repServer : rsInfos.keySet())
ServerInfo serverInfo = rsInfos.get(repServer);
if (serverInfo.getGroupId() == groupId)
sameGroupIdRsInfos.put(repServer, serverInfo);
// Some servers with same group id ?
if (sameGroupIdRsInfos.size() > 0)
return searchForBestReplicationServer(myState, sameGroupIdRsInfos,
serverId2, baseDn);
} else
return searchForBestReplicationServer(myState, rsInfos,
serverId2, baseDn);
* Returns the replication server that best fits our need so that we can
* connect to it.
* Note: this method put as public static for unit testing purpose.
* @param myState The local server state.
* @param rsInfos The list of available replication servers and their
* associated information (choice will be made among them).
* @param serverId2 The server id for the suffix we are working for.
* @param baseDn The suffix for which we are working for.
* @return The computed best replication server.
private static String searchForBestReplicationServer(ServerState myState,
HashMap<String, ServerInfo> rsInfos, int serverId2, String baseDn)
* Find replication servers who are up to date (or more up to date than us,
* if for instance we failed and restarted, having sent some changes to the
* RS but without having time to store our own state) regarding our own
* server id. Then, among them, choose the server that is the most up to
* date regarding the whole topology.
* If no server is up to date regarding our own server id, find the one who
* is the most up to date regarding our server id.
// Should never happen (sanity check)
if ((myState == null) || (rsInfos == null) || (rsInfos.size() < 1) ||
(baseDn == null))
return null;
// Shortcut, if only one server, this is the best
if (rsInfos.size() == 1)
for (String repServer : rsInfos.keySet())
return repServer;
String bestServer = null;
// Servers up to dates with regard to our changes
HashMap<String, ServerState> upToDateServers =
new HashMap<String, ServerState>();
// Servers late with regard to our changes
HashMap<String, ServerState> lateOnes = new HashMap<String, ServerState>();
* Start loop to differenciate up to date servers from late ones.
ChangeNumber myChangeNumber = myState.getMaxChangeNumber(serverId2);
if (myChangeNumber == null)
myChangeNumber = new ChangeNumber(0, 0, serverId2);
for (String repServer : rsInfos.keySet())
ServerState rsState = rsInfos.get(repServer).getServerState();
ChangeNumber rsChangeNumber = rsState.getMaxChangeNumber(serverId2);
if (rsChangeNumber == null)
rsChangeNumber = new ChangeNumber(0, 0, serverId2);
// Store state in right list
if (myChangeNumber.olderOrEqual(rsChangeNumber))
upToDateServers.put(repServer, rsState);
} else
lateOnes.put(repServer, rsState);
if (upToDateServers.size() > 0)
* Some up to date servers, among them, choose either :
* - The local one
* - The one that has the maximum number of changes to send us.
* This is the most up to date one regarding the whole topology.
* This server is the one which has the less
* difference with the topology server state.
* For comparison, we need to compute the difference for each
* server id with the topology server state.
upToDateServers.size(), baseDn, Integer.toString(serverId2));
* If there are local Replication Servers, remove all the other one
* from the list so that we are sure that we choose a local one.
boolean localRS = false;
for (String upServer : upToDateServers.keySet())
if (ReplicationServer.isLocalReplicationServer(upServer))
localRS = true;
if (localRS)
Iterator<String> it = upToDateServers.keySet().iterator();
while (it.hasNext())
if (!ReplicationServer.isLocalReplicationServer(
* First of all, compute the virtual server state for the whole topology,
* which is composed of the most up to date change numbers for
* each server id in the topology.
ServerState topoState = new ServerState();
for (ServerState curState : upToDateServers.values())
Iterator<Integer> it = curState.iterator();
while (it.hasNext())
Integer sId =;
ChangeNumber curSidCn = curState.getMaxChangeNumber(sId);
if (curSidCn == null)
curSidCn = new ChangeNumber(0, 0, sId);
// Update topology state
} // For up to date servers
// Min of the max shifts
long minShift = -1L;
for (String upServer : upToDateServers.keySet())
* Compute the maximum difference between the time of a server id's
* change number and the time of the matching server id's change
* number in the topology server state.
* Note: we could have used the sequence number here instead of the
* timestamp, but this would have caused a problem when the sequence
* number loops and comes back to 0 (computation would have become
* meaningless).
long shift = -1L;
ServerState curState = upToDateServers.get(upServer);
Iterator<Integer> it = curState.iterator();
while (it.hasNext())
Integer sId =;
ChangeNumber curSidCn = curState.getMaxChangeNumber(sId);
if (curSidCn == null)
curSidCn = new ChangeNumber(0, 0, sId);
// Cannot be null as checked at construction time
ChangeNumber topoCurSidCn = topoState.getMaxChangeNumber(sId);
// Cannot be negative as topoState computed as being the max CN
// for each server id in the topology
long tmpShift = topoCurSidCn.getTime() - curSidCn.getTime();
if (tmpShift > shift)
shift = tmpShift;
if ((minShift < 0) // First time in loop
|| (shift < minShift))
// This server is even closer to topo state
bestServer = upServer;
minShift = shift;
} // For up to date servers
} else
* We could not find a replication server that has seen all the
* changes that this server has already processed,
// lateOnes cannot be empty
baseDn, lateOnes.size());
// Min of the shifts
long minShift = -1L;
for (String lateServer : lateOnes.keySet())
* Choose the server who is the closest to us regarding our server id
* (this is the most up to date regarding our server id).
ServerState curState = lateOnes.get(lateServer);
ChangeNumber ourSidCn = curState.getMaxChangeNumber(serverId2);
if (ourSidCn == null)
ourSidCn = new ChangeNumber(0, 0, serverId2);
// Cannot be negative as our Cn for our server id is strictly
// greater than those of the servers in late server list
long tmpShift = myChangeNumber.getTime() - ourSidCn.getTime();
if ((minShift < 0) // First time in loop
|| (tmpShift < minShift))
// This server is even closer to topo state
bestServer = lateServer;
minShift = tmpShift;
} // For late servers
return bestServer;
* Start the heartbeat monitor thread.
// Builds the heartbeat monitor for the current session. Nothing is created
// when heartbeatInterval is zero or negative.
private void startRSHeartBeatMonitoring()
// Start a heartbeat monitor thread.
if (heartbeatInterval > 0)
// The monitor's name carries the RS name and id, the base DN and the id of
// this DS; it is bound to the current session and heartbeat interval.
heartbeatMonitor =
new HeartbeatMonitor("Replication Heartbeat Monitor on RS " +
getReplicationServer() + " " + rsServerId + " for " + baseDn +
" in DS " + serverId,
session, heartbeatInterval);
* Starts the same group id poller.
// Creates a new SameGroupIdPoller and stores it in the sameGroupIdPoller
// field.
private void startSameGroupIdPoller()
sameGroupIdPoller = new SameGroupIdPoller();
* Stops the same group id poller.
// Releases the same group id poller, if one is currently referenced.
private void stopSameGroupIdPoller()
if (sameGroupIdPoller != null)
// Forget the poller instance.
sameGroupIdPoller = null;
* Stop the heartbeat monitor thread.
// Releases the heartbeat monitor, if one is currently referenced.
// Package-private, unlike the private start counterpart.
void stopRSHeartBeatMonitoring()
if (heartbeatMonitor != null)
// Forget the monitor instance.
heartbeatMonitor = null;
* restart the ReplicationBroker.
// Restart entry point for callers that have no failing session to report.
public void reStart()
* Restart the ReplicationServer broker after a failure.
* @param failingSession the socket which failed
// Restarts the broker after a failure on failingSession, then keeps trying
// until the broker is connected again or has been shut down.
public void reStart(ProtocolSession failingSession)
// A null failingSession means there is no specific session to deal with.
if (failingSession != null)
} catch (IOException e1)
// ignore
// The connection state is only reset when the failing session is the one
// currently in use by this broker.
if (failingSession == session)
this.connected = false;
// -1 / null are the "no replication server" values for these fields.
rsGroupId = (byte) -1;
rsServerId = -1;
rsServerUrl = null;
// Retry loop: leaves as soon as a connection exists or shutdown is requested.
while (!this.connected && (!this.shutdown))
} catch (Exception e)
// Build a message that includes the base DN and the exception text.
MessageBuilder mb = new MessageBuilder();
baseDn, e.getLocalizedMessage()));
// Still neither connected nor stopping.
if ((!connected) && (!shutdown))
} catch (InterruptedException e)
// ignore
* Publish a message to the other servers.
* @param msg the message to publish
// Publishes msg through the current session, looping until the message has
// been handled (done) or the broker is shut down.
// UpdateMsg instances must first obtain a credit from the send window
// semaphore; every other message type is granted credit unconditionally.
public void publish(ReplicationMsg msg)
boolean done = false;
while (!done && !shutdown)
if (connectionError)
// It was not possible to connect to any replication server.
// Since the operation was already processed, we have no other
// choice than to return without sending the ReplicationMsg
// and relying on the resend procedure of the connect phase to
// fix the problem when we finally connect.
if (debugEnabled())
debugInfo("ReplicationBroker.publish() Publishing a " +
"message is not possible due to existing connection error.");
boolean credit;
ProtocolSession current_session;
Semaphore currentWindowSemaphore;
// save the session at the time when we acquire the
// sendwindow credit so that we can make sure later
// that the session did not change in between.
// This is necessary to make sure that we don't publish a message
// on a session with a credit that was acquired from a previous
// session.
synchronized (connectPhaseLock)
// Snapshot both the session and its send window under the lock so that
// they belong to the same connection.
current_session = session;
currentWindowSemaphore = sendWindow;
if (msg instanceof UpdateMsg)
// Acquiring the window credit must be done outside of the
// connectPhaseLock because it can be blocking and we don't
// want to hold off reconnection in case the connection dropped.
// The wait for a credit is bounded to 500 milliseconds per attempt.
credit =
(long) 500, TimeUnit.MILLISECONDS);
} else
credit = true;
if (credit)
synchronized (connectPhaseLock)
// check the session. If it has changed, some
// deconnection/reconnection happened and we need to restart from
// scratch.
if (session == current_session)
done = true;
// No credit obtained and the snapshotted window has no permit left.
if ((!credit) && (currentWindowSemaphore.availablePermits() == 0))
// the window is still closed.
// Send a WindowProbeMsg message to wakeup the receiver in case the
// window update message was lost somehow...
// then loop to check again if connection was closed.
// NOTE(review): this uses the session field rather than the
// current_session snapshot taken above - confirm this is intended.
session.publish(new WindowProbeMsg());
} catch (IOException e)
// The receive threads should handle reconnection or
// mark this broker in error. Just retry.
synchronized (connectPhaseLock)
} catch (InterruptedException e1)
// ignore
if (debugEnabled())
// NOTE(review): the message below reads e (the enclosing IOException)
// although the exception caught here is e1 - confirm which one was meant.
debugInfo("ReplicationBroker.publish() " +
"Interrupted exception raised : " + e.getLocalizedMessage());
} catch (InterruptedException e)
// just loop.
if (debugEnabled())
debugInfo("ReplicationBroker.publish() " +
"Interrupted exception raised." + e.getLocalizedMessage());
* Receive a message.
* This method is not multithread safe and should either always be
* called in a single thread or protected by a locking mechanism
* before being called.
* @return the received message
* @throws SocketTimeoutException if the timeout set by setSoTimeout
* has expired
// Reads messages from the current session for as long as the broker is not
// shut down. A SocketTimeoutException is rethrown unchanged to the caller;
// any other exception is handled inside the loop.
public ReplicationMsg receive() throws SocketTimeoutException
while (shutdown == false)
// Not connected: handled before any read is attempted.
if (!connected)
// Keep a reference to the session that is about to be read from.
ProtocolSession failingSession = session;
ReplicationMsg msg = session.receive();
// Dispatch on the concrete message type.
if (msg instanceof UpdateMsg)
// Guarded by the broker's own monitor, the same one that the synchronized
// method updateWindowAfterReplay() uses.
synchronized (this)
if (msg instanceof WindowMsg)
WindowMsg windowMsg = (WindowMsg) msg;
else if (msg instanceof TopologyMsg)
TopologyMsg topoMsg = (TopologyMsg)msg;
return msg;
} catch (SocketTimeoutException e)
// Timeouts are part of this method's contract: propagate as is.
throw e;
} catch (Exception e)
// Errors raised while shutting down are not processed further here.
if (shutdown == false)
// Either there is no session, or the session was not closed on our
// initiative.
if ((session == null) || (!session.closeInitiated()))
* If we did not initiate the close on our side, log a message.
Message message =
Integer.toString(rsServerId), baseDn.toString(),
// Final fall-through result.
return null;
* This method allows to do the necessary computing for the window
* management after treatment by the worker threads.
* This should be called once the replay threads have done their job
* and the window can be open again.
// Counts one more replayed update and, once at least halfRcvWindow updates
// have accumulated and a session exists, acknowledges them to the peer with
// a WindowMsg. Synchronized on the broker instance.
public synchronized void updateWindowAfterReplay()
updateDoneCount ++;
if ((updateDoneCount >= halfRcvWindow) && (session != null))
// Tell the peer how many updates were processed, give that many slots
// back to the receive window, then start counting again from zero.
session.publish(new WindowMsg(updateDoneCount));
rcvWindow += updateDoneCount;
updateDoneCount = 0;
} catch (IOException e)
// Any error on the socket will be handled by the thread calling receive()
// just ignore.
* stop the server.
// Stops the broker: marks it as shut down and disconnected, and resets the
// fields that describe the replication server it was connected to.
public void stop()
if (debugEnabled())
debugInfo("ReplicationBroker " + serverId + " is stopping and will" +
" close the connection to replication server " + rsServerId + " for"
+ " domain " + baseDn);
// From here on getReplicationServer() returns the literal "stopped".
replicationServer = "stopped";
// shutdown is the flag tested by the loops in reStart(), publish() and
// receive().
shutdown = true;
connected = false;
// -1 / null are the "no replication server" values for these fields.
rsGroupId = (byte) -1;
rsServerId = -1;
rsServerUrl = null;
// Session handling only applies when a session exists.
if (session != null)
} catch (IOException e)
* Set a timeout value.
* With this option set to a non-zero value, calls to the receive() method
* block for only this amount of time after which a
* SocketTimeoutException is raised.
* The Broker is valid and usable even after such an Exception is raised.
* @param timeout the specified timeout, in milliseconds.
* @throws SocketException if there is an error in the underlying protocol,
* such as a TCP error.
// Records the timeout (in milliseconds) in this broker; the session-specific
// part only runs when a session currently exists.
public void setSoTimeout(int timeout) throws SocketException
this.timeout = timeout;
if (session != null)
* Get the name of the replicationServer to which this broker is currently
* connected.
* @return the name of the replicationServer to which this domain
* is currently connected.
public String getReplicationServer()
return replicationServer;
* Get the maximum receive window size.
* @return The maximum receive window size.
public int getMaxRcvWindow()
return maxRcvWindow;
* Get the current receive window size.
* @return The current receive window size.
public int getCurrentRcvWindow()
return rcvWindow;
* Get the maximum send window size.
* @return The maximum send window size.
public int getMaxSendWindow()
return maxSendWindow;
* Get the current send window size.
* @return The current send window size.
public int getCurrentSendWindow()
if (connected)
return sendWindow.availablePermits();
} else
return 0;
* Get the number of times the connection was lost.
* @return The number of times the connection was lost.
public int getNumLostConnections()
return numLostConnections;
* Change some configuration parameters.
* @param replicationServers The new list of replication servers.
* @param window The max window size.
* @param heartbeatInterval The heartBeat interval.
* @return A boolean indicating if the changes
* requires to restart the service.
* @param groupId The new group id to use
public boolean changeConfig(
Collection<String> replicationServers, int window, long heartbeatInterval,
byte groupId)
// These parameters needs to be renegociated with the ReplicationServer
// so if they have changed, that requires restarting the session with
// the ReplicationServer.
Boolean needToRestartSession = false;
// A new session is necessary only when information regarding
// the connection is modified
if ((servers == null) ||
(!(replicationServers.size() == servers.size()
&& replicationServers.containsAll(servers))) ||
window != this.maxRcvWindow ||
heartbeatInterval != this.heartbeatInterval ||
(groupId != this.groupId))
needToRestartSession = true;
this.servers = replicationServers;
this.rcvWindow = window;
this.maxRcvWindow = window;
this.halfRcvWindow = window / 2;
this.heartbeatInterval = heartbeatInterval;
this.groupId = groupId;
return needToRestartSession;
* Get the version of the replication protocol.
* @return The version of the replication protocol.
public short getProtocolVersion()
return protocolVersion;
* Check if the broker is connected to a ReplicationServer and therefore
* ready to receive and send Replication Messages.
* @return true if the server is connected, false if not.
public boolean isConnected()
return connected;
// Guard used in front of the debug traces of this class.
// NOTE(review): this is hard-coded to true, so every guarded trace is always
// emitted - confirm whether it should follow the debug logger configuration.
private boolean debugEnabled()
return true;
private static final void debugInfo(String s)
logError(Message.raw(Category.SYNC, Severity.NOTICE, s));
* Determine whether the connection to the replication server is encrypted.
* @return true if the connection is encrypted, false otherwise.
public boolean isSessionEncrypted()
boolean isEncrypted = false;
if (session != null)
return session.isEncrypted();
return isEncrypted;
* In case we are connected to a RS with a different group id, we use this
* thread to poll presence of a RS with the same group id as ours. If a RS
* with the same group id is available, we close the session to force
* reconnection. Reconnection will choose a server with the same group id.
// Thread that looks, among the configured servers, for a replication server
// whose group id equals the group id of this broker.
private class SameGroupIdPoller extends DirectoryThread
// Stop request: set by shutdown() (and on interruption), tested by run().
// NOTE(review): neither this flag nor terminated is volatile; if shutdown()
// or waitForShutdown() is called from a thread other than the poller itself,
// confirm that visibility of the writes is guaranteed some other way.
private boolean sameGroupIdPollershutdown = false;
// Completion marker: set at the end of run(), tested by waitForShutdown().
private boolean terminated = false;
// Sleep interval in ms
private static final int SAME_GROUP_ID_POLLER_PERIOD = 5000;
// The thread name carries the base DN, the group id and the server id of
// the enclosing broker.
public SameGroupIdPoller()
super("Replication Broker Same Group Id Poller for " + baseDn.toString() +
" and group id " + groupId + " in server id " + serverId);
* Wait for the completion of the same group id poller.
// Loops for as long as run() has not set the terminated marker.
public void waitForShutdown()
while (terminated == false)
} catch (InterruptedException e)
// exit the loop if this thread is interrupted.
* Shutdown the same group id poller.
// Only raises the stop request; run() notices it on its next loop test.
public void shutdown()
sameGroupIdPollershutdown = true;
* Permanently look for RS with our group id and if found, break current
* connection to force reconnection to a new server with the right group id.
public void run()
// done is set once a server with the same group id has been found.
boolean done = false;
if (debugEnabled())
TRACER.debugInfo("SameGroupIdPoller for: " + baseDn.toString() +
" started.");
while ((!done) && (!sameGroupIdPollershutdown))
// Sleep some time between checks
} catch (InterruptedException e)
// Stop as we are interrupted
sameGroupIdPollershutdown = true;
// The check runs under connectPhaseLock, the same lock publish() uses
// around its accesses to the session field.
synchronized (connectPhaseLock)
if (debugEnabled())
TRACER.debugInfo("Running SameGroupIdPoller for: " +
if (session != null) // Check only if not already disconnected
for (String server : servers)
// Do not ask the RS we are connected to as it has for sure the
// wrong group id
if (server.equals(rsServerUrl))
// Connect to server and get reply message
ReplServerStartMsg replServerStartMsg =
performPhaseOneHandshake(server, false);
// Store reply message info in list
// A null reply means there is nothing to compare for this server.
if (replServerStartMsg != null)
if (groupId == replServerStartMsg.getGroupId())
// Found one server with the same group id as us, disconnect
// session to force reconnection to a server with same group
// id.
Byte.toString(groupId), baseDn.toString(),
} catch (Exception e)
// The session was already closed, just ignore.
session = null;
done = true; // Terminates thread as did its job.
} // for server
// Marks completion for waitForShutdown().
terminated = true;
if (debugEnabled())
TRACER.debugInfo("SameGroupIdPoller for: " + baseDn.toString() +
" terminated.");
* Signals the RS we just entered a new status.
* @param newStatus The status the local DS just entered
// Builds a ChangeStatusMsg for the replication server. An IOException is
// turned into an ERR_EXCEPTION_SENDING_CS message and is not propagated to
// the caller (the signature declares no checked exception).
public void signalStatusChange(ServerStatus newStatus)
// The first constructor argument is always INVALID_STATUS here.
ChangeStatusMsg csMsg = new ChangeStatusMsg(ServerStatus.INVALID_STATUS,
} catch (IOException ex)
// The message text combines the exception's localized message with its
// stack trace flattened onto a single line.
Message message = ERR_EXCEPTION_SENDING_CS.get(
ex.getLocalizedMessage() + stackTraceToSingleLineString(ex));
* Sets the group id of the broker.
* @param groupId The new group id.
public void setGroupId(byte groupId)
this.groupId = groupId;
* Gets the info for DSs in the topology (except us).
* @return The info for DSs in the topology (except us)
public List<DSInfo> getDsList()
return dsList;
* Gets the info for RSs in the topology (except the one we are connected
* to).
* @return The info for RSs in the topology (except the one we are connected
* to)
public List<RSInfo> getRsList()
return rsList;
* Processes an incoming TopologyMsg.
* Updates the structures for the local view of the topology.
* @param topoMsg The topology information received from RS.
// Replaces the broker's DS and RS lists with the ones carried by topoMsg
// and, when a domain is attached, walks the ECL include attributes of every
// DS in the new list.
public void receiveTopo(TopologyMsg topoMsg)
// Store new lists
dsList = topoMsg.getDsList();
rsList = topoMsg.getRsList();
// The ECL include processing needs a domain to work with.
if (domain != null)
for (DSInfo info : dsList)
for (String attr : info.getEclIncludes())
if (debugEnabled())
// Trace the domain's service id together with its ECL include value.
TRACER.debugInfo("domain: " + domain.getServiceID() +
" EclIncludes" + domain.getEclInclude());
* Check if the broker could not find any Replication Server and therefore
* connection attempt failed.
* @return true if the server could not connect to any Replication Server.
public boolean hasConnectionError()
return connectionError;
* Starts publishing to the RS the current timestamp used in this server.
// Creates the change-time (CN) heartbeat publisher thread for the current
// session when changeTimeHeartbeatSendInterval is positive.
public void startChangeTimeHeartBeatPublishing()
// Start a CN heartbeat thread.
if (changeTimeHeartbeatSendInterval > 0)
// The thread is named after the base DN and the replication server name,
// and receives the session, the send interval and this server's id.
ctHeartbeatPublisherThread =
new CTHeartbeatPublisherThread(
"Replication CN Heartbeat Thread started for " +
baseDn + " with " + getReplicationServer(),
session, changeTimeHeartbeatSendInterval, serverId);
// Trace emitted for a broker that has no CN heartbeat interval configured.
TRACER.debugInfo(this +
" is not configured to send CN heartbeat interval");
* Stops publishing to the RS the current timestamp used in this server.
public void stopChangeTimeHeartBeatPublishing()
if (ctHeartbeatPublisherThread != null)
ctHeartbeatPublisherThread = null;