ReplicationBroker.java revision e371bf13d3f899f1de594880fafe67b990d65899
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at
* trunk/opends/resource/legal-notices/OpenDS.LICENSE. If applicable,
* add the following below this CDDL HEADER, with the fields enclosed
* by brackets "[]" replaced with your own identifying information:
* Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
*
* Copyright 2006-2010 Sun Microsystems, Inc.
* Portions Copyright 2011-2013 ForgeRock AS
*/
/**
* The broker for Multi-master Replication.
*/
public class ReplicationBroker
{
/**
* The tracer object for the debug logger.
*/
/**
* Shutdown flag of this broker; it is reset to false every time the broker
* is started.
*/
private volatile boolean shutdown = false;
/**
* Replication server URLs under this format: "<code>hostname:port</code>".
*/
/**
* Whether this broker is currently connected to a replication server. It is
* set once a connection has been established and cleared when no replication
* server could be reached.
*/
private volatile boolean connected = false;
/**
* String reported under CSN=monitor when there is no connected RS.
*/
/**
* The local server state.
* NOTE(review): presumably the state handed to the constructor - confirm.
*/
private final ServerState state;
/** The server id of this broker, as returned by {@link #getServerId()}. */
private final int serverId;
/**
* The send window. According to the connection code, threads that were
* waiting on this window on a previous connection are woken up when a new
* connection is established.
*/
private Semaphore sendWindow;
private int maxSendWindow;
/** The current receive window; reset to maxRcvWindow when starting. */
private int rcvWindow = 100;
/** The maximum receive window; set from the window given at construction. */
private int maxRcvWindow = rcvWindow;
private int timeout = 0;
private short protocolVersion;
private ReplSessionSecurity replSessionSecurity;
/** My group id. */
private byte groupId = -1;
/** The group id of the RS we are connected to. */
private byte rsGroupId = -1;
/** The server id of the RS we are connected to. */
/** The server URL of the RS we are connected to. */
/** Our replication domain. */
/**
* This object is used as a conditional event to be notified about
* the reception of monitor information from the Replication Server.
*/
/**
* A Map containing the ServerStates of all the replicas in the topology
* as seen by the ReplicationServer the last time it was polled or the last
* time it published monitoring information.
*/
/**
* The expected duration in milliseconds between heartbeats received
* from the replication server. Zero means heartbeats are off.
*/
private long heartbeatInterval = 0;
/**
* A thread to monitor heartbeats on the session.
*/
/**
* The number of times the connection was lost.
*/
private int numLostConnections = 0;
/**
* When the broker cannot connect to any replication server
* it logs an error and keeps retrying every second.
* This boolean is set when the first failure happens and is used
* to avoid repeating the error message for further failures to connect
* and to know that it is necessary to print a new message when the broker
* finally succeeds in connecting.
*/
private volatile boolean connectionError = false;
/**
* The thread that publishes messages to the RS containing the current
* change time of this DS.
*/
/**
* The expected period in milliseconds between two consecutive change time
* heartbeat messages sent to the replication server. Zero means these
* heartbeats are off.
*/
private long changeTimeHeartbeatSendInterval = 0;
/*
* Properties for the last topology info received from the network.
*/
// Info for other DSs.
// Warning: does not contain info for us (for our server id)
private volatile long generationID;
private volatile int updateDoneCount = 0;
private volatile boolean connectRequiresRecovery = false;
/**
* The map of replication server info initialized at connection time and
* regularly updated. This is used to decide which replication server is the
* most suitable one to connect to. Key: replication server id. Value:
* replication server info for the matching replication server id.
*/
= null;
/**
* This integer defines when the best replication server checking algorithm
* should be engaged.
* Every time a monitoring message (each monitoring publisher period) is
* received, it is incremented. When it reaches 2, we run the checking
* algorithm to see if we must reconnect to another best replication server.
* Then we reset the value to 0. But when a topology message is received, the
* integer is reset to 0. This ensures that we wait at least one monitoring
* publisher period before running the algorithm, but also that we wait at
* least for a monitoring period after the last received topology message
* (topology stabilization).
*/
private int mustRunBestServerCheckingAlgorithm = 0;
/**
* The monitor provider for this replication domain. The name of the monitor
* includes the local address and must therefore be re-registered every time
* the session is re-established or destroyed. The monitor provider can only
* be created (i.e. non-null) if there is a replication domain, which is not
* the case in unit tests.
*/
private final ReplicationMonitor monitor;
/**
* Creates a new ReplicationServer Broker for a particular ReplicationDomain.
*
* @param replicationDomain The replication domain that is creating us.
* @param state The ServerState that should be used by this broker
* when negotiating the session with the replicationServer.
* @param baseDn The base DN that should be used by this broker
* when negotiating the session with the replicationServer.
* @param serverID2 The server ID that should be used by this broker
* when negotiating the session with the replicationServer.
* @param window The size of the send and receive window to use.
* @param generationId The generationId for the server associated to the
* provided serverId and for the domain associated to the provided baseDN.
* @param heartbeatInterval The interval (in ms) between heartbeats requested
* from the replicationServer, or zero if no heartbeats are requested.
* @param replSessionSecurity The session security configuration.
* @param groupId The group id of our domain.
* @param changeTimeHeartbeatInterval The interval (in ms) at which change
* time heartbeats are sent to the RS,
* or zero if no CSN heartbeat should be sent.
*/
long generationId, long heartbeatInterval,
{
this.domain = replicationDomain;
this.generationID = generationId;
this.heartbeatInterval = heartbeatInterval;
// The configured window size is kept as the maximum receive window.
this.maxRcvWindow = window;
/*
* Only create a monitor if there is a replication domain (this is not the
* case in some unit tests).
*/
}
/**
* Starts this broker: while holding the start/stop lock, clears the shutdown
* flag, restores the receive window to its maximum size and then connects.
*/
public void start()
{
synchronized (startStopLock)
{
shutdown = false;
rcvWindow = maxRcvWindow;
connect();
}
}
/**
* Starts the ReplicationBroker with the provided replication servers.
*
* @param replicationServers the list of replication servers to use
*/
{
synchronized (startStopLock)
{
// Open Socket to the ReplicationServer Send the Start message
shutdown = false;
{
}
// Restore the receive window to its maximum size before connecting.
this.rcvWindow = this.maxRcvWindow;
this.connect();
}
}
/**
* Returns the group id of the replication server this broker is connected
* to.
*
* @return the group id of the connected RS
*/
public byte getRsGroupId()
{
return this.rsGroupId;
}
/**
* Returns the server id of the replication server this broker is connected
* to.
*
* @return the server id of the connected RS
*/
public Integer getRsServerId()
{
return this.rsServerId;
}
/**
* Returns the server id of this broker.
*
* @return the server id
*/
public int getServerId()
{
return this.serverId;
}
/**
* Gets the generation id.
* @return The generation id
*/
private long getGenerationID()
{
{
// Update the generation id
}
return generationID;
}
/**
* Sets the generation id. Intended for test purposes.
*
* @param generationID The new generation id
*/
public void setGenerationID(long generationID)
{
this.generationID = generationID;
}
/**
* Returns the server URL of the replication server this broker is connected
* to.
*
* @return the server URL of the connected RS
*/
public String getRsServerUrl()
{
return this.rsServerUrl;
}
/**
* Sets the locally configured flag for the passed ReplicationServerInfo
* object, analyzing the local configuration.
* @param replicationServerInfo the Replication server to check and update
*/
private void updateRSInfoLocallyConfiguredStatus(
{
// Determine if the passed ReplicationServerInfo has a URL that is present
// in the locally configured replication servers
{
// The ReplicationServerInfo has been generated from a server with
// no URL in TopologyMsg (i.e. with replication protocol version < 4):
// ignore this server as we do not know how to connect to it
return;
}
{
{
// This RS is locally configured: mark it as such and stop searching
return;
}
}
}
/**
* Compares two replication server addresses and returns true if they both
* represent the same replication server instance.
* @param rs1Url Replication server 1 address
* @param rs2Url Replication server 2 address
* @return True if both replication server addresses represent the same
* replication server instance, false otherwise (including when one of the
* addresses is malformed or cannot be resolved).
*/
{
// Get and compare ports of RS1 and RS2
if (separator1 < 0)
{
// Not a RS url: should not happen
return false;
}
if (separator2 < 0)
{
// Not a RS url: should not happen
return false;
}
{
return false;
}
// Get and compare addresses of RS1 and RS2
final InetAddress[] rs1Addresses;
try
{
// Normalize local address to null.
}
catch (UnknownHostException ex)
{
// Unknown RS: should not happen
return false;
}
final InetAddress[] rs2Addresses;
try
{
// Normalize local address to null.
}
catch (UnknownHostException ex)
{
// Unknown RS: should not happen
return false;
}
// Now compare addresses: if at least one matches, this is the same server.
{
// Both local addresses.
return true;
}
{
// One local address and one non-local.
return false;
}
else
{
// Both non-local addresses: check for overlap.
{
{
{
return true;
}
}
}
}
return false;
}
/**
* Bag class for keeping info we get from a replication server in order to
* compute the best one to connect to. This is in fact a wrapper to a
* ReplServerStartMsg (V3) or a ReplServerStartDSMsg (V4). This can also be
* updated with info coming from received topology messages or monitoring
* messages.
*/
public static class ReplicationServerInfo
{
private short protocolVersion;
private long generationId;
private byte groupId = -1;
private int serverId;
// Received server URL
private int windowSize;
private boolean sslEncryption;
private int degradedStatusThreshold = -1;
// Keeps the 1 value if created with a ReplServerStartMsg
private int weight = 1;
// Keeps the 0 value if created with a ReplServerStartMsg
private int connectedDSNumber = 0;
// Is this RS locally configured ? (the RS is recognized as a usable server)
private boolean locallyConfigured = true;
/**
* Create a new instance of ReplicationServerInfo wrapping the passed
* message.
* @param msg Message to wrap.
* @param server Override serverURL.
* @return The new instance wrapping the passed message.
* @throws IllegalArgumentException If the passed message has an unexpected
* type.
*/
public static ReplicationServerInfo newInstance(
{
return rsInfo;
}
/**
* Create a new instance of ReplicationServerInfo wrapping the passed
* message.
* @param msg Message to wrap.
* @return The new instance wrapping the passed message.
* @throws IllegalArgumentException If the passed message is neither a
* ReplServerStartMsg nor a ReplServerStartDSMsg.
*/
public static ReplicationServerInfo newInstance(
{
if (msg instanceof ReplServerStartMsg)
{
// This is a ReplServerStartMsg (RS uses protocol V3 or under)
return new ReplicationServerInfo(replServerStartMsg);
} else if (msg instanceof ReplServerStartDSMsg)
{
// This is a ReplServerStartDSMsg (RS uses protocol V4 or higher)
return new ReplicationServerInfo(replServerStartDSMsg);
}
// Unsupported message type: should not happen
throw new IllegalArgumentException("Unexpected PDU type: " +
}
/**
* Constructs a ReplicationServerInfo object wrapping a
* {@link ReplServerStartMsg}.
*
* @param replServerStartMsg
* The {@link ReplServerStartMsg} this object will wrap.
*/
{
this.degradedStatusThreshold =
}
/**
* Constructs a ReplicationServerInfo object wrapping a
* {@link ReplServerStartDSMsg}.
*
* @param replServerStartDSMsg
* The {@link ReplServerStartDSMsg} this object will wrap.
*/
{
this.degradedStatusThreshold =
}
/**
* Get the server state.
* @return The server state
*/
public ServerState getServerState()
{
return serverState;
}
/**
* Get the group id.
* @return The group id
*/
public byte getGroupId()
{
return groupId;
}
/**
* Get the server protocol version.
* @return the protocolVersion
*/
public short getProtocolVersion()
{
return protocolVersion;
}
/**
* Get the generation id.
* @return the generationId
*/
public long getGenerationId()
{
return generationId;
}
/**
* Get the server id.
* @return the serverId
*/
public int getServerId()
{
return serverId;
}
/**
* Get the server URL.
* @return the serverURL
*/
public String getServerURL()
{
return serverURL;
}
/**
* Get the base dn.
* @return the baseDn
*/
{
return baseDn;
}
/**
* Get the window size.
* @return the windowSize
*/
public int getWindowSize()
{
return windowSize;
}
/**
* Get the ssl encryption.
* @return the sslEncryption
*/
public boolean isSslEncryption()
{
return sslEncryption;
}
/**
* Get the degraded status threshold.
* @return the degradedStatusThreshold
*/
public int getDegradedStatusThreshold()
{
return degradedStatusThreshold;
}
/**
* Get the weight.
* @return the weight. It keeps its default value of 1 if this object is a
* wrapper for a ReplServerStartMsg.
*/
public int getWeight()
{
return weight;
}
/**
* Get the connected DS number.
* @return the connectedDSNumber. It keeps its default value of 0 if this
* object is a wrapper for a ReplServerStartMsg.
*/
public int getConnectedDSNumber()
{
return connectedDSNumber;
}
/**
* Constructs a new replication server info with the passed RSInfo
* internal values and the passed connected DSs. The server state starts
* as a new, empty ServerState.
* @param rsInfo The RSinfo to use for the update
* @param connectedDSs The new connected DSs
*/
{
this.connectedDSs = connectedDSs;
this.serverState = new ServerState();
}
/**
* Converts the object to a RSInfo object.
* @return The RSInfo object matching this object.
*/
{
}
/**
* Updates replication server info with the passed RSInfo internal values
* and the passed connected DSs.
* @param rsInfo The RSinfo to use for the update
* @param connectedDSs The new connected DSs
*/
{
this.connectedDSs = connectedDSs;
}
/**
* Updates replication server info with the passed server state.
* @param serverState The ServerState to use for the update
*/
{
if (this.serverState != null)
{
} else
{
// No state known yet: adopt the passed one as is.
this.serverState = serverState;
}
}
/**
* Get the connected DSs.
* @return the connected DSs
*/
{
return connectedDSs;
}
/**
* Gets the locally configured status for this RS.
* @return the locallyConfigured
*/
public boolean isLocallyConfigured()
{
return locallyConfigured;
}
/**
* Sets the locally configured status for this RS.
* @param locallyConfigured the locallyConfigured to set
*/
public void setLocallyConfigured(boolean locallyConfigured)
{
this.locallyConfigured = locallyConfigured;
}
/**
* Returns a string representation of this object.
* @return A string representation of this object.
*/
{
+ " GroupId:" + this.groupId;
}
}
/**
* Connects this broker to a replication server. Depending on a
* case-insensitive comparison of the base DN, the connection is made as ECL
* (External Change Log) or, presumably, as a regular data server
* (TODO confirm: the other branch is not shown here).
*/
private void connect()
{
if (this.baseDn.compareToIgnoreCase(
{
connectAsECL();
} else
{
}
}
/**
* Contacts all replication servers to get information from them, so as to
* be able to choose the most suitable one.
* @return the collected information.
*/
{
{
// Connect to server and get info about it
performPhaseOneHandshake(serverUrl, false, false);
// Store server info in list (only when the server answered)
if (replicationServerInfo != null)
{
}
}
return rsInfos;
}
/**
* Connects as ECL (External Change Log). The special aspects of connecting
* as ECL compared to connecting as data server are:
* <ul>
* <li>1 single RS configured</li>
* <li>so no choice of the preferred RS</li>
* <li>?? Heartbeat</li>
* <li>Start handshake is:
*
* <pre>
* Broker ---> StartECLMsg ---> RS
* <---- ReplServerStartMsg ---
* ---> StartSessionECLMsg --> RS
* </pre>
*
* </li>
* </ul>
*/
private void connectAsECL()
{
// FIXME:ECL The list of RSs to connect to is for now limited to one RS only
{
}
}
/**
* Connect to a ReplicationServer.
*
* The handshake sequence between a DS and a RS is divided into 2 logical
* consecutive phases (phase 1 and phase 2). The DS always initiates the
* connection and always sends the first message:
*
* DS<->RS:
* -------
*
* phase 1:
* DS --- ServerStartMsg ---> RS
* DS <--- ReplServerStartDSMsg --- RS
* phase 2:
* DS --- StartSessionMsg ---> RS
* DS <--- TopologyMsg --- RS
*
* Before performing a full handshake sequence, the DS searches for the best
* suitable RS by making only the phase 1 handshake with every RS it knows,
* then closing the connection. This allows gathering information on the
* available RSs and then deciding with which RS the full handshake (phase 1
* then phase 2) will finally be performed.
*
* @throws NumberFormatException address was invalid
*/
private void connectAsDataServer()
{
/*
May have created a broker with null replication domain for
unit test purpose.
*/
{
/*
If a first connect or a connection failure occurs, we go through here.
Force the status machine to NOT_CONNECTED_STATUS so that monitoring can
see that we are not connected.
*/
}
/*
Stop any existing heartbeat monitor and changeTime publisher
from a previous session.
*/
synchronized (connectPhaseLock)
{
/*
* Connect to each replication server and get their ServerState then find
* out which one is the best to connect to.
*/
if (debugEnabled())
+ " phase 1 : will perform PhaseOneH with each RS in "
+ " order to elect the preferred one");
// Get info from every available replication server
{
// At least one server answered, find the best one.
// Best found, now initialize connection to this one (handshake phase 1)
if (debugEnabled())
+ " phase 2 : will perform PhaseOneH with the preferred RS="
+ electedRsInfo);
electedRsInfo.getServerURL(), true, false);
if (electedRsInfo != null)
{
/*
Update replication server info with potentially more up to date
data (server state for instance may have changed)
*/
// Handshake phase 1 exchange went well
// Compute in which status we are starting the session to tell the RS
getGenerationID());
// Perform session start (handshake phase 2)
{
} // Could perform handshake phase 2 with best
} // Could perform handshake phase 1 with best
} // Reached some servers
// connected is set by connectToReplicationServer()
// and electedRsInfo isn't null then. Check anyway
{
{
getGenerationID());
} else
{
}
} else
{
/*
* This server could not find any replicationServer. It's going to start
* in degraded mode. Log a message.
*/
connected = false;
// Only act on the first failure so the error log is not flooded.
if (!connectionError)
{
connectionError = true;
{
", "));
}
else
{
}
}
}
}
}
/**
* Connects to a replication server.
*
* @param rsInfo
* the Replication Server to connect to
* @param initStatus
* The status to enter the state machine with
* @param topologyMsg
* the message containing the topology information
*/
{
try
{
/*
Log a message to let the administrator know that the failure
was resolved.
Wake up all the threads that were waiting on the window
on the previous connection.
*/
connectionError = false;
if (sendWindow != null)
{
/*
* Fix (hack) for OPENDJ-401: we want to ensure that no threads holding
* this semaphore will get blocked when they acquire it. However, we
* also need to make sure that we don't overflow the semaphore by
* releasing too many permits.
*/
{
/*
* At least 2^29 acquisitions would need to occur for this to be
* insufficient. In addition, at least 2^30 releases would need to
* occur for this to potentially overflow. Hopefully this is unlikely
* to happen.
*/
}
}
connected = true;
/*
May have created a broker with null replication domain for
unit test purpose.
*/
{
.getGenerationId(), session);
}
if (getRsGroupId() != groupId)
{
/*
Connected to a replication server with the wrong group id:
warn the user and start the heartbeat monitor to recover when a server
with the right group id shows up.
*/
}
if (rsInfo.getProtocolVersion() >=
{
}
}
catch (Exception e)
{
+ stackTraceToSingleLineString(e));
}
finally
{
// Reached with connected still false when the connection attempt failed.
if (!connected)
{
}
}
}
/**
* Determines the status we are starting with according to our state and the
* RS state.
*
* @param rsGenId The generation id of the RS
* @param rsState The server state of the RS
* @param degradedStatusThreshold The degraded status threshold of the RS
* @param dsGenId The local generation id
* @return The initial status: NORMAL_STATUS when the RS has no generation id
* (-1), BAD_GEN_ID_STATUS when the DS and RS generation ids differ, otherwise
* a status derived from the degraded status threshold
*/
{
if (rsGenId == -1)
{
// RS has no generation id
return ServerStatus.NORMAL_STATUS;
} else
{
{
/*
DS and RS have the same generation id.
Determine if we are late or not to replay changes. The RS uses a
threshold value for pending changes to be replayed by a DS to
determine if the DS is in normal status or in degraded status.
Let's compare the local and remote server state using this threshold
value to determine if we are late or not.
*/
if (debugEnabled())
{
}
/*
Check the status to know if it is relevant to change the status. Do not
take the RSD lock to test. If we attempt to change the status whereas
we are in a status that does not allow that, this will be noticed by
the changeStatusFromStatusAnalyzer method. This allows taking the
lock roughly only when needed versus every sleep time timeout.
*/
if (degradedStatusThreshold > 0)
{
if (nChanges >= degradedStatusThreshold)
{
} else
{
}
} else
{
/*
0 threshold value means no degrading system used (no threshold):
force normal status
*/
}
return initStatus;
} else
{
// DS and RS do not have same generation id
return ServerStatus.BAD_GEN_ID_STATUS;
}
}
}
/**
* Connect to the provided server performing the first phase handshake (start
* messages exchange) and return the reply message from the replication
* server, wrapped in a ReplicationServerInfo object.
*
* @param server
* Server to connect to.
* @param keepConnection
* Whether the session is kept open after the handshake. Use true to
* perform handshake phase 2 with the same session and keep the
* created session as the current one.
* @param isECL
* Indicates whether or not an ECL handshake is to be performed.
* @return The answer from the server. Null if no answer could be obtained.
*/
{
boolean hasConnected = false;
try
{
/*
* Open a socket connection to the next candidate.
*/
socket.setTcpNoDelay(true);
boolean isSslEncryption = replSessionSecurity
// Send our ServerStartMsg.
+ socket.getLocalPort();
if (!isECL)
{
}
else
{
}
// Read the ReplServerStartMsg or ReplServerStartDSMsg that should
// come back.
if (debugEnabled())
{
}
// Wrap received message in a server info object
// Sanity check
{
this.baseDn);
return null;
}
/*
* We have sent our own protocol version to the replication server. The
* replication server will use the same one (or an older one if it is an
* old replication server).
*/
.getProtocolVersion());
if (keepConnection)
{
}
if (!isSslEncryption)
{
}
hasConnected = true;
// If this connection is the one to use for sending and receiving
// updates, store it.
if (keepConnection)
{
}
return replServerInfo;
}
catch (ConnectException e)
{
return null;
}
catch (SocketTimeoutException e)
{
return null;
}
catch (Exception e)
{
return null;
}
finally
{
// Clean up when the handshake failed or the session must not be kept.
if (!hasConnected || !keepConnection)
{
if (localSession != null)
{
}
{
try
{
}
catch (IOException e)
{
// Ignore.
}
}
}
{
// There was no server waiting on this host:port. Log a notice and try
// the next replicationServer in the list
if (!connectionError)
{
if (keepConnection) // Log error message only for final connection
{
// the error message is only logged once to avoid overflowing
// the error log
}
if (debugEnabled())
{
}
}
}
}
}
/**
* Performs the second phase handshake for External Change Log (sending the
* start session message) and returns the topology message.
*
* @param server Server we are connecting with.
* @return The topology message (see the FIXME below: the reply of the RS is
* currently not read). Null if the handshake failed.
*/
{
try
{
// Send our Start Session
/* FIXME:ECL In the handshake phase two, should RS send back a topo msg ?
* Read the TopologyMsg that should come back.
topologyMsg = (TopologyMsg) session.receive();
*/
if (debugEnabled())
{
}
// Alright set the timeout to the desired value
connected = true;
} catch (Exception e)
{
// Be sure to return null.
topologyMsg = null;
}
return topologyMsg;
}
/**
* Performs the second phase handshake (send StartSessionMsg and receive
* TopologyMsg messages exchange) and returns the reply message from the
* replication server.
*
* @param server Server we are connecting with.
* @param initStatus The status we are starting with
* @return The TopologyMsg the server replied with. Null if the handshake
* failed.
*/
{
try
{
/*
* Send our StartSessionMsg.
*/
// May have created a broker with null replication domain for
// unit test purpose.
{
new StartSessionMsg(
domain.getRefUrls(),
}
else
{
}
/*
* Read the TopologyMsg that should come back.
*/
if (debugEnabled())
{
}
// Alright set the timeout to the desired value
} catch (Exception e)
{
// Be sure to return null.
topologyMsg = null;
}
return topologyMsg;
}
/**
* Returns the replication server that best fits our need so that we can
* connect to it or determine if we must disconnect from current one to
* re-connect to best server.
*
* Note: this method is static for test purpose (access from unit tests)
*
*
* @param firstConnection True if we run this method for the very first
* connection of the broker. False if we run this method to determine if the
* replication server we are currently connected to is still the best or not.
* @param rsServerId The id of the replication server we are currently
* connected to. Only used when firstConnection is false.
* @param myState The local server state.
* @param rsInfos The list of available replication servers and their
* associated information (choice will be made among them).
* @param localServerId The server id for the suffix we are working for.
* @param groupId The groupId we prefer being connected to if possible
* @param generationId The generation id we are using
* @return The computed best replication server. If the returned value is
* null, the best replication server is undetermined but the local server must
* disconnect (so the best replication server is another one than the current
* one). Null can only be returned when firstConnection is false.
*/
public static ReplicationServerInfo computeBestReplicationServer(
byte groupId, long generationId)
{
// Shortcut, if only one server, this is the best
{
}
/**
* Apply some filtering criteria to determine the best servers list from
* the available ones. The ordered list of criteria is (from more important
* to less important):
* - replication server has the same group id as the local DS one
* - replication server has the same generation id as the local DS one
* - replication server is up to date regarding changes generated by the
* local DS
* - replication server in the same VM as local DS one
*/
/*
The list of best replication servers is filtered with each criterion. At
each criterion, the list is replaced with the filtered one if there
are some servers from the filtering, otherwise, the list is left as is
and the new filtering for the next criterion is applied and so on.
Use only servers locally configured: those are servers declared in
the local configuration. When the current method is called, for
sure, at least one server from the list is locally configured
*/
// Some servers with same group id ?
// Some servers with same generation id ?
{
// If there are some servers with the right generation id, it is useful
// to run the local DS change criterion
}
// Some servers in the local VM or local host?
/**
* Now apply the choice based on the weight to the best servers list
*/
{
}
if (firstConnection)
{
// We are not connected to a server yet
} else
{
/*
We are already connected to a RS: compute the best RS as far as the
weights are concerned. If this is another one, some DS must
disconnect.
*/
}
}
/**
* If the filtered Map is not empty then it is returned, else return the
* original unfiltered Map.
*
* @param filteredMap the Map resulting from a filtering step
* @param unfilteredMap the Map to fall back to when the filtered one is empty
* @return the best fit Map between the filtered Map and the original
* unfiltered Map.
*/
Map<K, V> unfilteredMap)
{
if (!filteredMap.isEmpty())
{
return filteredMap;
}
return unfilteredMap;
}
/**
* Creates a new list that contains only the replication servers that are
* locally configured.
* @param bestServers The list of replication servers to filter
* @return The sub list of locally configured replication servers
*/
{
{
{
}
}
return result;
}
/**
* Creates a new list that contains only the replication servers that have
* the passed group id, from a passed replication server list.
* @param bestServers The list of replication servers to filter
* @param groupId The group id that must match
* @return The sub list of replication servers matching the requested group id
* (which may be empty)
*/
{
{
{
}
}
return result;
}
/**
* Creates a new list that contains only the replication servers that have
* the provided generation id, from a provided replication server list.
* When the selected replication servers have no change (empty serverState)
* then the 'empty' (generationId==-1) replication servers are also included
* in the result list.
*
* @param bestServers The list of replication servers to filter
* @param generationId The generation id that must match
* @return The sub list of replication servers matching the requested
* generation id (which may be empty)
*/
{
// Stays true as long as no selected server is found with a non-empty state.
boolean emptyState = true;
{
{
emptyState = false;
}
}
if (emptyState)
{
// If the RSs with a generationId all have an empty state,
// then the 'empty' (genId=-1) RSs are also candidates
{
{
}
}
}
return result;
}
/**
* Creates a new list that contains only the replication servers that have
* the latest changes from the passed DS, from a passed replication server
* list.
* @param bestServers The list of replication servers to filter
* @param localState The state of the local DS
* @param localServerId The server id to consider for the changes
* @return The sub list of replication servers that have the latest changes
* from the passed DS (which may be empty)
*/
int localServerId)
{
// Extract the CSN of the latest change generated by the local server
{
}
/**
* Find the replication servers that are up to date (or more up to date than
* us, if for instance we failed and restarted, having sent some changes to
* the RS but without having time to store our own state) regarding our own
* server id. If some servers are more up to date, prefer this list but take
* only the latest CSN.
*/
{
{
}
// Has this replication server the latest local change ?
{
{
// This replication server has exactly the latest change from the
// local server
} else
{
// This replication server is even more up to date than the local
// server
if (latestRsCSN == null)
{
// Initialize the latest CSN
latestRsCSN = rsCSN;
}
{
{
} else
{
// This RS is even more up to date, clear the list and store this
// new RS
latestRsCSN = rsCSN;
}
}
}
}
}
{
// Prefer servers more up to date than local server
return moreUpToDateServers;
}
return upToDateServers;
}
/**
* Creates a new list that contains only the replication servers that are on
* the same host as the local DS, from a passed replication server list. This
* method gives priority to any replication server which is in the same
* VM as this DS.
*
* @param bestServers
* The list of replication servers to filter
* @return The sub list of replication servers being on the same host as the
* local DS (which may be empty)
*/
{
/*
* Initially look for all servers on the same host. If we find one in the
* same VM, then narrow the search.
*/
boolean filterServersInSameVM = false;
{
if (separator > 0)
{
if (isLocalAddress(hostname))
{
{
// An RS in the same VM will always have priority.
if (!filterServersInSameVM)
{
// Narrow the search to only include servers in this VM.
filterServersInSameVM = true;
}
}
else if (!filterServersInSameVM)
{
}
else
{
// Skip: we have found some RSs in the same VM, but this RS is not.
}
}
}
}
return result;
}
/**
* Computes the best replication server the local server should be connected
* to so that the load is correctly spread across the topology, following the
* weights guidance.
* Warning: This method is expected to be called with at least 2 servers in
* bestServers
* Note: this method is static for test purpose (access from unit tests)
* @param bestServers The list of replication servers to consider
* @param currentRsServerId The replication server the local server is
* currently connected to. -1 if the local server is not yet connected
* to any replication server.
* @param localServerId The server id of the local server. This is not used
* when it is not connected to a replication server
* (currentRsServerId = -1)
* @return The replication server the local server should be connected to
* as far as the weight is concerned. This may be the currently used one if
* the weight is correctly spread. If the returned value is null, the best
* replication server is undetermined but the local server must disconnect
* (so the best replication server is another one than the current one).
*/
public static ReplicationServerInfo computeBestServerForWeight(
int localServerId)
{
/*
* - Compute the load goal of each RS, deducing it from the weights assigned
* to them.
* - Compute the current load of each RS, deducing it from the DSs
* currently connected to them.
* - Compute the differences between the load goals and the current loads of
* the RSs.
*/
// Sum of the weights
int sumOfWeights = 0;
// Sum of the connected DSs
int sumOfConnectedDSs = 0;
{
}
// Distance (difference) of the current loads to the load goals of each RS:
// key:server id, value: distance
// Precision for the operations (number of digits after the dot)
{
// load goal = rs weight / sum of weights
// The current load is only computed when at least one DS is connected,
// since its formula divides by the total number of connected DSs.
if (sumOfConnectedDSs != 0)
{
// current load = number of connected DSs / total number of DSs
}
// load distance = load goal - current load
}
// -1 is the conventional value for "not connected to any RS yet".
if (currentRsServerId == -1)
{
// The local server is not connected yet
/*
* Find the server with the current highest distance to its load goal and
* choose it. Make an exception if every server is correctly balanced,
* that is when all current load distances are equal to 0: in that case,
* choose the server with the highest weight
*/
boolean allRsWithZeroDistance = true;
int highestWeightRsId = -1;
int highestWeight = -1;
{
if (loadDistance > highestDistance)
{
// This server is farther from its balance point
}
if (loadDistance != 0)
{
allRsWithZeroDistance = false;
}
if (weight > highestWeight)
{
// This server has a higher weight
}
}
// All servers with a 0 distance ?
{
// Choose server with the highest weight
}
} else
{
// The local server is currently connected to a RS, let's see if it must
// disconnect or not, taking the weights into account.
float currentLoadDistance =
// A negative distance means the current load exceeds the load goal.
if (currentLoadDistance < 0)
{
/*
Too many DSs connected to the current RS, compared with its load
goal:
Determine the potential number of DSs to disconnect from the current
RS and see if the local DS is part of them: the DSs that must
disconnect are those with the lowest server id.
Compute the sum of the distances of the load goals of the other RSs
*/
{
if (rsId != currentRsServerId)
{
}
}
{
/*
The average distance of the other RSs shows a lack of DSs.
Compute the number of DSs to disconnect from the current RS,
rounding to the nearest integer number. Do only this if there is
no risk of yoyo effect: when the exact balance cannot be
established due to the current number of DSs connected, do not
disconnect a DS. A simple example where the balance cannot be
reached is:
- RS1 has weight 1 and 2 DSs
- RS2 has weight 1 and 1 DS
=> disconnecting a DS from RS1 to reconnect it to RS2 would make no
sense as this would lead to the reverse situation. In that case,
the perfect balance cannot be reached and we must stick to the
current situation, otherwise the DS would keep moving between the 2
RSs
*/
.floatValue();
// Avoid yoyo effect
if (overloadingDSsNumber == 1)
{
// What would be the new load distance for the current RS if
// we disconnect some DSs ?
if (sumOfConnectedDSs != 0)
{
}
// What would be the new load distance for the other RSs ?
/*
Now compare both values: we must not disconnect the DS if this
is for going in a situation where the load distance of the other
RSs is the opposite of the future load distance of the local RS
or we would evaluate that we should disconnect just after being
arrived on the new RS. But we should disconnect if we reach the
perfect balance (both values are 0).
*/
{
// Avoid the yoyo effect, and keep the local DS connected to its
// current RS
}
}
// Prepare a sorted list (from lowest to highest) of DS server ids
// connected to the current RS
// Go through the list of DSs to disconnect and see if the local
// server is part of them.
int index = 0;
while (overloadingDSsNumber > 0)
{
if (severToDisconnectId == localServerId)
{
// The local server is part of the DSs to disconnect
// null tells the caller to disconnect without naming a target RS.
return null;
}
index++;
}
// The local server is not part of the servers to disconnect from the
// current RS.
} else {
// The average distance of the other RSs does not show a lack of DSs:
// no need to disconnect any DS from the current RS.
}
} else {
// The RS load goal is reached or there are not enough DSs connected to
// it to reach it: do not disconnect from this RS and return rsInfo for
// this RS
}
}
}
/**
* Start the heartbeat monitor thread.
* The monitor is only started when heartbeatInterval is strictly positive.
*/
private void startRSHeartBeatMonitoring()
{
// Start a heartbeat monitor thread.
if (heartbeatInterval > 0)
{
}
}
/**
* Stop the heartbeat monitor thread.
* This method is synchronized on the broker and only acts when a heartbeat
* monitor exists (heartbeatMonitor is not null).
*/
synchronized void stopRSHeartBeatMonitoring()
{
if (heartbeatMonitor != null)
{
}
}
/**
* Restart the ReplicationBroker.
* @param infiniteTry whether the connection should be retried until it
* succeeds (presumably forwarded to the two-argument reStart - TODO confirm)
*/
public void reStart(boolean infiniteTry)
{
}
/**
* Restart the ReplicationServer broker after a failure.
*
* @param failingSession the session which failed; the connection state is
* only reset when it is still the current session
* @param infiniteTry whether to keep trying to connect until the broker is
* connected; when false the loop is left after one connection attempt
*/
{
if (failingSession != null)
{
}
// Reference comparison: the connection state is only cleared when the
// failing session is the one currently in use.
if (failingSession == session)
{
connected = false;
rsGroupId = -1;
rsServerId = -1;
rsServerUrl = null;
}
while (true)
{
// Synchronize inside the loop in order to allow shutdown.
synchronized (startStopLock)
{
{
break;
}
try
{
connect();
}
catch (Exception e)
{
e.getLocalizedMessage()));
}
// Leave the loop once connected, or after this attempt when the caller
// did not ask for unlimited retries.
if (connected || !infiniteTry)
{
break;
}
}
try
{
}
catch (InterruptedException e)
{
// ignore
}
}
if (debugEnabled())
{
}
}
/**
* Publish a message to the other servers.
* @param msg the message to publish
*/
{
}
/**
* Publish a message to the other servers.
* @param msg The message to publish.
* @param retryOnFailure Whether reconnect should automatically be done.
* @return Whether publish succeeded.
*/
{
}
/**
* Publish a recovery message to the other servers.
* @param msg the message to publish
*/
{
}
/**
* Publish a message to the other servers.
* @param msg the message to publish
* @param recoveryMsg the message is a recovery Message
* @param retryOnFailure whether retry should be done on failure
* @return whether the message was successfully sent. false is returned when
* the broker has a connection error, when a non-recovery message is
* published while a recovery is required, or when an IOException
* occurs and retryOnFailure is false.
*/
boolean retryOnFailure)
{
boolean done = false;
{
if (connectionError)
{
/*
It was not possible to connect to any replication server.
Since the operation was already processed, we have no other
choice than to return without sending the ReplicationMsg
and relying on the resend procedure of the connect phase to
fix the problem when we finally connect.
*/
if (debugEnabled())
{
+ "message is not possible due to existing connection error.");
}
return false;
}
try
{
boolean credit;
/*
save the session at the time when we acquire the
send window credit so that we can make sure later
that the session did not change in between.
This is necessary to make sure that we don't publish a message
on a session with a credit that was acquired from a previous
session.
*/
synchronized (connectPhaseLock)
{
}
/*
If the Replication domain has decided that there is a need to
recover some changes then it is not allowed to send this
change but it will be the responsibility of the recovery thread to
do it.
*/
// NOTE(review): '&' is the non-short-circuit boolean operator; on two
// booleans the result is the same as with '&&'.
if (!recoveryMsg & connectRequiresRecovery)
{
return false;
}
{
/*
Acquiring the window credit must be done outside of the
connectPhaseLock because it can be blocking and we don't
want to hold off reconnection in case the connection dropped.
*/
credit =
} else
{
credit = true;
}
if (credit)
{
synchronized (connectPhaseLock)
{
/*
session may have been set to null in the connection phase
when restarting the broker for example.
Check the session. If it has changed, some disconnection or
reconnection happened and we need to restart from scratch.
*/
(session == current_session))
{
done = true;
}
}
}
{
synchronized (connectPhaseLock)
{
/*
the window is still closed.
Send a WindowProbeMsg message to wake up the receiver in case the
window update message was lost somehow...
then loop to check again if connection was closed.
*/
}
}
}
} catch (IOException e)
{
// Without retry the failure is reported to the caller immediately.
if (!retryOnFailure)
return false;
// The receive threads should handle reconnection or
// mark this broker in error. Just retry.
synchronized (connectPhaseLock)
{
try
{
} catch (InterruptedException e1)
{
// ignore
// NOTE(review): the message below is built from the outer IOException
// 'e', not from the InterruptedException 'e1' caught here.
if (debugEnabled())
{
+ "Interrupted exception raised : " + e.getLocalizedMessage());
}
}
}
} catch (InterruptedException e)
{
// just loop.
if (debugEnabled())
{
+ "Interrupted exception raised." + e.getLocalizedMessage());
}
}
}
return true;
}
/**
* Receive a message.
* This method is not thread-safe and should either always be
* called in a single thread or protected by a locking mechanism
* before being called. This is a wrapper around the three-argument version
* of receive, invoked as receive(false, true, false), so that existing
* tests do not have to be modified.
*
* @return the received message
* @throws SocketTimeoutException if the timeout set by setSoTimeout
* has expired
*/
{
return receive(false, true, false);
}
/**
* Receive a message.
* This method is not thread-safe and should either always be
* called in a single thread or protected by a locking mechanism
* before being called.
*
* @param reconnectToTheBestRS Whether broker will automatically switch
* to the best suitable RS.
* @param reconnectOnFailure Whether broker will automatically reconnect
* on failure.
* @param returnOnTopoChange Whether broker should return TopologyMsg
* received.
* @return the received message, or null when the receive loop ends without
* one: the broker is shutting down, there is no session, or a
* failure occurred while reconnectOnFailure is false
*
* @throws SocketTimeoutException if the timeout set by setSoTimeout
* has expired
*/
boolean reconnectOnFailure, boolean returnOnTopoChange)
throws SocketTimeoutException
{
while (!shutdown)
{
if (reconnectOnFailure && !connected)
{
// infinite try to reconnect
}
// Save session information for later in case we need it for log messages
if (savedSession == null)
{
// Must be shutting down.
break;
}
final int replicationServerID = rsServerId;
try
{
{
// The receive window counter is updated under the broker's own monitor.
synchronized (this)
{
rcvWindow--;
}
}
{
}
else if (msg instanceof TopologyMsg)
{
if (reconnectToTheBestRS)
{
// Reset wait time before next computation of best server
}
// Caller wants to check what's changed
if (returnOnTopoChange)
return msg;
}
{
/*
* RS performs a proper disconnection
*/
// Try to find a suitable RS
this.reStart(savedSession, true);
}
else if (msg instanceof MonitorMsg)
{
// This is the response to a MonitorRequest that was sent earlier or
// the regular message of the monitoring publisher of the RS.
// Extract and store replicas ServerStates
{
}
// Notify the sender that the response was received.
synchronized (monitorResponse)
{
monitorResponse.set(true);
}
// Update the replication servers ServerStates with new received info
{
{
}
}
/*
Now if it is allowed, compute the best replication server to see if
it is still the one we are currently connected to. If not,
disconnect properly and let the connection algorithm re-connect to
best replication server
*/
if (reconnectToTheBestRS)
{
if (mustRunBestServerCheckingAlgorithm == 2)
{
// Stable topology (no topo msg for a few seconds): proceed with
// best server checking.
{
// The best replication server is no longer the one we are
// currently using. Disconnect properly then reconnect.
if (bestServerInfo == null)
{
baseDn);
}
else
{
}
reStart(true);
}
// Reset wait time before next computation of best server
}
}
}
else
{
// Any other message type is handed straight back to the caller.
return msg;
}
}
catch (SocketTimeoutException e)
{
// Timeouts are propagated to the caller unchanged.
throw e;
}
catch (Exception e)
{
if (debugEnabled())
{
}
if (!shutdown)
{
{
/*
* We did not initiate the close on our side, log an error message.
*/
}
if (reconnectOnFailure)
{
reStart(savedSession, true);
}
else
{
break; // does not seem necessary to explicitly disconnect ..
}
}
}
} // while !shutdown
// No message to hand back: the loop was left on shutdown, on a missing
// session, or on a failure without automatic reconnection.
return null;
}
/**
* Gets the States of all the Replicas currently in the
* Topology.
* When this method is called, a Monitoring message will be sent
* to the Replication Server to which this domain is currently connected
* so that it computes a table containing information about
* all Directory Servers in the topology.
* This computation involves communication with all the servers
* currently connected.
*
* @return The States of all Replicas in the topology (except us)
*/
{
// Clear the flag; it is set back to true when a MonitorMsg is received.
monitorResponse.set(false);
// publish Monitor Request Message to the Replication Server
// wait for Response up to 10 seconds.
try
{
synchronized (monitorResponse)
{
if (!monitorResponse.get())
{
}
}
} catch (InterruptedException e)
{
}
return replicaStates;
}
/**
* This method does the computation needed for the window
* management after treatment by the worker threads.
*
* This should be called once the replay threads have done their job
* and the window can be opened again.
*/
public synchronized void updateWindowAfterReplay()
{
try
{
{
updateDoneCount = 0;
}
} catch (IOException e)
{
// Any error on the socket will be handled by the thread calling receive()
// just ignore.
}
}
/**
* Stop the broker.
* While holding startStopLock, the broker is flagged as shut down and
* disconnected, the replication server name is set to "stopped" and the
* group id, server id and URL of the connected RS are reset.
*/
public void stop()
{
if (debugEnabled())
+ " domain " + baseDn);
synchronized (startStopLock)
{
shutdown = true;
connected = false;
replicationServer = "stopped";
rsGroupId = -1;
rsServerId = -1;
rsServerUrl = null;
}
}
/**
* Set a timeout value.
* With this option set to a non-zero value, calls to the receive() method
* block for only this amount of time after which a
* java.net.SocketTimeoutException is raised.
* The Broker is valid and usable even after such an Exception is raised.
*
* @param timeout the specified timeout, in milliseconds.
* @throws SocketException if there is an error in the underlying protocol,
* such as a TCP error.
*/
{
{
}
}
/**
* Returns the name of the replication server this broker is currently
* connected to.
*
* @return the name of the replication server to which this domain is
* currently connected, as held in the replicationServer field.
*/
public String getReplicationServer()
{
return this.replicationServer;
}
/**
* Returns the upper bound of the receive window.
*
* @return The maximum receive window size.
*/
public int getMaxRcvWindow()
{
return this.maxRcvWindow;
}
/**
* Returns the receive window size as it currently stands.
* The value is read without holding the broker's monitor.
*
* @return The current receive window size.
*/
public int getCurrentRcvWindow()
{
return this.rcvWindow;
}
/**
* Returns the upper bound of the send window.
*
* @return The maximum send window size.
*/
public int getMaxSendWindow()
{
return this.maxSendWindow;
}
/**
* Returns the send window size as it currently stands.
*
* @return The number of permits available on the send window while the
* broker is connected, 0 when it is not connected.
*/
public int getCurrentSendWindow()
{
// The send window is only consulted while the broker is connected.
return connected ? sendWindow.availablePermits() : 0;
}
/**
* Returns how many times the connection has been lost.
*
* @return The number of times the connection was lost.
*/
public int getNumLostConnections()
{
return this.numLostConnections;
}
/**
* Change some configuration parameters.
*
* @param replicationServers The new list of replication servers.
* @param window The max window size.
* @param heartbeatInterval The heartBeat interval.
* @param groupId The new group id to use
*
* @return A boolean indicating whether the changes
* require restarting the session with the replication server.
*/
public boolean changeConfig(
byte groupId)
{
// These parameters need to be renegotiated with the ReplicationServer
// so if they have changed, that requires restarting the session with
// the ReplicationServer.
// A new session is necessary only when information regarding
// the connection is modified
boolean needToRestartSession =
this.replicationServerUrls == null
|| window != this.maxRcvWindow
|| heartbeatInterval != this.heartbeatInterval
this.maxRcvWindow = window;
this.heartbeatInterval = heartbeatInterval;
return needToRestartSession;
}
/**
* Returns the replication protocol version held by this broker.
*
* @return The version of the replication protocol.
*/
public short getProtocolVersion()
{
return this.protocolVersion;
}
/**
* Tells whether the broker is connected to a ReplicationServer and is
* therefore ready to receive and send Replication Messages.
*
* @return true if the server is connected, false if not.
*/
public boolean isConnected()
{
return this.connected;
}
/**
* Determine whether the connection to the replication server is encrypted.
* @return true if the connection is encrypted, false otherwise.
*/
public boolean isSessionEncrypted()
{
}
/**
* Signals the RS we just entered a new status.
* @param newStatus The status the local DS just entered
*/
{
try
{
} catch (IOException ex)
{
}
}
/**
* Sets the group id of the broker.
* @param groupId The new group id.
*/
public void setGroupId(byte groupId)
{
}
/**
* Gets the info for DSs in the topology (except us).
* @return The info for DSs in the topology (except us)
*/
{
return dsList;
}
/**
* Gets the info for RSs in the topology (except the one we are connected
* to).
* @return The info for RSs in the topology (except the one we are connected
* to)
*/
{
{
}
return result;
}
/**
* Computes the list of DSs connected to a particular RS.
* @param rsId The RS id of the server one wants to know the connected DSs
* @param dsList The list of DSinfo from which to compute things
* @return The list of connected DSs to the server rsId
*/
{
if (rsServerId == rsId)
{
/*
If we are computing connected DSs for the RS we are connected
to, we should count the local DS as the DSInfo of the local DS is not
sent by the replication server in the topology message. We must count
ourselves as a connected server.
*/
}
{
}
return connectedDSs;
}
/**
* Processes an incoming TopologyMsg.
* Updates the structures for the local view of the topology.
*
* @param topoMsg The topology information received from RS.
*/
{
if (debugEnabled())
// Store new DS list
// Update replication server info list with the received topology
// information
{
if (replicationServerInfo == null)
{
// New replication server: create info for it and add it to the list.
// Set the locally configured flag for this new RS only if it is
// configured
} else
{
// Update the existing info for the replication server
}
}
/*
* Now remove any replication server that may have disappeared from the
* topology.
*/
{
{
// This replication server has quit the topology, remove it from the
// list
}
}
{
{
}
}
}
/**
* Tells whether the broker failed to find any Replication Server, meaning
* that the connection attempt failed.
*
* @return true if the server could not connect to any Replication Server.
*/
public boolean hasConnectionError()
{
return this.connectionError;
}
/**
* Starts publishing to the RS the current timestamp used in this server.
* The publisher is only set up when changeTimeHeartbeatSendInterval is
* strictly positive; otherwise the only visible action is a debug message
* stating that the CSN heartbeat is not configured.
*/
public void startChangeTimeHeartBeatPublishing()
{
// Start a CSN heartbeat thread.
if (changeTimeHeartbeatSendInterval > 0)
{
+ this.getServerId()
+ ") change time heartbeat publisher for domain \""
serverId);
} else
{
if (debugEnabled())
+ " is not configured to send CSN heartbeat interval");
}
}
/**
* Stops publishing to the RS the current timestamp used in this server.
* This method is synchronized on the broker and only acts when a publisher
* thread exists (ctHeartbeatPublisherThread is not null).
*/
public synchronized void stopChangeTimeHeartBeatPublishing()
{
if (ctHeartbeatPublisherThread != null)
{
}
}
/**
* Set a new change time heartbeat interval to this broker.
* @param changeTimeHeartbeatInterval The new interval (in ms).
*/
public void setChangeTimeHeartbeatInterval(int changeTimeHeartbeatInterval)
{
}
/**
* Set the connectRequiresRecovery to the provided value.
* This flag is used to indicate if a recovery of Update is necessary
* after a reconnection to a RS.
* It is the responsibility of the ReplicationDomain to set it during the
* sessionInitiated phase.
*
* @param b the new value of the connectRequiresRecovery.
*/
public void setRecoveryRequired(boolean b)
{
}
/**
* Tells whether this broker is shutting down.
*
* @return whether the broker is shutting down.
*/
public boolean shuttingDown()
{
return this.shutdown;
}
/**
* Returns the local address of this replication domain, or the empty string
* if it is not yet connected.
*
* @return The local address.
*/
{
}
/**
* Returns the replication monitor associated with this broker.
*
* @return The replication monitor.
*/
{
// Only invoked by replication domain so always non-null.
return monitor;
}
{
// De-register the monitor with the old name.
if (oldSession != null)
{
oldSession.close();
}
// Re-register the monitor with the new name.
}
private void registerReplicationMonitor()
{
/*
* The monitor should not be registered if this is a unit test because the
* replication domain is null.
*/
{
}
}
private void deregisterReplicationMonitor()
{
/*
* The monitor should not be deregistered if this is a unit test because the
* replication domain is null.
*/
{
}
}
}