/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at legal-notices/CDDLv1_0.txt
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at legal-notices/CDDLv1_0.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information:
* Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
*
* Copyright 2006-2010 Sun Microsystems, Inc.
* Portions Copyright 2011-2015 ForgeRock AS
*/
/**
* The broker for Multi-master Replication.
*/
public class ReplicationBroker
{
/**
* Immutable class containing information about whether the broker is
* connected to an RS and data associated to this connected RS.
*/
// @Immutable
private static final class ConnectedRS
{
/** The info of the RS we are connected to. */
/** Contains a connected session to the RS if any exist, null otherwise. */
{
this.replicationServer = replicationServer;
}
{
}
{
return new ConnectedRS("stopped");
}
{
return NO_CONNECTED_RS;
}
public int getServerId()
{
}
private byte getGroupId()
{
}
private boolean isConnected()
{
}
/** {@inheritDoc} */
{
}
{
if (!isConnected())
{
}
else
{
.append(")");
}
}
}
private volatile boolean shutdown;
/**
* String reported under CSN=monitor when there is no connected RS.
*/
private int maxSendWindow;
private int timeout;
/**
* The RS this DS is currently connected to.
* <p>
* Always use {@link #setConnectedRS(ConnectedRS)} to set a new
* connected RS.
*/
// @NotNull // for the reference
private final AtomicReference<ConnectedRS> connectedRS = new AtomicReference<>(ConnectedRS.noConnectedRS());
/** Our replication domain. */
/**
* This object is used as a conditional event to be notified about
* the reception of monitor information from the Replication Server.
*/
/**
* A Map containing the ServerStates of all the replicas in the topology
* as seen by the ReplicationServer the last time it was polled or the last
* time it published monitoring information.
*/
/** A thread to monitor heartbeats on the session. */
/** The number of times the connection was lost. */
private int numLostConnections;
/**
* When the broker cannot connect to any replication server,
* it logs an error and keeps retrying every second.
* This boolean is set when the first failure happens and is used
* to avoid repeating the error message on further connection failures
* and to know that it is necessary to print a new message when the broker
* finally succeeds in connecting.
*/
private volatile boolean connectionError;
/**
* The thread that publishes messages to the RS containing the current
* change time of this DS.
*/
/*
* Properties for the last topology info received from the network.
*/
/** Contains the last known state of the replication topology. */
/** <pre>@GuardedBy("this")</pre>. */
private volatile int updateDoneCount;
private volatile boolean connectRequiresRecovery;
/**
* This integer defines when the best replication server checking algorithm
* should be engaged.
* Every time a monitoring message (each monitoring publisher period) is
* received, it is incremented. When it reaches 2, we run the checking
* algorithm to see if we must reconnect to another best replication server.
* Then we reset the value to 0. But when a topology message is received, the
* integer is reset to 0. This ensures that we wait at least one monitoring
* publisher period before running the algorithm, but also that we wait at
* least for a monitoring period after the last received topology message
* (topology stabilization).
*/
private int mustRunBestServerCheckingAlgorithm;
/**
* The monitor provider for this replication domain.
* <p>
* The name of the monitor includes the local address and must therefore be
* re-registered every time the session is re-established or destroyed. The
* monitor provider can only be created (i.e. non-null) if there is a
* replication domain, which is not the case in unit tests.
*/
/**
* Creates a new ReplicationServer Broker for a particular ReplicationDomain.
*
* @param replicationDomain The replication domain that is creating us.
* @param state The ServerState that should be used by this broker
* when negotiating the session with the replicationServer.
* @param config The configuration to use.
* @param replSessionSecurity The session security configuration.
*/
{
this.domain = replicationDomain;
this.rcvWindow = getMaxRcvWindow();
this.shutdown = true;
/*
* Only create a monitor if there is a replication domain (this is not the
* case in some unit tests).
*/
}
/**
 * Starts this broker if it is currently shut down.
 * <p>
 * While holding {@code startStopLock}, a broker that is already started is
 * left untouched; otherwise the shutdown flag is cleared and the receive
 * window is reset to its maximum value.
 */
public void start()
{
  synchronized (startStopLock)
  {
    if (shutdown)
    {
      // Only a stopped broker needs to be (re)initialised.
      shutdown = false;
      this.rcvWindow = getMaxRcvWindow();
    }
  }
}
/**
* Gets the group id of the RS we are connected to.
* @return The group id of the RS we are connected to
*/
public byte getRsGroupId()
{
}
/**
* Gets the server id of the RS we are connected to.
* @return The server id of the RS we are connected to
*/
public int getRsServerId()
{
}
/**
* Gets the server id held by the configuration of this broker.
* @return The server id read from the configuration
*/
public int getServerId()
{
return config.getServerId();
}
{
}
{
return config.getReplicationServer();
}
/**
* Gets the group id held by the configuration of this broker.
* <p>
* The configured value is narrowed to a byte by an explicit cast.
*
* @return The configured group id, as a byte
*/
private byte getGroupId()
{
return (byte) config.getGroupId();
}
/**
* Gets the generation id of the replication domain.
* @return The generation id reported by the replication domain
*/
private long getGenerationID()
{
return domain.getGenerationID();
}
/**
* Set the generation id - for test purpose.
* @param generationID The generation id
*/
{
}
/**
* Compares 2 replication servers addresses and returns true if they both
* represent the same replication server instance.
* @param rs1Url Replication server 1 address
* @param rs2Url Replication server 2 address
* @return True if both replication server addresses represent the same
* replication server instance, false otherwise.
*/
{
try
{
}
catch (RuntimeException ex)
{
// Not a RS url or not a valid port number: should not happen
return false;
}
}
/**
* Bag class for keeping info we get from a replication server in order to
* compute the best one to connect to. This is in fact a wrapper to a
* ReplServerStartMsg (V3) or a ReplServerStartDSMsg (V4). This can also be
* updated with info coming from received topology messages or monitoring
* messages.
*/
static class ReplicationServerInfo
{
private final short protocolVersion;
private final int windowSize;
// @NotNull
private final boolean sslEncryption;
private final int degradedStatusThreshold;
/** Keeps the 0 value if created with a ReplServerStartMsg. */
private int connectedDSNumber;
// @NotNull
/**
* Is this RS locally configured? (the RS is recognized as a usable server).
*/
private boolean locallyConfigured = true;
/**
* Create a new instance of ReplicationServerInfo wrapping the passed
* message.
* @param msg Message to wrap.
* @param newServerURL Override serverURL.
* @return The new instance wrapping the passed message.
* @throws IllegalArgumentException If the passed message has an unexpected
* type.
*/
{
return rsInfo;
}
/**
* Create a new instance of ReplicationServerInfo wrapping the passed
* message.
* @param msg Message to wrap: a ReplServerStartMsg or a ReplServerStartDSMsg.
* @return The new instance wrapping the passed message.
* @throws IllegalArgumentException If the passed message has an unexpected
* type.
*/
throws IllegalArgumentException
{
if (msg instanceof ReplServerStartMsg)
{
// RS uses protocol V3 or lower
}
else if (msg instanceof ReplServerStartDSMsg)
{
// RS uses protocol V4 or higher
}
// Unsupported message type: should not happen
throw new IllegalArgumentException("Unexpected PDU type: "
}
/**
* Constructs a ReplicationServerInfo object wrapping a
* {@link ReplServerStartMsg}.
*
* @param msg
* The {@link ReplServerStartMsg} this object will wrap.
*/
{
}
/**
* Constructs a ReplicationServerInfo object wrapping a
* {@link ReplServerStartDSMsg}.
*
* @param msg
* The {@link ReplServerStartDSMsg} this object will wrap.
*/
{
}
/**
* Constructs a new replication server info with the passed RSInfo internal
* values and the passed connected DSs.
*
* @param rsInfo
* The RSinfo to use for the update
* @param connectedDSs
* The new connected DSs
*/
{
this.rsInfo =
this.protocolVersion = 0;
this.windowSize = 0;
this.connectedDSs = connectedDSs;
this.sslEncryption = false;
this.degradedStatusThreshold = -1;
this.serverState = new ServerState();
}
/**
* Get the server state.
* @return The server state
*/
{
return serverState;
}
/**
* Get the group id.
* @return The group id held by the wrapped {@code rsInfo}
*/
public byte getGroupId()
{
return rsInfo.getGroupId();
}
/**
* Get the server protocol version.
* @return the protocolVersion; 0 when this object was constructed from an
* RSInfo and connected DSs
*/
public short getProtocolVersion()
{
return protocolVersion;
}
/**
* Get the generation id.
* @return the generationId held by the wrapped {@code rsInfo}
*/
public long getGenerationId()
{
return rsInfo.getGenerationId();
}
/**
* Get the server id.
* @return the serverId
*/
public int getServerId()
{
}
/**
* Get the server URL.
* @return the serverURL
*/
{
return rsInfo.getServerUrl();
}
/**
* Get the base DN.
*
* @return the base DN
*/
{
return baseDN;
}
/**
* Get the window size.
* @return the windowSize; 0 when this object was constructed from an
* RSInfo and connected DSs
*/
public int getWindowSize()
{
return windowSize;
}
/**
* Get the ssl encryption.
* @return the sslEncryption flag; false when this object was constructed
* from an RSInfo and connected DSs
*/
public boolean isSslEncryption()
{
return sslEncryption;
}
/**
* Get the degraded status threshold.
* @return the degradedStatusThreshold; -1 when this object was constructed
* from an RSInfo and connected DSs
*/
public int getDegradedStatusThreshold()
{
return degradedStatusThreshold;
}
/**
* Get the weight.
* @return the weight. Null if this object is a wrapper for
* a ReplServerStartMsg.
*/
public int getWeight()
{
}
/**
* Get the connected DS number.
* @return the connectedDSNumber; stays at 0 if this object is a wrapper for
* a ReplServerStartMsg.
*/
public int getConnectedDSNumber()
{
return connectedDSNumber;
}
/**
* Converts the object to a RSInfo object.
* @return The RSInfo object matching this object.
*/
{
return rsInfo;
}
/**
* Updates replication server info with the passed RSInfo internal values
* and the passed connected DSs.
* @param rsInfo The RSinfo to use for the update
* @param connectedDSs The new connected DSs
*/
{
this.connectedDSs = connectedDSs;
}
{
}
/**
* Updates replication server info with the passed server state.
* @param serverState The ServerState to use for the update
*/
{
}
/**
* Get the connected DSs.
* @return the connected DSs
*/
{
return connectedDSs;
}
/**
* Gets the locally configured status for this RS, i.e. whether the RS is
* recognized as a usable server.
* @return the locallyConfigured flag (initially true)
*/
public boolean isLocallyConfigured()
{
return locallyConfigured;
}
/**
* Sets the locally configured status for this RS.
* @param locallyConfigured the locallyConfigured to set
*/
{
this.locallyConfigured = locallyConfigured;
}
/**
* Returns a string representation of this object.
* @return A string representation of this object.
*/
{
return "ReplServerInfo Url:" + getServerURL()
+ " ServerId:" + getServerId()
+ " GroupId:" + getGroupId()
+ " connectedDSs:" + connectedDSs;
}
}
/**
* Contacts all replication servers to get information from them and being
* able to choose the more suitable.
* @return the collected information.
*/
{
{
// Connect to server + get and store info about it
{
}
}
return rsInfos;
}
/**
* Connect to a ReplicationServer.
*
* The handshake sequence between a DS and an RS is divided into 2 logical
* consecutive phases (phase 1 and phase 2). The DS always initiates the
* connection and always sends the first message:
*
* DS<->RS:
* -------
*
* phase 1:
* DS --- ServerStartMsg ---> RS
* DS <--- ReplServerStartDSMsg --- RS
* phase 2:
* DS --- StartSessionMsg ---> RS
* DS <--- TopologyMsg --- RS
*
* Before performing a full handshake sequence, the DS searches for the best
* suitable RS by performing only the phase 1 handshake with every RS it knows
* and then closing the connection. This allows it to gather information on
* the available RSs and then decide with which RS the full handshake (phase 1
* then phase 2) will finally be performed.
*
* @throws NumberFormatException address was invalid
*/
private void connectAsDataServer()
{
/*
* If a first connect or a connection failure occur, we go through here.
* force status machine to NOT_CONNECTED_STATUS so that monitoring can see
* that we are not connected.
*/
/*
Stop any existing heartbeat monitor and changeTime publisher
from a previous session.
*/
synchronized (connectPhaseLock)
{
final int serverId = getServerId();
/*
* Connect to each replication server and get their ServerState then find
* out which one is the best to connect to.
*/
if (logger.isTraceEnabled())
{
debugInfo("phase 1 : will perform PhaseOneH with each RS in order to elect the preferred one");
}
// Get info from every available replication servers
{
}
else
{
// At least one server answered, find the best one.
// Best found, now initialize connection to this one (handshake phase 1)
if (logger.isTraceEnabled())
{
}
if (electedRsInfo != null)
{
/*
Update replication server info with potentially more up to date
data (server state for instance may have changed)
*/
// Handshake phase 1 exchange went well
// Compute in which status we are starting the session to tell the RS
// Perform session start (handshake phase 2)
final TopologyMsg topologyMsg =
{
} // Could perform handshake phase 2 with best
} // Could perform handshake phase 1 with best
}
// connectedRS has been updated by calls above, reload it
if (rs.isConnected())
{
{
}
else
{
}
}
else
{
// This server could not find any replicationServer.
// It's going to start in degraded mode. Log a message.
if (!connectionError)
{
connectionError = true;
{
}
else
{
}
}
}
}
}
{
final int rsServerId = getRsServerId();
do
{
}
if (logger.isTraceEnabled())
{
}
}
{
{
}
else
{
}
return sb;
}
/**
* Connects to a replication server.
*
* @param rs
* the Replication Server to connect to
* @param initStatus
* The status to enter the state machine with
* @param topologyMsg
* the message containing the topology information
*/
{
boolean connectCompleted = false;
try
{
/*
Log a message to let the administrator know that the failure was resolved.
Wake up all the thread that were waiting on the window
on the previous connection.
*/
connectionError = false;
if (sendWindow != null)
{
/*
* Fix (hack) for OPENDJ-401: we want to ensure that no threads holding
* this semaphore will get blocked when they acquire it. However, we
* also need to make sure that we don't overflow the semaphore by
* releasing too many permits.
*/
{
/*
* At least 2^29 acquisitions would need to occur for this to be
* insufficient. In addition, at least 2^30 releases would need to
* occur for this to potentially overflow. Hopefully this is unlikely
* to happen.
*/
}
}
rcvWindow = getMaxRcvWindow();
final byte groupId = getGroupId();
{
/*
Connected to replication server with wrong group id:
warn user and start heartbeat monitor to recover when a server
with the right group id shows up.
*/
}
if (rsInfo.getProtocolVersion() >=
{
}
connectCompleted = true;
}
catch (Exception e)
{
}
finally
{
if (!connectCompleted)
{
}
}
}
/**
* Determines the status we are starting with according to our state and the
* RS state.
*
* @param rsGenId The generation id of the RS
* @param rsState The server state of the RS
* @param degradedStatusThreshold The degraded status threshold of the RS
* @param dsGenId The local generation id
* @return The initial status
*/
{
if (rsGenId == -1)
{
// RS has no generation id
return ServerStatus.NORMAL_STATUS;
}
{
// DS and RS do not have same generation id
return ServerStatus.BAD_GEN_ID_STATUS;
}
else
{
/*
DS and RS have same generation id
Determine if we are late or not to replay changes. RS uses a
threshold value for pending changes to be replayed by a DS to
determine if the DS is in normal status or in degraded status.
Let's compare the local and remote server state using this threshold
value to determine if we are late or not
*/
if (logger.isTraceEnabled())
{
}
/*
Check status to know if it is relevant to change the status. Do not
take RSD lock to test. If we attempt to change the status whereas
we are in a status that do not allows that, this will be noticed by
the changeStatusFromStatusAnalyzer method. This allows to take the
lock roughly only when needed versus every sleep time timeout.
*/
{
return ServerStatus.DEGRADED_STATUS;
}
// degradedStatusThreshold value of '0' means no degrading system used
// (no threshold): force normal status
return ServerStatus.NORMAL_STATUS;
}
}
/**
* Connect to the provided server performing the first phase handshake (start
* messages exchange) and return the reply message from the replication
* server, wrapped in a ReplicationServerInfo object.
*
* @param serverURL
* Server to connect to.
* @param keepSession
* Whether to keep the session open after the handshake. Use true to
* perform handshake phase 2 with the same session and keep the
* session being created as the current one.
* @return The answer from the server, or null if no answer could be obtained.
*/
{
boolean hasConnected = false;
try
{
// Open a socket connection to the next candidate.
socket.setTcpNoDelay(true);
{
}
// Send our ServerStartMsg.
// Read the ReplServerStartMsg or ReplServerStartDSMsg that should
// come back.
if (logger.isTraceEnabled())
{
+ msg);
}
// Wrap received message in a server info object
final ReplicationServerInfo replServerInfo =
// Sanity check
{
}
/*
* We have sent our own protocol version to the replication server. The
* replication server will use the same one (or an older one if it is an
* old replication server).
*/
if (!isSslEncryption)
{
}
hasConnected = true;
if (keepSession)
{
// cannot store it yet,
// only store after a successful phase two handshake
}
}
catch (ConnectException e)
{
logger.traceException(e);
}
catch (SocketTimeoutException e)
{
logger.traceException(e);
}
catch (Exception e)
{
logger.traceException(e);
}
finally
{
if (!hasConnected || !keepSession)
{
}
{
// There was no server waiting on this host:port
// Log a notice and will try the next replicationServer in the list
if (keepSession) // Log error message only for final connection
{
// log the error message only once to avoid overflowing the error log
}
}
}
}
/**
* Performs the second phase handshake (send StartSessionMsg and receive
* TopologyMsg messages exchange) and return the reply message from the
* replication server.
*
* @param electedRS Server we are connecting with.
* @param initStatus The status we are starting with
* @return The TopologyMsg the server replied with, or null if no answer
* could be obtained.
*/
{
try
{
// Send our StartSessionMsg.
final StartSessionMsg startSessionMsg;
startSessionMsg = new StartSessionMsg(
domain.getRefUrls(),
// Read the TopologyMsg that should come back.
if (logger.isTraceEnabled())
{
+ "\nAND RECEIVED:\n" + topologyMsg);
}
// Alright set the timeout to the desired value
return topologyMsg;
}
catch (Exception e)
{
return null;
}
}
/**
* Class holding evaluation results for electing the best replication server
* for the local directory server.
*/
static class RSEvaluations
{
private final int localServerId;
/**
* Ctor.
*
* @param localServerId
* the serverId for the local directory server
* @param rsInfos
* a Map of serverId => {@link ReplicationServerInfo} with all the
* candidate replication servers
*/
{
this.localServerId = localServerId;
}
{
if (eval.hasAcceptedAny())
{
return true;
}
return false;
}
/**
* Sets the elected best replication server, rejecting all the other
* replication servers with the supplied evaluation.
*
* @param bestRsId
* the serverId of the elected replication server
* @param rejectedRSsEval
* the evaluation for all the rejected replication servers
*/
{
{
{
}
}
}
{
{
}
}
private boolean foundBestRS()
{
}
/**
* Returns the {@link ReplicationServerInfo} for the best replication
* server.
*
* @return the {@link ReplicationServerInfo} for the best replication server
*/
{
if (foundBestRS())
{
}
return null;
}
/**
* Returns the evaluations for all the candidate replication servers.
*
* @return a Map of serverId => LocalizableMessage containing the evaluation for each
* candidate replication servers.
*/
{
if (foundBestRS())
{
{
}
}
}
/**
* Returns the evaluation for the supplied replication server Id.
* <p>
* Note: "unknown RS" message is returned if the supplied replication server
* was not part of the candidate replication servers.
*
* @param rsServerId
* the supplied replication server Id
* @return the evaluation {@link LocalizableMessage} for the supplied replication
* server Id
*/
{
if (evaluation != null)
{
return evaluation;
}
}
/** {@inheritDoc} */
{
+ ", Evaluation of connected replication servers"
+ ", Any replication server not appearing here"
+ " could not be contacted.";
}
}
/**
* Evaluation local to one filter.
*/
private static class LocalEvaluation
{
{
// forget previous eval, including undoing reject
}
{
}
{
return accepted;
}
{
}
{
{
}
return result;
}
private boolean hasAcceptedAny()
{
}
}
/**
* Returns the replication server that best fits our need so that we can
* connect to it or determine if we must disconnect from current one to
* re-connect to best server.
* <p>
* Note: this method is static for test purpose (access from unit tests)
*
* @param firstConnection True if we run this method for the very first
* connection of the broker. False if we run this method to determine if the
* replication server we are currently connected to is still the best or not.
* @param rsServerId The id of the replication server we are currently
* connected to. Only used when firstConnection is false.
* @param myState The local server state.
* @param rsInfos The list of available replication servers and their
* associated information (choice will be made among them).
* @param localServerId The server id for the suffix we are working for.
* @param groupId The groupId we prefer being connected to if possible
* @param generationId The generation id we are using
* @return The computed best replication server. If the returned value is
* null, the best replication server is undetermined but the local server must
* disconnect (so the best replication server is another one than the current
* one). Null can only be returned when firstConnection is false.
*/
byte groupId, long generationId)
{
// Shortcut, if only one server, this is the best
if (evals.foundBestRS())
{
return evals;
}
/**
* Apply some filtering criteria to determine the best servers list from
* the available ones. The ordered list of criteria is (from more important
* to less important):
* - replication server has the same group id as the local DS one
* - replication server has the same generation id as the local DS one
* - replication server is up to date regarding changes generated by the
* local DS
* - replication server in the same VM as local DS one
*/
/*
The list of best replication servers is filtered with each criteria. At
each criteria, the list is replaced with the filtered one if there
are some servers from the filtering, otherwise, the list is left as is
and the new filtering for the next criteria is applied and so on.
Use only servers locally configured: those are servers declared in
the local configuration. When the current method is called, for
sure, at least one server from the list is locally configured
*/
// Some servers with same group id ?
// Some servers with same generation id ?
final boolean rssWithSameGenerationIdExist =
{
// If some servers with the right generation id this is useful to
// run the local DS change criteria
}
// Some servers in the local VM or local host?
if (evals.foundBestRS())
{
return evals;
}
/**
* Now apply the choice based on the weight to the best servers list
*/
if (firstConnection)
{
// We are not connected to a server yet
}
else
{
/*
* We are already connected to a RS: compute the best RS as far as the
* weights is concerned. If this is another one, some DS must disconnect.
*/
}
return evals;
}
/**
* Creates a new list that contains only replication servers that are locally
* configured.
* @param evals The evaluation object
*/
int localServerId)
{
{
if (rsInfo.isLocallyConfigured())
{
}
else
{
}
}
}
/**
* Creates a new list that contains only replication servers that have the
* passed group id, from a passed replication server list.
* @param evals The evaluation object
* @param groupId The group id that must match
*/
int localServerId, byte groupId)
{
{
{
}
else
{
}
}
}
/**
* Creates a new list that contains only replication servers that have the
* provided generation id, from a provided replication server list.
* When the selected replication servers have no change (empty serverState)
* then the 'empty'(generationId==-1) replication servers are also included
* in the result list.
*
* @param evals The evaluation object
* @param generationId The generation id that must match
* @return whether some replication server passed the filter
*/
private static boolean filterServersWithSameGenerationId(
{
boolean emptyState = true;
{
{
{
emptyState = false;
}
}
{
}
else
{
}
}
if (emptyState)
{
// If the RS with a generationId have all an empty state,
// then the 'empty'(genId=-1) RSes are also candidate
{
{
// will undo the reject of previously rejected RSs
}
}
}
}
/**
* Creates a new list that contains only replication servers that have the
* latest changes from the passed DS, from a passed replication server list.
* @param evals The evaluation object
* @param localState The state of the local DS
* @param localServerId The server id to consider for the changes
*/
private static void filterServersWithAllLocalDSChanges(
{
// Extract the CSN of the latest change generated by the local server
/**
* Find replication servers that are up to date (or more up to date than us,
* if for instance we failed and restarted, having sent some changes to the
* RS but without having time to store our own state) regarding our own
* server id. If some servers are more up to date, prefer this list but take
* only the latest CSN.
*/
boolean foundRSMoreUpToDateThanLocalDS = false;
{
// Has this replication server the latest local change ?
{
}
{
// This replication server has exactly the latest change from the
// local server
{
}
else
{
}
}
{
// This replication server is even more up to date than the local server
if (latestRsCSN == null)
{
foundRSMoreUpToDateThanLocalDS = true;
// all previous results are now outdated, reject them all
localCSN);
// Initialize the latest CSN
latestRsCSN = rsCSN;
}
{
}
{
// This RS is even more up to date, reject all previously accepted RSs
// and store this new RS
localCSN);
latestRsCSN = rsCSN;
}
else
{
}
}
}
}
{
{
return csn;
}
}
private static void rejectAllWithRSIsLaterThanBestRS(
{
{
final LocalizableMessage reason =
}
}
/**
* Creates a new list that contains only replication servers that are on the
* same host as the local DS, from a passed replication server list. This
* method gives priority to any replication server which is in the same
* VM as this DS.
*
* @param evals The evaluation object
*/
int localServerId)
{
/*
* Initially look for all servers on the same host. If we find one in the
* same VM, then narrow the search.
*/
boolean foundRSInSameVM = false;
{
if (hp.isLocalAddress())
{
{
if (!foundRSInSameVM)
{
// An RS in the same VM will always have priority.
// Narrow the search to only include servers in this VM.
foundRSInSameVM = true;
}
}
else if (!foundRSInSameVM)
{
// OK, accept RSs on the same machine because we have not found an RS
// in the same VM yet
}
else
{
// Skip: we have found some RSs in the same VM, but this RS is not.
}
}
else
{
}
}
}
int localServerId)
{
{
}
}
/**
* Computes the best replication server the local server should be connected
* to so that the load is correctly spread across the topology, following the
* weights guidance.
* Warning: This method is expected to be called with at least 2 servers in
* bestServers
* Note: this method is static for test purpose (access from unit tests)
* @param evals The evaluation object
* @param currentRsServerId The replication server the local server is
* currently connected to. -1 if the local server is not yet connected
* to any replication server.
* @param localServerId The server id of the local server. This is not used
* when it is not connected to a replication server
* (currentRsServerId = -1)
*/
int currentRsServerId, int localServerId)
{
/*
* - Compute the load goal of each RS, deducing it from the weights affected
* to them.
* - Compute the current load of each RS, deducing it from the DSs
* currently connected to them.
* - Compute the differences between the load goals and the current loads of
* the RSs.
*/
// Sum of the weights
int sumOfWeights = 0;
// Sum of the connected DSs
int sumOfConnectedDSs = 0;
{
}
// Distance (difference) of the current loads to the load goals of each RS:
// key:server id, value: distance
// Precision for the operations (number of digits after the dot)
{
// load goal = rs weight / sum of weights
if (sumOfConnectedDSs != 0)
{
// current load = number of connected DSs / total number of DSs
}
// load distance = load goal - current load
}
if (currentRsServerId == -1)
{
// The local server is not connected yet, find best server to connect to,
// taking the weights into account.
}
else
{
// The local server is currently connected to a RS, let's see if it must
// disconnect or not, taking the weights into account.
}
}
{
/*
* Find the server with the current highest distance to its load goal and
* choose it. Make an exception if every server is correctly balanced,
* that is every current load distances are equal to 0, in that case,
* choose the server with the highest weight
*/
boolean allRsWithZeroDistance = true;
int highestWeightRsId = -1;
int highestWeight = -1;
{
if (loadDistance > highestDistance)
{
// This server is far more from its balance point
}
if (loadDistance != 0)
{
allRsWithZeroDistance = false;
}
if (weight > highestWeight)
{
// This server has a higher weight
}
}
// All servers with a 0 distance ?
{
// Choose server with the highest weight
}
bestRsId));
}
{
float currentLoadDistance =
if (currentLoadDistance < 0)
{
/*
Too much DSs connected to the current RS, compared with its load
goal:
Determine the potential number of DSs to disconnect from the current
RS and see if the local DS is part of them: the DSs that must
disconnect are those with the lowest server id.
Compute the sum of the distances of the load goals of the other RSs
*/
{
if (rsId != currentRsServerId)
{
}
}
{
/*
The average distance of the other RSs shows a lack of DSs.
Compute the number of DSs to disconnect from the current RS,
rounding to the nearest integer number. Do only this if there is
no risk of yoyo effect: when the exact balance cannot be
established due to the current number of DSs connected, do not
disconnect a DS. A simple example where the balance cannot be
reached is:
- RS1 has weight 1 and 2 DSs
- RS2 has weight 1 and 1 DS
=> disconnecting a DS from RS1 to reconnect it to RS2 would have no
sense as this would lead to the reverse situation. In that case,
the perfect balance cannot be reached and we must stick to the
current situation, otherwise the DS would keep move between the 2
RSs
*/
.floatValue();
// Avoid yoyo effect
if (overloadingDSsNumber == 1)
{
// What would be the new load distance for the current RS if
// we disconnect some DSs ?
if (sumOfConnectedDSs != 0)
{
}
// What would be the new load distance for the other RSs ?
/*
Now compare both values: we must not disconnect the DS if this
is for going in a situation where the load distance of the other
RSs is the opposite of the future load distance of the local RS
or we would evaluate that we should disconnect just after being
arrived on the new RS. But we should disconnect if we reach the
perfect balance (both values are 0).
*/
{
// Avoid the yoyo effect, and keep the local DS connected to its
// current RS
return;
}
}
{
// The local server is part of the DSs to disconnect
}
else
{
// The local server is not part of the servers to disconnect from the
// current RS.
}
} else {
// The average distance of the other RSs does not show a lack of DSs:
// no need to disconnect any DS from the current RS.
}
} else {
// The RS load goal is reached or there are not enough DSs connected to
// it to reach it: do not disconnect from this RS and return rsInfo for
// this RS
}
}
{
final BigDecimal otherRSsSumOfLoadDistances =
}
/**
* Returns whether the local DS is overloading the RS.
* <p>
* There are an "overloadingDSsNumber" of DS overloading the RS. The list of
* DSs connected to this RS is ordered by serverId to use a consistent
* ordering across all nodes in the topology. The DSs whose index in the list
* is lower than "overloadingDSsNumber" will be evicted first.
* <p>
* This ordering is unfair since nodes with the lower serverIds will be
* evicted more often than nodes with higher serverIds. However, it is a
* consistent and reliable ordering applicable anywhere in the topology.
*/
{
}
/**
* Start the heartbeat monitor thread.
*/
{
if (heartbeatInterval > 0)
{
}
}
/**
* Stop the heartbeat monitor thread.
*/
private synchronized void stopRSHeartBeatMonitoring()
{
// Nothing to stop unless a heartbeat monitor is currently referenced.
if (heartbeatMonitor != null)
{
}
}
/**
* Restart the ReplicationBroker.
* @param infiniteTry whether to keep retrying until the connection is re-established
*/
{
}
/**
* Restart the ReplicationServer broker after a failure.
*
* @param failingSession the socket which failed
* @param infiniteTry whether to keep retrying until the connection is re-established
*/
{
if (failingSession != null)
{
}
{
}
while (true)
{
// Synchronize inside the loop in order to allow shutdown.
synchronized (startStopLock)
{
{
break;
}
try
{
}
catch (Exception e)
{
}
{
break;
}
}
try
{
}
catch (InterruptedException ignored)
{
// ignore
}
}
if (logger.isTraceEnabled())
{
}
}
/**
* Publish a message to the other servers.
* @param msg the message to publish
*/
{
}
/**
* Publish a message to the other servers.
* @param msg The message to publish.
* @param retryOnFailure Whether reconnect should automatically be done.
* @return Whether publish succeeded.
*/
{
}
/**
* Publish a recovery message to the other servers.
* @param msg the message to publish
*/
{
}
/**
* Publish a message to the other servers.
* @param msg the message to publish
* @param recoveryMsg whether the message is a recovery message
* @param retryOnFailure whether retry should be done on failure
* @return whether the message was successfully sent.
*/
boolean retryOnFailure)
{
boolean done = false;
{
if (connectionError)
{
/*
It was not possible to connect to any replication server.
Since the operation was already processed, we have no other
choice than to return without sending the ReplicationMsg
and relying on the resend procedure of the connect phase to
fix the problem when we finally connect.
*/
if (logger.isTraceEnabled())
{
debugInfo("publish(): Publishing a message is not possible due to"
+ " existing connection error.");
}
return false;
}
try
{
/*
save the session at the time when we acquire the
sendwindow credit so that we can make sure later
that the session did not change in between.
This is necessary to make sure that we don't publish a message
on a session with a credit that was acquired from a previous
session.
*/
synchronized (connectPhaseLock)
{
}
/*
If the Replication domain has decided that there is a need to
recover some changes then it is not allowed to send this
change but it will be the responsibility of the recovery thread to
do it.
*/
if (!recoveryMsg & connectRequiresRecovery)
{
return false;
}
boolean credit;
{
/*
Acquiring the window credit must be done outside of the
connectPhaseLock because it can be blocking and we don't
want to hold off reconnection in case the connection dropped.
*/
credit =
}
else
{
credit = true;
}
if (credit)
{
synchronized (connectPhaseLock)
{
/*
session may have been set to null in the connection phase
when restarting the broker for example.
Check the session. If it has changed, some disconnection or
reconnection happened and we need to restart from scratch.
*/
{
done = true;
}
}
}
{
synchronized (connectPhaseLock)
{
/*
the window is still closed.
Send a WindowProbeMsg message to wake up the receiver in case the
window update message was lost somehow...
then loop to check again if connection was closed.
*/
{
}
}
}
}
catch (IOException e)
{
if (logger.isTraceEnabled())
{
debugInfo("publish(): IOException caught: "
+ stackTraceToSingleLineString(e));
}
if (!retryOnFailure)
{
return false;
}
// The receive threads should handle reconnection or
// mark this broker in error. Just retry.
synchronized (connectPhaseLock)
{
try
{
}
catch (InterruptedException ignored)
{
if (logger.isTraceEnabled())
{
debugInfo("publish(): InterruptedException caught 1: "
}
}
}
}
catch (InterruptedException ignored)
{
// just loop.
if (logger.isTraceEnabled())
{
debugInfo("publish(): InterruptedException caught 2: "
}
}
}
return true;
}
/**
* Receive a message.
* This method is not thread-safe and should either always be
* called in a single thread or protected by a locking mechanism
* before being called. This is a wrapper around the variant taking boolean
* flags, kept so that we do not have to modify existing tests.
*
* @return the received message
* @throws SocketTimeoutException if the timeout set by setSoTimeout
* has expired
*/
{
return receive(false, true, false);
}
/**
* Receive a message.
* This method is not thread-safe and should either always be
* called in a single thread or protected by a locking mechanism
* before being called.
*
* @param reconnectToTheBestRS Whether broker will automatically switch
* to the best suitable RS.
* @param reconnectOnFailure Whether broker will automatically reconnect
* on failure.
* @param returnOnTopoChange Whether broker should return TopologyMsg
* received.
* @return the received message
*
* @throws SocketTimeoutException if the timeout set by setSoTimeout
* has expired
*/
boolean reconnectOnFailure, boolean returnOnTopoChange)
throws SocketTimeoutException
{
while (!shutdown)
{
{
// infinite try to reconnect
continue;
}
// Save session information for later in case we need it for log messages
{
// Must be shutting down.
break;
}
final int serverId = getServerId();
try
{
{
synchronized (this)
{
rcvWindow--;
}
}
{
}
else if (msg instanceof TopologyMsg)
{
if (reconnectToTheBestRS)
{
// Reset wait time before next computation of best server
}
// Caller wants to check what's changed
if (returnOnTopoChange)
{
return msg;
}
}
{
// RS performs a proper disconnection
logger.warn(WARN_REPLICATION_SERVER_PROPERLY_DISCONNECTED, previousRsServerID, rs.replicationServer,
// Try to find a suitable RS
}
else if (msg instanceof MonitorMsg)
{
// This is the response to a MonitorRequest that was sent earlier or
// the regular message of the monitoring publisher of the RS.
// Extract and store replicas ServerStates
{
}
// Notify the sender that the response was received.
synchronized (monitorResponse)
{
monitorResponse.set(true);
}
// Update the replication servers ServerStates with new received info
{
{
}
}
/*
Now if it is allowed, compute the best replication server to see if
it is still the one we are currently connected to. If not,
disconnect properly and let the connection algorithm re-connect to
best replication server
*/
if (reconnectToTheBestRS)
{
if (mustRunBestServerCheckingAlgorithm == 2)
{
// Stable topology (no topo msg for a few seconds): proceed with
// best server checking.
false, previousRsServerID, state,
if (previousRsServerID != -1
&& (bestServerInfo == null
{
// The best replication server is no longer the one we are
// currently using. Disconnect properly then reconnect.
if (bestServerInfo == null)
{
}
else
{
}
if (logger.isTraceEnabled())
{
}
reStart(true);
}
// Reset wait time before next computation of best server
}
}
}
else
{
return msg;
}
}
catch (SocketTimeoutException e)
{
throw e;
}
catch (Exception e)
{
logger.traceException(e);
if (!shutdown)
{
{
// We did not initiate the close on our side, log an error message.
}
if (!reconnectOnFailure)
{
break; // does not seem necessary to explicitly disconnect ..
}
}
}
} // while !shutdown
return null;
}
/**
* Gets the States of all the Replicas currently in the Topology. When this
* method is called, a Monitoring message will be sent to the Replication
* Server to which this domain is currently connected so that it computes a
* table containing information about all Directory Servers in the topology.
* This computation involves communications with all the servers currently
* connected.
*
* @return The States of all Replicas in the topology (except us)
*/
{
monitorResponse.set(false);
// Publish a monitor request message to the Replication Server
// Wait for the response for up to 10 seconds.
try
{
synchronized (monitorResponse)
{
if (!monitorResponse.get())
{
}
}
} catch (InterruptedException e)
{
}
return replicaStates;
}
/**
* This method performs the computation needed for the window
* management after the updates have been processed by the worker threads.
*
* This should be called once the replay threads have done their job
* and the window can be opened again.
*/
public synchronized void updateWindowAfterReplay()
{
try
{
{
// Reset the updateDoneCount counter.
updateDoneCount = 0;
}
} catch (IOException e)
{
// Deliberately ignored: any error on the socket will be handled by the
// thread calling receive().
}
}
/** Stop the server. */
public void stop()
{
{
}
// Take startStopLock so that the shutdown flag is checked and set
// atomically with respect to other code synchronizing on the same lock.
synchronized (startStopLock)
{
if (shutdown)
{
// Already stopped: nothing more to do.
return;
}
// Mark the broker as shut down.
shutdown = true;
}
}
/**
* Set a timeout value.
* With this option set to a non-zero value, calls to the receive() method
* block for only this amount of time after which a
* java.net.SocketTimeoutException is raised.
* The Broker is valid and usable even after such an Exception is raised.
*
* @param timeout the specified timeout, in milliseconds.
* @throws SocketException if there is an error in the underlying protocol,
* such as a TCP error.
*/
{
{
}
}
/**
* Get the name of the replicationServer to which this broker is currently
* connected.
*
* @return the name of the replicationServer to which this domain
* is currently connected.
*/
{
}
/**
* Get the maximum receive window size.
*
* @return The maximum receive window size.
*/
public int getMaxRcvWindow()
{
  // The maximum receive window is the window size held by the config.
  final int configuredWindowSize = config.getWindowSize();
  return configuredWindowSize;
}
/**
* Get the current receive window size.
*
* @return The current receive window size.
*/
public int getCurrentRcvWindow()
{
  // Plain read of the receive window counter of this broker.
  return this.rcvWindow;
}
/**
* Get the maximum send window size.
*
* @return The maximum send window size.
*/
public int getMaxSendWindow()
{
  // Plain read of the maxSendWindow field of this broker.
  return this.maxSendWindow;
}
/**
* Get the current send window size.
*
* @return The current send window size.
*/
public int getCurrentSendWindow()
{
  // The available permits are only reported while connected; 0 otherwise.
  return isConnected() ? sendWindow.availablePermits() : 0;
}
/**
* Get the number of times the connection was lost.
* @return The number of times the connection was lost.
*/
public int getNumLostConnections()
{
  // Plain read of the lost-connection counter of this broker.
  return this.numLostConnections;
}
/**
* Change some configuration parameters.
*
* @param newConfig The new config to use.
* @return A boolean indicating whether the changes
* require restarting the service.
*/
{
// These parameters need to be renegotiated with the ReplicationServer
// so if they have changed, that requires restarting the session with
// the ReplicationServer.
// A new session is necessary only when information regarding
// the connection is modified
boolean needToRestartSession =
return needToRestartSession;
}
/**
* Get the version of the replication protocol.
* @return The version of the replication protocol.
*/
public short getProtocolVersion()
{
{
// Report the protocol version of the session.
return session.getProtocolVersion();
}
// Otherwise report the current protocol version known to ProtocolVersion.
return ProtocolVersion.getCurrentVersion();
}
/**
* Check if the broker is connected to a ReplicationServer and therefore
* ready to receive and send Replication Messages.
*
* @return true if the server is connected, false if not.
*/
public boolean isConnected()
{
}
/**
* Determine whether the connection to the replication server is encrypted.
* @return true if the connection is encrypted, false otherwise.
*/
public boolean isSessionEncrypted()
{
}
/**
* Signals the RS we just entered a new status.
* @param newStatus The status the local DS just entered
*/
{
try
{
} catch (IOException ex)
{
}
}
/**
* Gets the info for DSs in the topology (except us).
* @return The info for DSs in the topology (except us)
*/
{
}
/**
* Gets the info for RSs in the topology (except the one we are connected
* to).
* @return The info for RSs in the topology (except the one we are connected
* to)
*/
{
}
{
{
}
return result;
}
/**
* Processes an incoming TopologyMsg.
* Updates the structures for the local view of the topology.
*
* @param topoMsg
* The topology information received from RS.
* @param rsServerId
* the serverId to use for the connectedDS
*/
{
{
}
}
{
do
{
}
if (logger.isTraceEnabled())
{
}
return newTopo;
}
/**
* Contains the last known state of the replication topology.
*/
static final class Topology
{
/**
* The RS's serverId that this DS was connected to when this topology state
* was computed.
*/
private final int rsServerId;
/**
* Info for other DSs.
* <p>
* Warning: does not contain info for us (for our server id)
*/
/**
* The map of replication server info initialized at connection time and
* regularly updated. This is used to decide which replication server is the
* most suitable one to connect to. Key: replication server id.
* Value: replication server info for the matching replication server id.
*/
/** Creates a topology whose rsServerId is -1. */
private Topology()
{
// NOTE(review): -1 presumably means "not connected to any RS" - confirm.
this.rsServerId = -1;
}
/**
* Constructor to use when only the RSInfos need to be recomputed.
*
* @param dsInfosToKeep
* the DSInfos that will be stored as is
* @param newRSInfos
* the new RSInfos from which to compute the new topology
* @param dsServerId
* the DS serverId
* @param rsServerId
* the current connected RS serverId
* @param configuredReplicationServerUrls
* the configured replication server URLs
* @param previousRsInfos
* the RSInfos computed in the previous Topology object
*/
int dsServerId, int rsServerId,
{
this.rsServerId = rsServerId;
}
/**
* Constructor to use when a new TopologyMsg has been received.
*
* @param topoMsg
* the topology message containing the new DSInfos and RSInfos from
* which to compute the new topology
* @param dsServerId
* the DS serverId
* @param rsServerId
* the current connected RS serverId
* @param configuredReplicationServerUrls
* the configured replication server URLs
* @param previousRsInfos
* the RSInfos computed in the previous Topology object
*/
{
this.rsServerId = rsServerId;
}
int dsServerId)
{
}
{
// Update replication server info list with the received topology info
{
{
// New replication server: create info for it and add it to the list
}
else
{
// Update the existing info for the replication server
}
}
// Remove any replication server that may have disappeared from the
// topology
}
/** Computes the list of DSs connected to a particular RS. */
{
if (rsServerId == rsId)
{
/*
* If we are computing connected DSs for the RS we are connected to, we
* should count the local DS as the DSInfo of the local DS is not sent
* by the replication server in the topology message. We must count
* ourselves as a connected server.
*/
}
{
{
}
}
return connectedDSs;
}
/**
* Sets the locally configured flag for the passed ReplicationServerInfo
* object, analyzing the local configuration.
*
* @param rsInfo
* the Replication server to check and update
* @param configuredReplicationServerUrls
* the configured replication server URLs to compare the RS URL against
*/
{
// Determine if the passed ReplicationServerInfo has a URL that is present
// in the locally configured replication servers
{
// The ReplicationServerInfo has been generated from a server with
// no URL in TopologyMsg (i.e. with replication protocol version < 4):
// ignore this server as we do not know how to connect to it
rsInfo.setLocallyConfigured(false);
return;
}
{
{
// This RS is locally configured, mark this
rsInfo.setLocallyConfigured(true);
return;
}
}
// No early return above: mark this RS as not locally configured
rsInfo.setLocallyConfigured(false);
}
/** {@inheritDoc} */
{
// Same instance: trivially equal
if (this == obj)
{
return true;
}
{
return false;
}
}
{
{
{
return false;
}
}
return true;
}
{
{
{
return false;
}
}
return true;
}
/** {@inheritDoc} */
public int hashCode()
{
final int prime = 31;
int result = 1;
return result;
}
/** {@inheritDoc} */
{
return getClass().getSimpleName()
+ " rsServerId=" + rsServerId
}
}
/**
* Check if the broker could not find any Replication Server and therefore
* connection attempt failed.
*
* @return true if the server could not connect to any Replication Server.
*/
boolean hasConnectionError()
{
  // Plain read of the connectionError flag of this broker.
  return this.connectionError;
}
/**
* Starts publishing to the RS the current timestamp used in this server.
*/
{
// Start a CSN heartbeat thread.
if (changeTimeHeartbeatInterval > 0)
{
+ ") change time heartbeat publisher for domain \"" + getBaseDN()
}
else
{
if (logger.isTraceEnabled())
{
debugInfo("is not configured to send CSN heartbeat interval");
}
}
}
/**
* Stops publishing to the RS the current timestamp used in this server.
*/
private synchronized void stopChangeTimeHeartBeatPublishing()
{
// Nothing to stop unless a publisher thread is currently referenced.
if (ctHeartbeatPublisherThread != null)
{
}
}
/**
* Set the connectRequiresRecovery to the provided value.
* This flag is used to indicate if a recovery of Update is necessary
* after a reconnection to a RS.
* It is the responsibility of the ReplicationDomain to set it during the
* sessionInitiated phase.
*
* @param b the new value of the connectRequiresRecovery.
*/
public void setRecoveryRequired(boolean b)
{
}
/**
* Returns whether the broker is shutting down.
* @return whether the broker is shutting down.
*/
boolean shuttingDown()
{
  // Plain read of the volatile shutdown flag of this broker.
  return this.shutdown;
}
/**
* Returns the local address of this replication domain, or the empty string
* if it is not yet connected.
*
* @return The local address.
*/
{
}
/**
* Returns the replication monitor instance name associated with this broker.
*
* @return The replication monitor instance name.
*/
{
// Only invoked by replication domain so always non-null.
return monitor.getMonitorInstanceName();
}
{
{
// monitor name is changing, deregister before registering again
}
return newRS;
}
/**
* Must be invoked each time the session changes because, the monitor name is
* dynamically created with the session name, while monitor registration is
* static.
*
* @see #monitor
*/
private void registerReplicationMonitor()
{
// The monitor should not be registered if this is a unit test
// because the replication domain is null.
{
}
}
private void deregisterReplicationMonitor()
{
// The monitor should not be deregistered if this is a unit test
// because the replication domain is null.
{
}
}
/** {@inheritDoc} */
{
.append(", ");
}
{
}
}