001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2006-2010 Sun Microsystems, Inc.
015 * Portions Copyright 2011-2016 ForgeRock AS.
016 */
017package org.opends.server.replication.service;
018
019import java.io.IOException;
020import java.math.BigDecimal;
021import java.math.MathContext;
022import java.math.RoundingMode;
023import java.net.*;
024import java.util.*;
025import java.util.Map.Entry;
026import java.util.concurrent.ConcurrentSkipListMap;
027import java.util.concurrent.Semaphore;
028import java.util.concurrent.TimeUnit;
029import java.util.concurrent.atomic.AtomicBoolean;
030import java.util.concurrent.atomic.AtomicReference;
031
032import net.jcip.annotations.GuardedBy;
033import net.jcip.annotations.Immutable;
034
035import org.forgerock.i18n.LocalizableMessage;
036import org.forgerock.i18n.slf4j.LocalizedLogger;
037import org.forgerock.util.Utils;
038import org.opends.server.admin.std.server.ReplicationDomainCfg;
039import org.opends.server.core.DirectoryServer;
040import org.opends.server.replication.common.*;
041import org.opends.server.replication.plugin.MultimasterReplication;
042import org.opends.server.replication.protocol.*;
043import org.forgerock.opendj.ldap.DN;
044import org.opends.server.types.HostPort;
045
046import static org.opends.messages.ReplicationMessages.*;
047import static org.opends.server.replication.protocol.ProtocolVersion.*;
048import static org.opends.server.replication.server.ReplicationServer.*;
049import static org.opends.server.util.StaticUtils.*;
050
051/**
052 * The broker for Multi-master Replication.
053 */
054public class ReplicationBroker
055{
056
057  /**
058   * Immutable class containing information about whether the broker is
059   * connected to an RS and data associated to this connected RS.
060   */
061  @Immutable
062  private static final class ConnectedRS
063  {
064
065    private static final ConnectedRS NO_CONNECTED_RS = new ConnectedRS(
066        NO_CONNECTED_SERVER);
067
068    /** The info of the RS we are connected to. */
069    private final ReplicationServerInfo rsInfo;
070    /** Contains a connected session to the RS if any exist, null otherwise. */
071    private final Session session;
072    private final String replicationServer;
073
074    private ConnectedRS(String replicationServer)
075    {
076      this.rsInfo = null;
077      this.session = null;
078      this.replicationServer = replicationServer;
079    }
080
081    private ConnectedRS(ReplicationServerInfo rsInfo, Session session)
082    {
083      this.rsInfo = rsInfo;
084      this.session = session;
085      this.replicationServer = session != null ?
086          session.getReadableRemoteAddress()
087          : NO_CONNECTED_SERVER;
088    }
089
090    private static ConnectedRS stopped()
091    {
092      return new ConnectedRS("stopped");
093    }
094
095    private static ConnectedRS noConnectedRS()
096    {
097      return NO_CONNECTED_RS;
098    }
099
100    public int getServerId()
101    {
102      return rsInfo != null ? rsInfo.getServerId() : -1;
103    }
104
105    private byte getGroupId()
106    {
107      return rsInfo != null ? rsInfo.getGroupId() : -1;
108    }
109
110    private boolean isConnected()
111    {
112      return session != null;
113    }
114
115    /** {@inheritDoc} */
116    @Override
117    public String toString()
118    {
119      final StringBuilder sb = new StringBuilder();
120      toString(sb);
121      return sb.toString();
122    }
123
124    public void toString(StringBuilder sb)
125    {
126      sb.append("connected=").append(isConnected()).append(", ");
127      if (!isConnected())
128      {
129        sb.append("no connectedRS");
130      }
131      else
132      {
133        sb.append("connectedRS(serverId=").append(rsInfo.getServerId())
134          .append(", serverUrl=").append(rsInfo.getServerURL())
135          .append(", groupId=").append(rsInfo.getGroupId())
136          .append(")");
137      }
138    }
139
140  }
  private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass();
  /**
   * True while the broker is not started: set by the constructor and cleared
   * by {@link #start()}.
   */
  private volatile boolean shutdown;
  /** Lock held by {@link #start()} while it changes the started state. */
  private final Object startStopLock = new Object();
  /** The replication domain configuration (server id, base DN, RS URLs, group id). */
  private volatile ReplicationDomainCfg config;
  /**
   * String reported under CSN=monitor when there is no connected RS.
   */
  static final String NO_CONNECTED_SERVER = "Not connected";
  /** The ServerState used by this broker when negotiating the session with the RS. */
  private final ServerState state;
  /**
   * Send window; replaced by a fresh semaphore holding {@link #maxSendWindow}
   * permits each time a connection to an RS completes.
   */
  private Semaphore sendWindow;
  /** Window size advertised by the RS we last connected to. */
  private int maxSendWindow;
  /** Receive window; reset to the configured maximum on construction, start and connect. */
  private int rcvWindow = 100;
  /** Half of the receive window, computed when the receive window is initialized. */
  private int halfRcvWindow = rcvWindow / 2;
  // NOTE(review): presumably the session read timeout; not set or read in this part of the file.
  private int timeout;
  /** The session security configuration. */
  private final ReplSessionSecurity replSessionSecurity;
  /**
   * The RS this DS is currently connected to.
   * <p>
   * Always use {@link #setConnectedRS(ConnectedRS)} to set a new
   * connected RS.
   */
  // @NotNull // for the reference
  private final AtomicReference<ConnectedRS> connectedRS = new AtomicReference<>(ConnectedRS.noConnectedRS());
  /** Our replication domain. */
  private final ReplicationDomain domain;
  /**
   * This object is used as a conditional event to be notified about
   * the reception of monitor information from the Replication Server.
   */
  private final AtomicBoolean monitorResponse = new AtomicBoolean(false);
  /**
   * A Map containing the ServerStates of all the replicas in the topology
   * as seen by the ReplicationServer the last time it was polled or the last
   * time it published monitoring information.
   */
  private Map<Integer, ServerState> replicaStates = new HashMap<>();
  /** A thread to monitor heartbeats on the session. */
  private HeartbeatMonitor heartbeatMonitor;
  /** The number of times the connection was lost. */
  private int numLostConnections;
  /**
   * When the broker cannot connect to any replication server,
   * it logs an error and keeps retrying every second.
   * This boolean is set when the first failure happens and is used
   * to avoid repeating the error message for further connection failures,
   * and to know that a new message must be printed when the broker
   * finally succeeds in connecting.
   */
  private volatile boolean connectionError;
  /** Lock held for the whole connection attempt; waiters on it are notified once the attempt has an outcome. */
  private final Object connectPhaseLock = new Object();
  /**
   * The thread that publishes messages to the RS containing the current
   * change time of this DS.
   */
  private CTHeartbeatPublisherThread ctHeartbeatPublisherThread;
  /*
   * Properties for the last topology info received from the network.
   */
  /** Contains the last known state of the replication topology. */
  private final AtomicReference<Topology> topology = new AtomicReference<>(new Topology());
  // NOTE(review): presumably counts processed updates for window management; usage is outside this part of the file.
  @GuardedBy("this")
  private volatile int updateDoneCount;
  // NOTE(review): presumably set when a (re)connection must run a recovery phase; usage is outside this part of the file.
  private volatile boolean connectRequiresRecovery;

  /**
   * This integer defines when the best replication server checking algorithm
   * should be engaged.
   * Every time a monitoring message (each monitoring publisher period) is
   * received, it is incremented. When it reaches 2, we run the checking
   * algorithm to see if we must reconnect to another best replication server.
   * Then we reset the value to 0. But when a topology message is received, the
   * integer is reset to 0. This ensures that we wait at least one monitoring
   * publisher period before running the algorithm, but also that we wait at
   * least for a monitoring period after the last received topology message
   * (topology stabilization).
   */
  private int mustRunBestServerCheckingAlgorithm;

  /**
   * The monitor provider for this replication domain.
   * <p>
   * The name of the monitor includes the local address and must therefore be
   * re-registered every time the session is re-established or destroyed. The
   * monitor provider can only be created (i.e. non-null) if there is a
   * replication domain, which is not the case in unit tests.
   */
  private final ReplicationMonitor monitor;
228
229  /**
230   * Creates a new ReplicationServer Broker for a particular ReplicationDomain.
231   *
232   * @param replicationDomain The replication domain that is creating us.
233   * @param state The ServerState that should be used by this broker
234   *        when negotiating the session with the replicationServer.
235   * @param config The configuration to use.
236   * @param replSessionSecurity The session security configuration.
237   */
238  public ReplicationBroker(ReplicationDomain replicationDomain,
239      ServerState state, ReplicationDomainCfg config,
240      ReplSessionSecurity replSessionSecurity)
241  {
242    this.domain = replicationDomain;
243    this.state = state;
244    this.config = config;
245    this.replSessionSecurity = replSessionSecurity;
246    this.rcvWindow = getMaxRcvWindow();
247    this.halfRcvWindow = rcvWindow / 2;
248    this.shutdown = true;
249
250    /*
251     * Only create a monitor if there is a replication domain (this is not the
252     * case in some unit tests).
253     */
254    this.monitor = replicationDomain != null ? new ReplicationMonitor(
255        replicationDomain) : null;
256    registerReplicationMonitor();
257  }
258
259  /**
260   * Start the ReplicationBroker.
261   */
262  public void start()
263  {
264    synchronized (startStopLock)
265    {
266      if (!shutdown)
267      {
268        return;
269      }
270      shutdown = false;
271      this.rcvWindow = getMaxRcvWindow();
272      connectAsDataServer();
273    }
274  }
275
276  /**
277   * Gets the group id of the RS we are connected to.
278   * @return The group id of the RS we are connected to
279   */
280  public byte getRsGroupId()
281  {
282    return connectedRS.get().getGroupId();
283  }
284
285  /**
286   * Gets the server id of the RS we are connected to.
287   * @return The server id of the RS we are connected to
288   */
289  public int getRsServerId()
290  {
291    return connectedRS.get().getServerId();
292  }
293
294  /**
295   * Gets the server id.
296   * @return The server id
297   */
298  public int getServerId()
299  {
300    return config.getServerId();
301  }
302
303  private DN getBaseDN()
304  {
305    return config.getBaseDN();
306  }
307
308  private Set<String> getReplicationServerUrls()
309  {
310    return config.getReplicationServer();
311  }
312
313  private byte getGroupId()
314  {
315    return (byte) config.getGroupId();
316  }
317
318  /**
319   * Gets the server id.
320   * @return The server id
321   */
322  private long getGenerationID()
323  {
324    return domain.getGenerationID();
325  }
326
327  /**
328   * Set the generation id - for test purpose.
329   * @param generationID The generation id
330   */
331  public void setGenerationID(long generationID)
332  {
333    domain.setGenerationID(generationID);
334  }
335
336  /**
337   * Compares 2 replication servers addresses and returns true if they both
338   * represent the same replication server instance.
339   * @param rs1Url Replication server 1 address
340   * @param rs2Url Replication server 2 address
341   * @return True if both replication server addresses represent the same
342   * replication server instance, false otherwise.
343   */
344  private static boolean isSameReplicationServerUrl(String rs1Url,
345      String rs2Url)
346  {
347    try
348    {
349      final HostPort hp1 = HostPort.valueOf(rs1Url);
350      final HostPort hp2 = HostPort.valueOf(rs2Url);
351      return hp1.isEquivalentTo(hp2);
352    }
353    catch (RuntimeException ex)
354    {
355      // Not a RS url or not a valid port number: should not happen
356      return false;
357    }
358  }
359
  /**
   * Bag class for keeping info we get from a replication server in order to
   * compute the best one to connect to. This is in fact a wrapper to a
   * ReplServerStartMsg (V3) or a ReplServerStartDSMsg (V4). This can also be
   * updated with info coming from received topology messages or monitoring
   * messages.
   */
  static class ReplicationServerInfo
  {
    /** Identity of the RS (id, URL, generation id, group id, weight); replaced on update. */
    private RSInfo rsInfo;
    /** Protocol version announced by the RS; 0 when built from an RSInfo. */
    private final short protocolVersion;
    /** Base DN announced by the RS; null when built from an RSInfo. */
    private final DN baseDN;
    /** Window size announced by the RS; 0 when built from an RSInfo. */
    private final int windowSize;
    // @NotNull
    private final ServerState serverState;
    /** Whether the RS announced SSL encryption; false when built from an RSInfo. */
    private final boolean sslEncryption;
    /** Degraded status threshold announced by the RS; -1 when built from an RSInfo. */
    private final int degradedStatusThreshold;
    /** Keeps the 0 value if created with a ReplServerStartMsg. */
    private int connectedDSNumber;
    // @NotNull
    // NOTE(review): despite the annotation above, this stays null when the
    // instance is built from a start message, until update(RSInfo, Set) is called.
    private Set<Integer> connectedDSs;
    /**
     * Is this RS locally configured? (the RS is recognized as a usable server).
     */
    private boolean locallyConfigured = true;

    /**
     * Create a new instance of ReplicationServerInfo wrapping the passed
     * message, then override its server URL.
     * @param msg The replication message to wrap.
     * @param newServerURL Override serverURL.
     * @return The new instance wrapping the passed message.
     * @throws IllegalArgumentException If the passed message has an unexpected
     *                                  type.
     */
    private static ReplicationServerInfo newInstance(
      ReplicationMsg msg, String newServerURL) throws IllegalArgumentException
    {
      final ReplicationServerInfo rsInfo = newInstance(msg);
      rsInfo.setServerURL(newServerURL);
      return rsInfo;
    }

    /**
     * Create a new instance of ReplicationServerInfo wrapping the passed
     * message.
     * @param msg The replication message to wrap.
     * @return The new instance wrapping the passed message.
     * @throws IllegalArgumentException If the passed message has an unexpected
     *                                  type.
     */
    static ReplicationServerInfo newInstance(ReplicationMsg msg)
        throws IllegalArgumentException
    {
      if (msg instanceof ReplServerStartMsg)
      {
        // RS uses protocol V3 or lower
        return new ReplicationServerInfo((ReplServerStartMsg) msg);
      }
      else if (msg instanceof ReplServerStartDSMsg)
      {
        // RS uses protocol V4 or higher
        return new ReplicationServerInfo((ReplServerStartDSMsg) msg);
      }

      // Unsupported message type: should not happen
      throw new IllegalArgumentException("Unexpected PDU type: "
          + msg.getClass().getName() + ":\n" + msg);
    }

    /**
     * Constructs a ReplicationServerInfo object wrapping a
     * {@link ReplServerStartMsg}. The weight is set to 1 and the connected DS
     * number keeps its default value of 0, since this message does not carry
     * them.
     *
     * @param msg
     *          The {@link ReplServerStartMsg} this object will wrap.
     */
    private ReplicationServerInfo(ReplServerStartMsg msg)
    {
      this.protocolVersion = msg.getVersion();
      this.rsInfo = new RSInfo(msg.getServerId(), msg.getServerURL(),
          msg.getGenerationId(), msg.getGroupId(), 1);
      this.baseDN = msg.getBaseDN();
      this.windowSize = msg.getWindowSize();
      final ServerState ss = msg.getServerState();
      // Never keep a null server state: fall back to an empty one
      this.serverState = ss != null ? ss : new ServerState();
      this.sslEncryption = msg.getSSLEncryption();
      this.degradedStatusThreshold = msg.getDegradedStatusThreshold();
    }

    /**
     * Constructs a ReplicationServerInfo object wrapping a
     * {@link ReplServerStartDSMsg}.
     *
     * @param msg
     *          The {@link ReplServerStartDSMsg} this object will wrap.
     */
    private ReplicationServerInfo(ReplServerStartDSMsg msg)
    {
      this.rsInfo = new RSInfo(msg.getServerId(), msg.getServerURL(),
          msg.getGenerationId(), msg.getGroupId(), msg.getWeight());
      this.protocolVersion = msg.getVersion();
      this.baseDN = msg.getBaseDN();
      this.windowSize = msg.getWindowSize();
      final ServerState ss = msg.getServerState();
      // Never keep a null server state: fall back to an empty one
      this.serverState = ss != null ? ss : new ServerState();
      this.sslEncryption = msg.getSSLEncryption();
      this.degradedStatusThreshold = msg.getDegradedStatusThreshold();
      this.connectedDSNumber = msg.getConnectedDSNumber();
    }

    /**
     * Constructs a new replication server info with the passed RSInfo internal
     * values and the passed connected DSs. The fields that only a start
     * message provides get neutral values (protocol version 0, null base DN,
     * window size 0, no SSL, threshold -1, empty server state).
     *
     * @param rsInfo
     *          The RSinfo to use for the update
     * @param connectedDSs
     *          The new connected DSs, must not be null
     */
    ReplicationServerInfo(RSInfo rsInfo, Set<Integer> connectedDSs)
    {
      this.rsInfo =
          new RSInfo(rsInfo.getId(), rsInfo.getServerUrl(), rsInfo
              .getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight());
      this.protocolVersion = 0;
      this.baseDN = null;
      this.windowSize = 0;
      this.connectedDSs = connectedDSs;
      this.connectedDSNumber = connectedDSs.size();
      this.sslEncryption = false;
      this.degradedStatusThreshold = -1;
      this.serverState = new ServerState();
    }

    /**
     * Get the server state.
     * @return The server state, never null
     */
    public ServerState getServerState()
    {
      return serverState;
    }

    /**
     * Get the group id.
     * @return The group id
     */
    public byte getGroupId()
    {
      return rsInfo.getGroupId();
    }

    /**
     * Get the server protocol version.
     * @return the protocolVersion
     */
    public short getProtocolVersion()
    {
      return protocolVersion;
    }

    /**
     * Get the generation id.
     * @return the generationId
     */
    public long getGenerationId()
    {
      return rsInfo.getGenerationId();
    }

    /**
     * Get the server id.
     * @return the serverId
     */
    public int getServerId()
    {
      return rsInfo.getId();
    }

    /**
     * Get the server URL.
     * @return the serverURL
     */
    public String getServerURL()
    {
      return rsInfo.getServerUrl();
    }

    /**
     * Get the base DN.
     *
     * @return the base DN, null if this object was built from an RSInfo
     */
    public DN getBaseDN()
    {
      return baseDN;
    }

    /**
     * Get the window size.
     * @return the windowSize
     */
    public int getWindowSize()
    {
      return windowSize;
    }

    /**
     * Get the ssl encryption.
     * @return the sslEncryption
     */
    public boolean isSslEncryption()
    {
      return sslEncryption;
    }

    /**
     * Get the degraded status threshold.
     * @return the degradedStatusThreshold
     */
    public int getDegradedStatusThreshold()
    {
      return degradedStatusThreshold;
    }

    /**
     * Get the weight.
     * @return the weight; 1 if this object is a wrapper for
     * a ReplServerStartMsg.
     */
    public int getWeight()
    {
      return rsInfo.getWeight();
    }

    /**
     * Get the connected DS number.
     * @return the connectedDSNumber; 0 if this object is a wrapper for
     * a ReplServerStartMsg that has not been updated since.
     */
    public int getConnectedDSNumber()
    {
      return connectedDSNumber;
    }

    /**
     * Converts the object to a RSInfo object.
     * @return The RSInfo object matching this object.
     */
    RSInfo toRSInfo()
    {
      return rsInfo;
    }

    /**
     * Updates replication server info with the passed RSInfo internal values
     * and the passed connected DSs. The server id and server URL of this
     * object are kept; only the generation id, group id and weight are taken
     * from the passed RSInfo.
     * @param rsInfo The RSinfo to use for the update
     * @param connectedDSs The new connected DSs, must not be null
     */
    private void update(RSInfo rsInfo, Set<Integer> connectedDSs)
    {
      this.rsInfo = new RSInfo(this.rsInfo.getId(), this.rsInfo.getServerUrl(),
          rsInfo.getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight());
      this.connectedDSs = connectedDSs;
      this.connectedDSNumber = connectedDSs.size();
    }

    /**
     * Replaces the server URL, keeping every other RSInfo value.
     * @param newServerURL The server URL to use from now on
     */
    private void setServerURL(String newServerURL)
    {
      rsInfo = new RSInfo(rsInfo.getId(), newServerURL,
          rsInfo.getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight());
    }

    /**
     * Updates replication server info with the passed server state.
     * @param serverState The ServerState to use for the update
     */
    private void update(ServerState serverState)
    {
      this.serverState.update(serverState);
    }

    /**
     * Get the connected DSs.
     * @return the connected DSs; null if this object was built from a start
     * message and has not been updated with connected DSs since
     */
    public Set<Integer> getConnectedDSs()
    {
      return connectedDSs;
    }

    /**
     * Gets the locally configured status for this RS.
     * @return the locallyConfigured
     */
    public boolean isLocallyConfigured()
    {
      return locallyConfigured;
    }

    /**
     * Sets the locally configured status for this RS.
     * @param locallyConfigured the locallyConfigured to set
     */
    public void setLocallyConfigured(boolean locallyConfigured)
    {
      this.locallyConfigured = locallyConfigured;
    }

    /**
     * Returns a string representation of this object.
     * @return A string representation of this object.
     */
    @Override
    public String toString()
    {
      return "ReplServerInfo Url:" + getServerURL()
          + " ServerId:" + getServerId()
          + " GroupId:" + getGroupId()
          + " connectedDSs:" + connectedDSs;
    }
  }
684
685  /**
686   * Contacts all replication servers to get information from them and being
687   * able to choose the more suitable.
688   * @return the collected information.
689   */
690  private Map<Integer, ReplicationServerInfo> collectReplicationServersInfo()
691  {
692    final Map<Integer, ReplicationServerInfo> rsInfos = new ConcurrentSkipListMap<>();
693
694    for (String serverUrl : getReplicationServerUrls())
695    {
696      // Connect to server + get and store info about it
697      final ConnectedRS rs = performPhaseOneHandshake(serverUrl, false);
698      final ReplicationServerInfo rsInfo = rs.rsInfo;
699      if (rsInfo != null)
700      {
701        rsInfos.put(rsInfo.getServerId(), rsInfo);
702      }
703    }
704
705    return rsInfos;
706  }
707
  /**
   * Connect to a ReplicationServer.
   *
   * The handshake sequence between a DS and a RS is divided into 2 logical
   * consecutive phases (phase 1 and phase 2). The DS always initiates the
   * connection and always sends the first message:
   *
   * DS<->RS:
   * -------
   *
   * phase 1:
   * DS --- ServerStartMsg ---> RS
   * DS <--- ReplServerStartDSMsg --- RS
   * phase 2:
   * DS --- StartSessionMsg ---> RS
   * DS <--- TopologyMsg --- RS
   *
   * Before performing a full handshake sequence, the DS searches for the most
   * suitable RS by performing only the phase 1 handshake with every RS it
   * knows, then closing the connection. This gathers information on the
   * available RSs, which is then used to decide with which RS the full
   * handshake (phase 1 then phase 2) will finally be performed.
   *
   * On return, the connected RS reference reflects the outcome; a message is
   * logged on success, and on the first failure only (see connectionError).
   *
   * @throws NumberFormatException address was invalid
   *         (NOTE(review): not evident from this method's body - confirm)
   */
  private void connectAsDataServer()
  {
    /*
     * If a first connect or a connection failure occur, we go through here.
     * force status machine to NOT_CONNECTED_STATUS so that monitoring can see
     * that we are not connected.
     */
    domain.toNotConnectedStatus();

    /*
    Stop any existing heartbeat monitor and changeTime publisher
    from a previous session.
    */
    stopRSHeartBeatMonitoring();
    stopChangeTimeHeartBeatPublishing();
    // Restart the count of monitoring periods before the best-RS check may run
    mustRunBestServerCheckingAlgorithm = 0;

    synchronized (connectPhaseLock)
    {
      final int serverId = getServerId();
      final DN baseDN = getBaseDN();

      /*
       * Connect to each replication server and get their ServerState then find
       * out which one is the best to connect to.
       */
      if (logger.isTraceEnabled())
      {
        debugInfo("phase 1 : will perform PhaseOneH with each RS in order to elect the preferred one");
      }

      // Get info from every available replication servers
      Map<Integer, ReplicationServerInfo> rsInfos =
          collectReplicationServersInfo();
      computeNewTopology(toRSInfos(rsInfos));

      if (rsInfos.isEmpty())
      {
        // No RS answered the phase 1 handshake: record that we are not connected
        setConnectedRS(ConnectedRS.noConnectedRS());
      }
      else
      {
        // At least one server answered, find the best one.
        RSEvaluations evals = computeBestReplicationServer(true, -1, state,
            rsInfos, serverId, getGroupId(), getGenerationID());

        // Best found, now initialize connection to this one (handshake phase 1)
        if (logger.isTraceEnabled())
        {
          debugInfo("phase 2 : will perform PhaseOneH with the preferred RS=" + evals.getBestRS());
        }

        // Redo the phase 1 handshake with the elected RS only
        final ConnectedRS electedRS = performPhaseOneHandshake(
            evals.getBestRS().getServerURL(), true);
        final ReplicationServerInfo electedRsInfo = electedRS.rsInfo;
        if (electedRsInfo != null)
        {
          /*
          Update replication server info with potentially more up to date
          data (server state for instance may have changed)
          */
          rsInfos.put(electedRsInfo.getServerId(), electedRsInfo);

          // Handshake phase 1 exchange went well

          // Compute in which status we are starting the session to tell the RS
          final ServerStatus initStatus = computeInitialServerStatus(
              electedRsInfo.getGenerationId(), electedRsInfo.getServerState(),
              electedRsInfo.getDegradedStatusThreshold(), getGenerationID());

          // Perform session start (handshake phase 2)
          final TopologyMsg topologyMsg =
              performPhaseTwoHandshake(electedRS, initStatus);

          if (topologyMsg != null) // Handshake phase 2 exchange went well
          {
            connectToReplicationServer(electedRS, initStatus, topologyMsg);
          } // Could perform handshake phase 2 with best
        } // Could perform handshake phase 1 with best
      }

      // connectedRS has been updated by calls above, reload it
      final ConnectedRS rs = connectedRS.get();
      if (rs.isConnected())
      {
        // Wake up a thread waiting on the connect phase lock
        connectPhaseLock.notify();

        final long rsGenId = rs.rsInfo.getGenerationId();
        final int rsServerId = rs.rsInfo.getServerId();
        // An RS generation id of -1 is treated like a matching generation id
        if (rsGenId == getGenerationID() || rsGenId == -1)
        {
          logger.info(NOTE_NOW_FOUND_SAME_GENERATION_CHANGELOG, serverId, rsServerId, baseDN,
              rs.replicationServer, getGenerationID());
        }
        else
        {
          logger.warn(WARN_NOW_FOUND_BAD_GENERATION_CHANGELOG, serverId, rsServerId, baseDN,
              rs.replicationServer, getGenerationID(), rsGenId);
        }
      }
      else
      {
        // This server could not find any replicationServer.
        // It's going to start in degraded mode. Log a message.
        if (!connectionError)
        {
          // Only the first failure is reported, until a connection succeeds
          connectionError = true;
          connectPhaseLock.notify();

          if (!rsInfos.isEmpty())
          {
            logger.warn(WARN_COULD_NOT_FIND_CHANGELOG, serverId, baseDN,
                Utils.joinAsString(", ", rsInfos.keySet()));
          }
          else
          {
            logger.warn(WARN_NO_AVAILABLE_CHANGELOGS, serverId, baseDN);
          }
        }
      }
    }
  }
855
856  private void computeNewTopology(List<RSInfo> newRSInfos)
857  {
858    final int rsServerId = getRsServerId();
859
860    Topology oldTopo;
861    Topology newTopo;
862    do
863    {
864      oldTopo = topology.get();
865      newTopo = new Topology(oldTopo.replicaInfos, newRSInfos, getServerId(),
866          rsServerId, getReplicationServerUrls(), oldTopo.rsInfos);
867    }
868    while (!topology.compareAndSet(oldTopo, newTopo));
869
870    if (logger.isTraceEnabled())
871    {
872      debugInfo(topologyChange(rsServerId, oldTopo, newTopo));
873    }
874  }
875
876  private StringBuilder topologyChange(int rsServerId, Topology oldTopo,
877      Topology newTopo)
878  {
879    final StringBuilder sb = new StringBuilder();
880    sb.append("rsServerId=").append(rsServerId);
881    if (newTopo.equals(oldTopo))
882    {
883      sb.append(", unchangedTopology=").append(newTopo);
884    }
885    else
886    {
887      sb.append(", oldTopology=").append(oldTopo);
888      sb.append(", newTopology=").append(newTopo);
889    }
890    return sb;
891  }
892
  /**
   * Connects to a replication server: finishes the setup of a session once
   * both handshake phases have succeeded. It processes the received topology,
   * renews the send and receive windows, notifies the domain that the session
   * is initiated and starts heartbeat monitoring (plus change time heartbeat
   * publishing for protocol V3 and above).
   * <p>
   * If any step throws, the error is logged and the connected RS is reset to
   * "no connected RS".
   *
   * @param rs
   *          the Replication Server to connect to
   * @param initStatus
   *          The status to enter the state machine with
   * @param topologyMsg
   *          the message containing the topology information
   */
  private void connectToReplicationServer(ConnectedRS rs,
      ServerStatus initStatus, TopologyMsg topologyMsg)
  {
    final DN baseDN = getBaseDN();
    final ReplicationServerInfo rsInfo = rs.rsInfo;

    // Stays false if anything below throws, so that the finally block cleans up
    boolean connectCompleted = false;
    try
    {
      // The send window is sized from what the RS announced
      maxSendWindow = rsInfo.getWindowSize();

      receiveTopo(topologyMsg, rs.getServerId());

      /*
      Log a message to let the administrator know that the failure was resolved.
      Wake up all the threads that were waiting on the window
      on the previous connection.
      */
      connectionError = false;
      if (sendWindow != null)
      {
        /*
         * Fix (hack) for OPENDJ-401: we want to ensure that no threads holding
         * this semaphore will get blocked when they acquire it. However, we
         * also need to make sure that we don't overflow the semaphore by
         * releasing too many permits.
         */
        final int MAX_PERMITS = Integer.MAX_VALUE >>> 2;
        if (sendWindow.availablePermits() < MAX_PERMITS)
        {
          /*
           * At least 2^29 acquisitions would need to occur for this to be
           * insufficient. In addition, at least 2^30 releases would need to
           * occur for this to potentially overflow. Hopefully this is unlikely
           * to happen.
           */
          sendWindow.release(MAX_PERMITS);
        }
      }
      // Start the new session with fresh send and receive windows
      sendWindow = new Semaphore(maxSendWindow);
      rcvWindow = getMaxRcvWindow();

      domain.sessionInitiated(initStatus, rsInfo.getServerState());

      final byte groupId = getGroupId();
      if (rs.getGroupId() != groupId)
      {
        /*
        Connected to replication server with wrong group id:
        warn user and start heartbeat monitor to recover when a server
        with the right group id shows up.
        */
        logger.warn(WARN_CONNECTED_TO_SERVER_WITH_WRONG_GROUP_ID,
            groupId, rs.getServerId(), rsInfo.getServerURL(), rs.getGroupId(), baseDN, getServerId());
      }
      startRSHeartBeatMonitoring(rs);
      // Change time heartbeats are only published to RSs using protocol V3 or above
      if (rsInfo.getProtocolVersion() >=
        ProtocolVersion.REPLICATION_PROTOCOL_V3)
      {
        startChangeTimeHeartBeatPublishing(rs);
      }
      connectCompleted = true;
    }
    catch (Exception e)
    {
      logger.error(ERR_COMPUTING_FAKE_OPS, baseDN, rsInfo.getServerURL(),
          e.getLocalizedMessage() + " " + stackTraceToSingleLineString(e));
    }
    finally
    {
      if (!connectCompleted)
      {
        // The setup failed part way through: make sure we do not appear connected
        setConnectedRS(ConnectedRS.noConnectedRS());
      }
    }
  }
979
980  /**
981   * Determines the status we are starting with according to our state and the
982   * RS state.
983   *
984   * @param rsGenId The generation id of the RS
985   * @param rsState The server state of the RS
986   * @param degradedStatusThreshold The degraded status threshold of the RS
987   * @param dsGenId The local generation id
988   * @return The initial status
989   */
990  private ServerStatus computeInitialServerStatus(long rsGenId,
991    ServerState rsState, int degradedStatusThreshold, long dsGenId)
992  {
993    if (rsGenId == -1)
994    {
995      // RS has no generation id
996      return ServerStatus.NORMAL_STATUS;
997    }
998    else if (rsGenId != dsGenId)
999    {
1000      // DS and RS do not have same generation id
1001      return ServerStatus.BAD_GEN_ID_STATUS;
1002    }
1003    else
1004    {
1005      /*
1006      DS and RS have same generation id
1007
1008      Determine if we are late or not to replay changes. RS uses a
1009      threshold value for pending changes to be replayed by a DS to
1010      determine if the DS is in normal status or in degraded status.
1011      Let's compare the local and remote server state using  this threshold
1012      value to determine if we are late or not
1013      */
1014
1015      int nChanges = ServerState.diffChanges(rsState, state);
1016      if (logger.isTraceEnabled())
1017      {
1018        debugInfo("computed " + nChanges + " changes late.");
1019      }
1020
1021      /*
1022      Check status to know if it is relevant to change the status. Do not
1023      take RSD lock to test. If we attempt to change the status whereas
1024      we are in a status that do not allows that, this will be noticed by
1025      the changeStatusFromStatusAnalyzer method. This allows to take the
1026      lock roughly only when needed versus every sleep time timeout.
1027      */
1028      if (degradedStatusThreshold > 0 && nChanges >= degradedStatusThreshold)
1029      {
1030        return ServerStatus.DEGRADED_STATUS;
1031      }
1032      // degradedStatusThreshold value of '0' means no degrading system used
1033      // (no threshold): force normal status
1034      return ServerStatus.NORMAL_STATUS;
1035    }
1036  }
1037
1038
1039
1040  /**
1041   * Connect to the provided server performing the first phase handshake (start
1042   * messages exchange) and return the reply message from the replication
1043   * server, wrapped in a ReplicationServerInfo object.
1044   *
1045   * @param serverURL
1046   *          Server to connect to.
1047   * @param keepSession
1048   *          Do we keep session opened or not after handshake. Use true if want
1049   *          to perform handshake phase 2 with the same session and keep the
1050   *          session to create as the current one.
1051   * @return The answer from the server . Null if could not get an answer.
1052   */
1053  private ConnectedRS performPhaseOneHandshake(String serverURL, boolean keepSession)
1054  {
1055    Session newSession = null;
1056    Socket socket = null;
1057    boolean hasConnected = false;
1058    LocalizableMessage errorMessage = null;
1059
1060    try
1061    {
1062      // Open a socket connection to the next candidate.
1063      socket = new Socket();
1064      socket.setReceiveBufferSize(1000000);
1065      socket.setTcpNoDelay(true);
1066      if (config.getSourceAddress() != null)
1067      {
1068        InetSocketAddress local = new InetSocketAddress(config.getSourceAddress(), 0);
1069        socket.bind(local);
1070      }
1071      int timeoutMS = MultimasterReplication.getConnectionTimeoutMS();
1072      socket.connect(HostPort.valueOf(serverURL).toInetSocketAddress(), timeoutMS);
1073      newSession = replSessionSecurity.createClientSession(socket, timeoutMS);
1074      boolean isSslEncryption = replSessionSecurity.isSslEncryption();
1075
1076      // Send our ServerStartMsg.
1077      final HostPort hp = new HostPort(
1078          socket.getLocalAddress().getHostName(), socket.getLocalPort());
1079      final String url = hp.toString();
1080      final StartMsg serverStartMsg = new ServerStartMsg(getServerId(), url, getBaseDN(),
1081          getMaxRcvWindow(), config.getHeartbeatInterval(), state,
1082          getGenerationID(), isSslEncryption, getGroupId());
1083      newSession.publish(serverStartMsg);
1084
1085      // Read the ReplServerStartMsg or ReplServerStartDSMsg that should
1086      // come back.
1087      ReplicationMsg msg = newSession.receive();
1088      if (logger.isTraceEnabled())
1089      {
1090        debugInfo("RB HANDSHAKE SENT:\n" + serverStartMsg + "\nAND RECEIVED:\n"
1091            + msg);
1092      }
1093
1094      // Wrap received message in a server info object
1095      final ReplicationServerInfo replServerInfo =
1096          ReplicationServerInfo.newInstance(msg, serverURL);
1097
1098      // Sanity check
1099      final DN repDN = replServerInfo.getBaseDN();
1100      if (!getBaseDN().equals(repDN))
1101      {
1102        errorMessage = ERR_DS_DN_DOES_NOT_MATCH.get(repDN, getBaseDN());
1103        return setConnectedRS(ConnectedRS.noConnectedRS());
1104      }
1105
1106      /*
1107       * We have sent our own protocol version to the replication server. The
1108       * replication server will use the same one (or an older one if it is an
1109       * old replication server).
1110       */
1111      newSession.setProtocolVersion(
1112          getCompatibleVersion(replServerInfo.getProtocolVersion()));
1113
1114      if (!isSslEncryption)
1115      {
1116        newSession.stopEncryption();
1117      }
1118
1119      hasConnected = true;
1120
1121      if (keepSession)
1122      {
1123        // cannot store it yet,
1124        // only store after a successful phase two handshake
1125        return new ConnectedRS(replServerInfo, newSession);
1126      }
1127      return new ConnectedRS(replServerInfo, null);
1128    }
1129    catch (ConnectException e)
1130    {
1131      logger.traceException(e);
1132      errorMessage = WARN_NO_CHANGELOG_SERVER_LISTENING.get(getServerId(), serverURL, getBaseDN());
1133    }
1134    catch (SocketTimeoutException e)
1135    {
1136      logger.traceException(e);
1137      errorMessage = WARN_TIMEOUT_CONNECTING_TO_RS.get(getServerId(), serverURL, getBaseDN());
1138    }
1139    catch (Exception e)
1140    {
1141      logger.traceException(e);
1142      errorMessage = WARN_EXCEPTION_STARTING_SESSION_PHASE.get(
1143          getServerId(), serverURL, getBaseDN(), stackTraceToSingleLineString(e));
1144    }
1145    finally
1146    {
1147      if (!hasConnected || !keepSession)
1148      {
1149        close(newSession);
1150        close(socket);
1151      }
1152
1153      if (!hasConnected && errorMessage != null && !connectionError)
1154      {
1155        // There was no server waiting on this host:port
1156        // Log a notice and will try the next replicationServer in the list
1157        if (keepSession) // Log error message only for final connection
1158        {
1159          // log the error message only once to avoid overflowing the error log
1160          logger.error(errorMessage);
1161        }
1162
1163        logger.trace(errorMessage);
1164      }
1165    }
1166    return setConnectedRS(ConnectedRS.noConnectedRS());
1167  }
1168
1169  /**
1170   * Performs the second phase handshake (send StartSessionMsg and receive
1171   * TopologyMsg messages exchange) and return the reply message from the
1172   * replication server.
1173   *
1174   * @param electedRS Server we are connecting with.
1175   * @param initStatus The status we are starting with
1176   * @return The ReplServerStartMsg the server replied. Null if could not
1177   *         get an answer.
1178   */
1179  private TopologyMsg performPhaseTwoHandshake(ConnectedRS electedRS,
1180    ServerStatus initStatus)
1181  {
1182    try
1183    {
1184      // Send our StartSessionMsg.
1185      final StartSessionMsg startSessionMsg;
1186      startSessionMsg = new StartSessionMsg(
1187          initStatus,
1188          domain.getRefUrls(),
1189          domain.isAssured(),
1190          domain.getAssuredMode(),
1191          domain.getAssuredSdLevel());
1192      startSessionMsg.setEclIncludes(
1193          domain.getEclIncludes(domain.getServerId()),
1194          domain.getEclIncludesForDeletes(domain.getServerId()));
1195      final Session session = electedRS.session;
1196      session.publish(startSessionMsg);
1197
1198      // Read the TopologyMsg that should come back.
1199      final TopologyMsg topologyMsg = (TopologyMsg) session.receive();
1200
1201      if (logger.isTraceEnabled())
1202      {
1203        debugInfo("RB HANDSHAKE SENT:\n" + startSessionMsg
1204            + "\nAND RECEIVED:\n" + topologyMsg);
1205      }
1206
1207      // Alright set the timeout to the desired value
1208      session.setSoTimeout(timeout);
1209      setConnectedRS(electedRS);
1210      return topologyMsg;
1211    }
1212    catch (Exception e)
1213    {
1214      logger.error(WARN_EXCEPTION_STARTING_SESSION_PHASE,
1215          getServerId(), electedRS.rsInfo.getServerURL(), getBaseDN(), stackTraceToSingleLineString(e));
1216
1217      setConnectedRS(ConnectedRS.noConnectedRS());
1218      return null;
1219    }
1220  }
1221
1222  /**
1223   * Class holding evaluation results for electing the best replication server
1224   * for the local directory server.
1225   */
1226  static class RSEvaluations
1227  {
1228    private final int localServerId;
1229    private Map<Integer, ReplicationServerInfo> bestRSs;
1230    private final Map<Integer, LocalizableMessage> rsEvals = new HashMap<>();
1231
1232    /**
1233     * Ctor.
1234     *
1235     * @param localServerId
1236     *          the serverId for the local directory server
1237     * @param rsInfos
1238     *          a Map of serverId => {@link ReplicationServerInfo} with all the
1239     *          candidate replication servers
1240     */
1241    RSEvaluations(int localServerId,
1242        Map<Integer, ReplicationServerInfo> rsInfos)
1243    {
1244      this.localServerId = localServerId;
1245      this.bestRSs = rsInfos;
1246    }
1247
1248    private boolean keepBest(LocalEvaluation eval)
1249    {
1250      if (eval.hasAcceptedAny())
1251      {
1252        bestRSs = eval.getAccepted();
1253        rsEvals.putAll(eval.getRejected());
1254        return true;
1255      }
1256      return false;
1257    }
1258
1259    /**
1260     * Sets the elected best replication server, rejecting all the other
1261     * replication servers with the supplied evaluation.
1262     *
1263     * @param bestRsId
1264     *          the serverId of the elected replication server
1265     * @param rejectedRSsEval
1266     *          the evaluation for all the rejected replication servers
1267     */
1268    private void setBestRS(int bestRsId, LocalizableMessage rejectedRSsEval)
1269    {
1270      for (Iterator<Entry<Integer, ReplicationServerInfo>> it =
1271          this.bestRSs.entrySet().iterator(); it.hasNext();)
1272      {
1273        final Entry<Integer, ReplicationServerInfo> entry = it.next();
1274        final Integer rsId = entry.getKey();
1275        final ReplicationServerInfo rsInfo = entry.getValue();
1276        if (rsInfo.getServerId() != bestRsId)
1277        {
1278          it.remove();
1279        }
1280        rsEvals.put(rsId, rejectedRSsEval);
1281      }
1282    }
1283
1284    private void discardAll(LocalizableMessage eval)
1285    {
1286      for (Integer rsId : bestRSs.keySet())
1287      {
1288        rsEvals.put(rsId, eval);
1289      }
1290    }
1291
1292    private boolean foundBestRS()
1293    {
1294      return bestRSs.size() == 1;
1295    }
1296
1297    /**
1298     * Returns the {@link ReplicationServerInfo} for the best replication
1299     * server.
1300     *
1301     * @return the {@link ReplicationServerInfo} for the best replication server
1302     */
1303    ReplicationServerInfo getBestRS()
1304    {
1305      if (foundBestRS())
1306      {
1307        return bestRSs.values().iterator().next();
1308      }
1309      return null;
1310    }
1311
1312    /**
1313     * Returns the evaluations for all the candidate replication servers.
1314     *
1315     * @return a Map of serverId => LocalizableMessage containing the evaluation for each
1316     *         candidate replication servers.
1317     */
1318    Map<Integer, LocalizableMessage> getEvaluations()
1319    {
1320      if (foundBestRS())
1321      {
1322        final Integer bestRSServerId = getBestRS().getServerId();
1323        if (rsEvals.get(bestRSServerId) == null)
1324        {
1325          final LocalizableMessage eval = NOTE_BEST_RS.get(bestRSServerId, localServerId);
1326          rsEvals.put(bestRSServerId, eval);
1327        }
1328      }
1329      return Collections.unmodifiableMap(rsEvals);
1330    }
1331
1332    /**
1333     * Returns the evaluation for the supplied replication server Id.
1334     * <p>
1335     * Note: "unknown RS" message is returned if the supplied replication server
1336     * was not part of the candidate replication servers.
1337     *
1338     * @param rsServerId
1339     *          the supplied replication server Id
1340     * @return the evaluation {@link LocalizableMessage} for the supplied replication
1341     *         server Id
1342     */
1343    private LocalizableMessage getEvaluation(int rsServerId)
1344    {
1345      final LocalizableMessage evaluation = getEvaluations().get(rsServerId);
1346      if (evaluation != null)
1347      {
1348        return evaluation;
1349      }
1350      return NOTE_UNKNOWN_RS.get(rsServerId, localServerId);
1351    }
1352
1353    /** {@inheritDoc} */
1354    @Override
1355    public String toString()
1356    {
1357      return "Current best replication server Ids: " + bestRSs.keySet()
1358          + ", Evaluation of connected replication servers"
1359          + " (ServerId => Evaluation): " + rsEvals.keySet()
1360          + ", Any replication server not appearing here"
1361          + " could not be contacted.";
1362    }
1363  }
1364
1365  /**
1366   * Evaluation local to one filter.
1367   */
1368  private static class LocalEvaluation
1369  {
1370    private final Map<Integer, ReplicationServerInfo> accepted = new HashMap<>();
1371    private final Map<ReplicationServerInfo, LocalizableMessage> rsEvals = new HashMap<>();
1372
1373    private void accept(Integer rsId, ReplicationServerInfo rsInfo)
1374    {
1375      // forget previous eval, including undoing reject
1376      this.rsEvals.remove(rsInfo);
1377      this.accepted.put(rsId, rsInfo);
1378    }
1379
1380    private void reject(ReplicationServerInfo rsInfo, LocalizableMessage reason)
1381    {
1382      this.accepted.remove(rsInfo.getServerId()); // undo accept
1383      this.rsEvals.put(rsInfo, reason);
1384    }
1385
1386    private Map<Integer, ReplicationServerInfo> getAccepted()
1387    {
1388      return accepted;
1389    }
1390
1391    private ReplicationServerInfo[] getAcceptedRSInfos()
1392    {
1393      return accepted.values().toArray(
1394          new ReplicationServerInfo[accepted.size()]);
1395    }
1396
1397    public Map<Integer, LocalizableMessage> getRejected()
1398    {
1399      final Map<Integer, LocalizableMessage> result = new HashMap<>();
1400      for (Entry<ReplicationServerInfo, LocalizableMessage> entry : rsEvals.entrySet())
1401      {
1402        result.put(entry.getKey().getServerId(), entry.getValue());
1403      }
1404      return result;
1405    }
1406
1407    private boolean hasAcceptedAny()
1408    {
1409      return !accepted.isEmpty();
1410    }
1411
1412  }
1413
1414  /**
1415   * Returns the replication server that best fits our need so that we can
1416   * connect to it or determine if we must disconnect from current one to
1417   * re-connect to best server.
1418   * <p>
1419   * Note: this method is static for test purpose (access from unit tests)
1420   *
1421   * @param firstConnection True if we run this method for the very first
1422   * connection of the broker. False if we run this method to determine if the
1423   * replication server we are currently connected to is still the best or not.
1424   * @param rsServerId The id of the replication server we are currently
1425   * connected to. Only used when firstConnection is false.
1426   * @param myState The local server state.
1427   * @param rsInfos The list of available replication servers and their
1428   * associated information (choice will be made among them).
1429   * @param localServerId The server id for the suffix we are working for.
1430   * @param groupId The groupId we prefer being connected to if possible
1431   * @param generationId The generation id we are using
1432   * @return The computed best replication server. If the returned value is
1433   * null, the best replication server is undetermined but the local server must
1434   * disconnect (so the best replication server is another one than the current
1435   * one). Null can only be returned when firstConnection is false.
1436   */
1437  static RSEvaluations computeBestReplicationServer(
1438      boolean firstConnection, int rsServerId, ServerState myState,
1439      Map<Integer, ReplicationServerInfo> rsInfos, int localServerId,
1440      byte groupId, long generationId)
1441  {
1442    final RSEvaluations evals = new RSEvaluations(localServerId, rsInfos);
1443    // Shortcut, if only one server, this is the best
1444    if (evals.foundBestRS())
1445    {
1446      return evals;
1447    }
1448
1449    /**
1450     * Apply some filtering criteria to determine the best servers list from
1451     * the available ones. The ordered list of criteria is (from more important
1452     * to less important):
1453     * - replication server has the same group id as the local DS one
1454     * - replication server has the same generation id as the local DS one
1455     * - replication server is up to date regarding changes generated by the
1456     *   local DS
1457     * - replication server in the same VM as local DS one
1458     */
1459    /*
1460    The list of best replication servers is filtered with each criteria. At
1461    each criteria, the list is replaced with the filtered one if there
1462    are some servers from the filtering, otherwise, the list is left as is
1463    and the new filtering for the next criteria is applied and so on.
1464
1465    Use only servers locally configured: those are servers declared in
1466    the local configuration. When the current method is called, for
1467    sure, at least one server from the list is locally configured
1468    */
1469    filterServersLocallyConfigured(evals, localServerId);
1470    // Some servers with same group id ?
1471    filterServersWithSameGroupId(evals, localServerId, groupId);
1472    // Some servers with same generation id ?
1473    final boolean rssWithSameGenerationIdExist =
1474        filterServersWithSameGenerationId(evals, localServerId, generationId);
1475    if (rssWithSameGenerationIdExist)
1476    {
1477      // If some servers with the right generation id this is useful to
1478      // run the local DS change criteria
1479      filterServersWithAllLocalDSChanges(evals, myState, localServerId);
1480    }
1481    // Some servers in the local VM or local host?
1482    filterServersOnSameHost(evals, localServerId);
1483
1484    if (evals.foundBestRS())
1485    {
1486      return evals;
1487    }
1488
1489    /**
1490     * Now apply the choice based on the weight to the best servers list
1491     */
1492    if (firstConnection)
1493    {
1494      // We are not connected to a server yet
1495      computeBestServerForWeight(evals, -1, -1);
1496    }
1497    else
1498    {
1499      /*
1500       * We are already connected to a RS: compute the best RS as far as the
1501       * weights is concerned. If this is another one, some DS must disconnect.
1502       */
1503      computeBestServerForWeight(evals, rsServerId, localServerId);
1504    }
1505    return evals;
1506  }
1507
1508  /**
1509   * Creates a new list that contains only replication servers that are locally
1510   * configured.
1511   * @param evals The evaluation object
1512   */
1513  private static void filterServersLocallyConfigured(RSEvaluations evals,
1514      int localServerId)
1515  {
1516    final LocalEvaluation eval = new LocalEvaluation();
1517    for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet())
1518    {
1519      final Integer rsId = entry.getKey();
1520      final ReplicationServerInfo rsInfo = entry.getValue();
1521      if (rsInfo.isLocallyConfigured())
1522      {
1523        eval.accept(rsId, rsInfo);
1524      }
1525      else
1526      {
1527        eval.reject(rsInfo,
1528            NOTE_RS_NOT_LOCALLY_CONFIGURED.get(rsId, localServerId));
1529      }
1530    }
1531    evals.keepBest(eval);
1532  }
1533
1534  /**
1535   * Creates a new list that contains only replication servers that have the
1536   * passed group id, from a passed replication server list.
1537   * @param evals The evaluation object
1538   * @param groupId The group id that must match
1539   */
1540  private static void filterServersWithSameGroupId(RSEvaluations evals,
1541      int localServerId, byte groupId)
1542  {
1543    final LocalEvaluation eval = new LocalEvaluation();
1544    for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet())
1545    {
1546      final Integer rsId = entry.getKey();
1547      final ReplicationServerInfo rsInfo = entry.getValue();
1548      if (rsInfo.getGroupId() == groupId)
1549      {
1550        eval.accept(rsId, rsInfo);
1551      }
1552      else
1553      {
1554        eval.reject(rsInfo, NOTE_RS_HAS_DIFFERENT_GROUP_ID_THAN_DS.get(
1555            rsId, rsInfo.getGroupId(), localServerId, groupId));
1556      }
1557    }
1558    evals.keepBest(eval);
1559  }
1560
1561  /**
1562   * Creates a new list that contains only replication servers that have the
1563   * provided generation id, from a provided replication server list.
1564   * When the selected replication servers have no change (empty serverState)
1565   * then the 'empty'(generationId==-1) replication servers are also included
1566   * in the result list.
1567   *
1568   * @param evals The evaluation object
1569   * @param generationId The generation id that must match
1570   * @return whether some replication server passed the filter
1571   */
1572  private static boolean filterServersWithSameGenerationId(
1573      RSEvaluations evals, long localServerId, long generationId)
1574  {
1575    final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs;
1576    final LocalEvaluation eval = new LocalEvaluation();
1577    boolean emptyState = true;
1578
1579    for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet())
1580    {
1581      final Integer rsId = entry.getKey();
1582      final ReplicationServerInfo rsInfo = entry.getValue();
1583      if (rsInfo.getGenerationId() == generationId)
1584      {
1585        eval.accept(rsId, rsInfo);
1586        if (!rsInfo.serverState.isEmpty())
1587        {
1588          emptyState = false;
1589        }
1590      }
1591      else if (rsInfo.getGenerationId() == -1)
1592      {
1593        eval.reject(rsInfo, NOTE_RS_HAS_NO_GENERATION_ID.get(rsId,
1594            generationId, localServerId));
1595      }
1596      else
1597      {
1598        eval.reject(rsInfo, NOTE_RS_HAS_DIFFERENT_GENERATION_ID_THAN_DS.get(
1599            rsId, rsInfo.getGenerationId(), localServerId, generationId));
1600      }
1601    }
1602
1603    if (emptyState)
1604    {
1605      // If the RS with a generationId have all an empty state,
1606      // then the 'empty'(genId=-1) RSes are also candidate
1607      for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet())
1608      {
1609        ReplicationServerInfo rsInfo = entry.getValue();
1610        if (rsInfo.getGenerationId() == -1)
1611        {
1612          // will undo the reject of previously rejected RSs
1613          eval.accept(entry.getKey(), rsInfo);
1614        }
1615      }
1616    }
1617
1618    return evals.keepBest(eval);
1619  }
1620
1621  /**
1622   * Creates a new list that contains only replication servers that have the
1623   * latest changes from the passed DS, from a passed replication server list.
1624   * @param evals The evaluation object
1625   * @param localState The state of the local DS
1626   * @param localServerId The server id to consider for the changes
1627   */
1628  private static void filterServersWithAllLocalDSChanges(
1629      RSEvaluations evals, ServerState localState, int localServerId)
1630  {
1631    // Extract the CSN of the latest change generated by the local server
1632    final CSN localCSN = getCSN(localState, localServerId);
1633
1634    /**
1635     * Find replication servers that are up to date (or more up to date than us,
1636     * if for instance we failed and restarted, having sent some changes to the
1637     * RS but without having time to store our own state) regarding our own
1638     * server id. If some servers are more up to date, prefer this list but take
1639     * only the latest CSN.
1640     */
1641    final LocalEvaluation mostUpToDateEval = new LocalEvaluation();
1642    boolean foundRSMoreUpToDateThanLocalDS = false;
1643    CSN latestRsCSN = null;
1644    for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet())
1645    {
1646      final Integer rsId = entry.getKey();
1647      final ReplicationServerInfo rsInfo = entry.getValue();
1648      final CSN rsCSN = getCSN(rsInfo.getServerState(), localServerId);
1649
1650      // Has this replication server the latest local change ?
1651      if (rsCSN.isOlderThan(localCSN))
1652      {
1653        mostUpToDateEval.reject(rsInfo, NOTE_RS_LATER_THAN_LOCAL_DS.get(
1654            rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI()));
1655      }
1656      else if (rsCSN.equals(localCSN))
1657      {
1658        // This replication server has exactly the latest change from the
1659        // local server
1660        if (!foundRSMoreUpToDateThanLocalDS)
1661        {
1662          mostUpToDateEval.accept(rsId, rsInfo);
1663        }
1664        else
1665        {
1666          mostUpToDateEval.reject(rsInfo,
1667            NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get(
1668              rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI()));
1669        }
1670      }
1671      else if (rsCSN.isNewerThan(localCSN))
1672      {
1673        // This replication server is even more up to date than the local server
1674        if (latestRsCSN == null)
1675        {
1676          foundRSMoreUpToDateThanLocalDS = true;
1677          // all previous results are now outdated, reject them all
1678          rejectAllWithRSIsLaterThanBestRS(mostUpToDateEval, localServerId,
1679              localCSN);
1680          // Initialize the latest CSN
1681          latestRsCSN = rsCSN;
1682        }
1683
1684        if (rsCSN.equals(latestRsCSN))
1685        {
1686          mostUpToDateEval.accept(rsId, rsInfo);
1687        }
1688        else if (rsCSN.isNewerThan(latestRsCSN))
1689        {
1690          // This RS is even more up to date, reject all previously accepted RSs
1691          // and store this new RS
1692          rejectAllWithRSIsLaterThanBestRS(mostUpToDateEval, localServerId,
1693              localCSN);
1694          mostUpToDateEval.accept(rsId, rsInfo);
1695          latestRsCSN = rsCSN;
1696        }
1697        else
1698        {
1699          mostUpToDateEval.reject(rsInfo,
1700            NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get(
1701              rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI()));
1702        }
1703      }
1704    }
1705    evals.keepBest(mostUpToDateEval);
1706  }
1707
1708  private static CSN getCSN(ServerState state, int serverId)
1709  {
1710    final CSN csn = state.getCSN(serverId);
1711    if (csn != null)
1712    {
1713      return csn;
1714    }
1715    return new CSN(0, 0, serverId);
1716  }
1717
1718  private static void rejectAllWithRSIsLaterThanBestRS(
1719      final LocalEvaluation eval, int localServerId, CSN localCSN)
1720  {
1721    for (ReplicationServerInfo rsInfo : eval.getAcceptedRSInfos())
1722    {
1723      final String rsCSN =
1724          getCSN(rsInfo.getServerState(), localServerId).toStringUI();
1725      final LocalizableMessage reason =
1726          NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get(
1727            rsInfo.getServerId(), rsCSN, localServerId, localCSN.toStringUI());
1728      eval.reject(rsInfo, reason);
1729    }
1730  }
1731
1732  /**
1733   * Creates a new list that contains only replication servers that are on the
1734   * same host as the local DS, from a passed replication server list. This
1735   * method will gives priority to any replication server which is in the same
1736   * VM as this DS.
1737   *
1738   * @param evals The evaluation object
1739   */
1740  private static void filterServersOnSameHost(RSEvaluations evals,
1741      int localServerId)
1742  {
1743    /*
1744     * Initially look for all servers on the same host. If we find one in the
1745     * same VM, then narrow the search.
1746     */
1747    boolean foundRSInSameVM = false;
1748    final LocalEvaluation eval = new LocalEvaluation();
1749    for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet())
1750    {
1751      final Integer rsId = entry.getKey();
1752      final ReplicationServerInfo rsInfo = entry.getValue();
1753      final HostPort hp = HostPort.valueOf(rsInfo.getServerURL());
1754      if (hp.isLocalAddress())
1755      {
1756        if (isLocalReplicationServerPort(hp.getPort()))
1757        {
1758          if (!foundRSInSameVM)
1759          {
1760            // An RS in the same VM will always have priority.
1761            // Narrow the search to only include servers in this VM.
1762            rejectAllWithRSOnDifferentVMThanDS(eval, localServerId);
1763            foundRSInSameVM = true;
1764          }
1765          eval.accept(rsId, rsInfo);
1766        }
1767        else if (!foundRSInSameVM)
1768        {
1769          // OK, accept RSs on the same machine because we have not found an RS
1770          // in the same VM yet
1771          eval.accept(rsId, rsInfo);
1772        }
1773        else
1774        {
1775          // Skip: we have found some RSs in the same VM, but this RS is not.
1776          eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_VM_THAN_DS.get(rsId,
1777              localServerId));
1778        }
1779      }
1780      else
1781      {
1782        eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_HOST_THAN_DS.get(rsId,
1783            localServerId));
1784      }
1785    }
1786    evals.keepBest(eval);
1787  }
1788
1789  private static void rejectAllWithRSOnDifferentVMThanDS(LocalEvaluation eval,
1790      int localServerId)
1791  {
1792    for (ReplicationServerInfo rsInfo : eval.getAcceptedRSInfos())
1793    {
1794      eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_VM_THAN_DS.get(
1795          rsInfo.getServerId(), localServerId));
1796    }
1797  }
1798
1799  /**
1800   * Computes the best replication server the local server should be connected
1801   * to so that the load is correctly spread across the topology, following the
1802   * weights guidance.
1803   * Warning: This method is expected to be called with at least 2 servers in
1804   * bestServers
1805   * Note: this method is static for test purpose (access from unit tests)
1806   * @param evals The evaluation object
1807   * @param currentRsServerId The replication server the local server is
1808   *        currently connected to. -1 if the local server is not yet connected
1809   *        to any replication server.
1810   * @param localServerId The server id of the local server. This is not used
1811   *        when it is not connected to a replication server
1812   *        (currentRsServerId = -1)
1813   */
1814  static void computeBestServerForWeight(RSEvaluations evals,
1815      int currentRsServerId, int localServerId)
1816  {
1817    final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs;
1818    /*
1819     * - Compute the load goal of each RS, deducing it from the weights affected
1820     * to them.
1821     * - Compute the current load of each RS, deducing it from the DSs
1822     * currently connected to them.
1823     * - Compute the differences between the load goals and the current loads of
1824     * the RSs.
1825     */
1826    // Sum of the weights
1827    int sumOfWeights = 0;
1828    // Sum of the connected DSs
1829    int sumOfConnectedDSs = 0;
1830    for (ReplicationServerInfo rsInfo : bestServers.values())
1831    {
1832      sumOfWeights += rsInfo.getWeight();
1833      sumOfConnectedDSs += rsInfo.getConnectedDSNumber();
1834    }
1835
1836    // Distance (difference) of the current loads to the load goals of each RS:
1837    // key:server id, value: distance
1838    Map<Integer, BigDecimal> loadDistances = new HashMap<>();
1839    // Precision for the operations (number of digits after the dot)
1840    final MathContext mathContext = new MathContext(32, RoundingMode.HALF_UP);
1841    for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet())
1842    {
1843      final Integer rsId = entry.getKey();
1844      final ReplicationServerInfo rsInfo = entry.getValue();
1845
1846      //  load goal = rs weight / sum of weights
1847      BigDecimal loadGoalBd = BigDecimal.valueOf(rsInfo.getWeight()).divide(
1848          BigDecimal.valueOf(sumOfWeights), mathContext);
1849      BigDecimal currentLoadBd = BigDecimal.ZERO;
1850      if (sumOfConnectedDSs != 0)
1851      {
1852        // current load = number of connected DSs / total number of DSs
1853        int connectedDSs = rsInfo.getConnectedDSNumber();
1854        currentLoadBd = BigDecimal.valueOf(connectedDSs).divide(
1855            BigDecimal.valueOf(sumOfConnectedDSs), mathContext);
1856      }
1857      // load distance = load goal - current load
1858      BigDecimal loadDistanceBd =
1859        loadGoalBd.subtract(currentLoadBd, mathContext);
1860      loadDistances.put(rsId, loadDistanceBd);
1861    }
1862
1863    if (currentRsServerId == -1)
1864    {
1865      // The local server is not connected yet, find best server to connect to,
1866      // taking the weights into account.
1867      computeBestServerWhenNotConnected(evals, loadDistances, localServerId);
1868    }
1869    else
1870    {
1871      // The local server is currently connected to a RS, let's see if it must
1872      // disconnect or not, taking the weights into account.
1873      computeBestServerWhenConnected(evals, loadDistances, localServerId,
1874          currentRsServerId, sumOfWeights, sumOfConnectedDSs);
1875    }
1876  }
1877
1878  private static void computeBestServerWhenNotConnected(RSEvaluations evals,
1879      Map<Integer, BigDecimal> loadDistances, int localServerId)
1880  {
1881    final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs;
1882    /*
1883     * Find the server with the current highest distance to its load goal and
1884     * choose it. Make an exception if every server is correctly balanced,
1885     * that is every current load distances are equal to 0, in that case,
1886     * choose the server with the highest weight
1887     */
1888    int bestRsId = 0; // If all server equal, return the first one
1889    float highestDistance = Float.NEGATIVE_INFINITY;
1890    boolean allRsWithZeroDistance = true;
1891    int highestWeightRsId = -1;
1892    int highestWeight = -1;
1893    for (Integer rsId : bestServers.keySet())
1894    {
1895      float loadDistance = loadDistances.get(rsId).floatValue();
1896      if (loadDistance > highestDistance)
1897      {
1898        // This server is far more from its balance point
1899        bestRsId = rsId;
1900        highestDistance = loadDistance;
1901      }
1902      if (loadDistance != 0)
1903      {
1904        allRsWithZeroDistance = false;
1905      }
1906      int weight = bestServers.get(rsId).getWeight();
1907      if (weight > highestWeight)
1908      {
1909        // This server has a higher weight
1910        highestWeightRsId = rsId;
1911        highestWeight = weight;
1912      }
1913    }
1914    // All servers with a 0 distance ?
1915    if (allRsWithZeroDistance)
1916    {
1917      // Choose server with the highest weight
1918      bestRsId = highestWeightRsId;
1919    }
1920    evals.setBestRS(bestRsId, NOTE_BIGGEST_WEIGHT_RS.get(localServerId,
1921        bestRsId));
1922  }
1923
1924  private static void computeBestServerWhenConnected(RSEvaluations evals,
1925      Map<Integer, BigDecimal> loadDistances, int localServerId,
1926      int currentRsServerId, int sumOfWeights, int sumOfConnectedDSs)
1927  {
1928    final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs;
1929    final MathContext mathContext = new MathContext(32, RoundingMode.HALF_UP);
1930    float currentLoadDistance =
1931      loadDistances.get(currentRsServerId).floatValue();
1932    if (currentLoadDistance < 0)
1933    {
1934      /*
1935      Too much DSs connected to the current RS, compared with its load
1936      goal:
1937      Determine the potential number of DSs to disconnect from the current
1938      RS and see if the local DS is part of them: the DSs that must
1939      disconnect are those with the lowest server id.
1940      Compute the sum of the distances of the load goals of the other RSs
1941      */
1942      BigDecimal sumOfLoadDistancesOfOtherRSsBd = BigDecimal.ZERO;
1943      for (Integer rsId : bestServers.keySet())
1944      {
1945        if (rsId != currentRsServerId)
1946        {
1947          sumOfLoadDistancesOfOtherRSsBd = sumOfLoadDistancesOfOtherRSsBd.add(
1948            loadDistances.get(rsId), mathContext);
1949        }
1950      }
1951
1952      if (sumOfLoadDistancesOfOtherRSsBd.floatValue() > 0)
1953      {
1954        /*
1955        The average distance of the other RSs shows a lack of DSs.
1956        Compute the number of DSs to disconnect from the current RS,
1957        rounding to the nearest integer number. Do only this if there is
1958        no risk of yoyo effect: when the exact balance cannot be
1959        established due to the current number of DSs connected, do not
1960        disconnect a DS. A simple example where the balance cannot be
1961        reached is:
1962        - RS1 has weight 1 and 2 DSs
1963        - RS2 has weight 1 and 1 DS
1964        => disconnecting a DS from RS1 to reconnect it to RS2 would have no
1965        sense as this would lead to the reverse situation. In that case,
1966        the perfect balance cannot be reached and we must stick to the
1967        current situation, otherwise the DS would keep move between the 2
1968        RSs
1969        */
1970        float notRoundedOverloadingDSsNumber = sumOfLoadDistancesOfOtherRSsBd.
1971          multiply(BigDecimal.valueOf(sumOfConnectedDSs), mathContext)
1972              .floatValue();
1973        int overloadingDSsNumber = Math.round(notRoundedOverloadingDSsNumber);
1974
1975        // Avoid yoyo effect
1976        if (overloadingDSsNumber == 1)
1977        {
1978          // What would be the new load distance for the current RS if
1979          // we disconnect some DSs ?
1980          ReplicationServerInfo currentReplicationServerInfo =
1981            bestServers.get(currentRsServerId);
1982
1983          int currentRsWeight = currentReplicationServerInfo.getWeight();
1984          BigDecimal currentRsWeightBd = BigDecimal.valueOf(currentRsWeight);
1985          BigDecimal sumOfWeightsBd = BigDecimal.valueOf(sumOfWeights);
1986          BigDecimal currentRsLoadGoalBd =
1987            currentRsWeightBd.divide(sumOfWeightsBd, mathContext);
1988          BigDecimal potentialCurrentRsNewLoadBd = BigDecimal.ZERO;
1989          if (sumOfConnectedDSs != 0)
1990          {
1991            int connectedDSs = currentReplicationServerInfo.
1992              getConnectedDSNumber();
1993            BigDecimal potentialNewConnectedDSsBd =
1994                BigDecimal.valueOf(connectedDSs - 1);
1995            BigDecimal sumOfConnectedDSsBd =
1996                BigDecimal.valueOf(sumOfConnectedDSs);
1997            potentialCurrentRsNewLoadBd =
1998              potentialNewConnectedDSsBd.divide(sumOfConnectedDSsBd,
1999                mathContext);
2000          }
2001          BigDecimal potentialCurrentRsNewLoadDistanceBd =
2002            currentRsLoadGoalBd.subtract(potentialCurrentRsNewLoadBd,
2003              mathContext);
2004
2005          // What would be the new load distance for the other RSs ?
2006          BigDecimal additionalDsLoadBd =
2007              BigDecimal.ONE.divide(
2008                  BigDecimal.valueOf(sumOfConnectedDSs), mathContext);
2009          BigDecimal potentialNewSumOfLoadDistancesOfOtherRSsBd =
2010            sumOfLoadDistancesOfOtherRSsBd.subtract(additionalDsLoadBd,
2011                  mathContext);
2012
2013          /*
2014          Now compare both values: we must not disconnect the DS if this
2015          is for going in a situation where the load distance of the other
2016          RSs is the opposite of the future load distance of the local RS
2017          or we would evaluate that we should disconnect just after being
2018          arrived on the new RS. But we should disconnect if we reach the
2019          perfect balance (both values are 0).
2020          */
2021          if (mustAvoidYoyoEffect(potentialCurrentRsNewLoadDistanceBd,
2022              potentialNewSumOfLoadDistancesOfOtherRSsBd))
2023          {
2024            // Avoid the yoyo effect, and keep the local DS connected to its
2025            // current RS
2026            evals.setBestRS(currentRsServerId,
2027                NOTE_AVOID_YOYO_EFFECT.get(localServerId, currentRsServerId));
2028            return;
2029          }
2030        }
2031
2032        ReplicationServerInfo currentRsInfo =
2033            bestServers.get(currentRsServerId);
2034        if (isServerOverloadingRS(localServerId, currentRsInfo,
2035            overloadingDSsNumber))
2036        {
2037          // The local server is part of the DSs to disconnect
2038          evals.discardAll(NOTE_DISCONNECT_DS_FROM_OVERLOADED_RS.get(
2039              localServerId, currentRsServerId));
2040        }
2041        else
2042        {
2043          // The local server is not part of the servers to disconnect from the
2044          // current RS.
2045          evals.setBestRS(currentRsServerId,
2046              NOTE_DO_NOT_DISCONNECT_DS_FROM_OVERLOADED_RS.get(localServerId,
2047                  currentRsServerId));
2048        }
2049      } else {
2050        // The average distance of the other RSs does not show a lack of DSs:
2051        // no need to disconnect any DS from the current RS.
2052        evals.setBestRS(currentRsServerId,
2053            NOTE_NO_NEED_TO_REBALANCE_DSS_BETWEEN_RSS.get(localServerId,
2054                currentRsServerId));
2055      }
2056    } else {
2057      // The RS load goal is reached or there are not enough DSs connected to
2058      // it to reach it: do not disconnect from this RS and return rsInfo for
2059      // this RS
2060      evals.setBestRS(currentRsServerId,
2061          NOTE_DO_NOT_DISCONNECT_DS_FROM_ACCEPTABLE_LOAD_RS.get(localServerId,
2062              currentRsServerId));
2063    }
2064  }
2065
2066  private static boolean mustAvoidYoyoEffect(BigDecimal rsNewLoadDistance,
2067      BigDecimal otherRSsNewSumOfLoadDistances)
2068  {
2069    final MathContext roundCtx = new MathContext(6, RoundingMode.DOWN);
2070    final BigDecimal rsLoadDistance = rsNewLoadDistance.round(roundCtx);
2071    final BigDecimal otherRSsSumOfLoadDistances =
2072        otherRSsNewSumOfLoadDistances.round(roundCtx);
2073
2074    return rsLoadDistance.compareTo(BigDecimal.ZERO) != 0
2075        && rsLoadDistance.compareTo(otherRSsSumOfLoadDistances.negate()) == 0;
2076  }
2077
2078  /**
2079   * Returns whether the local DS is overloading the RS.
2080   * <p>
2081   * There are an "overloadingDSsNumber" of DS overloading the RS. The list of
2082   * DSs connected to this RS is ordered by serverId to use a consistent
2083   * ordering across all nodes in the topology. The serverIds which index in the
2084   * List are lower than "overloadingDSsNumber" will be evicted first.
2085   * <p>
2086   * This ordering is unfair since nodes with the lower serverIds will be
2087   * evicted more often than nodes with higher serverIds. However, it is a
2088   * consistent and reliable ordering applicable anywhere in the topology.
2089   */
2090  private static boolean isServerOverloadingRS(int localServerId,
2091      ReplicationServerInfo currentRsInfo, int overloadingDSsNumber)
2092  {
2093    List<Integer> serversConnectedToCurrentRS = new ArrayList<>(currentRsInfo.getConnectedDSs());
2094    Collections.sort(serversConnectedToCurrentRS);
2095
2096    final int idx = serversConnectedToCurrentRS.indexOf(localServerId);
2097    return idx != -1 && idx < overloadingDSsNumber;
2098  }
2099
2100  /**
2101   * Start the heartbeat monitor thread.
2102   */
2103  private void startRSHeartBeatMonitoring(ConnectedRS rs)
2104  {
2105    final long heartbeatInterval = config.getHeartbeatInterval();
2106    if (heartbeatInterval > 0)
2107    {
2108      heartbeatMonitor = new HeartbeatMonitor(getServerId(), rs.getServerId(),
2109          getBaseDN().toString(), rs.session, heartbeatInterval);
2110      heartbeatMonitor.start();
2111    }
2112  }
2113
2114  /**
2115   * Stop the heartbeat monitor thread.
2116   */
2117  private synchronized void stopRSHeartBeatMonitoring()
2118  {
2119    if (heartbeatMonitor != null)
2120    {
2121      heartbeatMonitor.shutdown();
2122      heartbeatMonitor = null;
2123    }
2124  }
2125
2126  /**
2127   * Restart the ReplicationBroker.
2128   * @param infiniteTry the socket which failed
2129   */
2130  public void reStart(boolean infiniteTry)
2131  {
2132    reStart(connectedRS.get().session, infiniteTry);
2133  }
2134
2135  /**
2136   * Restart the ReplicationServer broker after a failure.
2137   *
2138   * @param failingSession the socket which failed
2139   * @param infiniteTry the socket which failed
2140   */
2141  private void reStart(Session failingSession, boolean infiniteTry)
2142  {
2143    if (failingSession != null)
2144    {
2145      failingSession.close();
2146      numLostConnections++;
2147    }
2148
2149    ConnectedRS rs = connectedRS.get();
2150    if (failingSession == rs.session && !rs.equals(ConnectedRS.noConnectedRS()))
2151    {
2152      rs = setConnectedRS(ConnectedRS.noConnectedRS());
2153    }
2154
2155    while (true)
2156    {
2157      // Synchronize inside the loop in order to allow shutdown.
2158      synchronized (startStopLock)
2159      {
2160        if (rs.isConnected() || shutdown)
2161        {
2162          break;
2163        }
2164
2165        try
2166        {
2167          connectAsDataServer();
2168          rs = connectedRS.get();
2169        }
2170        catch (Exception e)
2171        {
2172          logger.error(NOTE_EXCEPTION_RESTARTING_SESSION,
2173              getBaseDN(), e.getLocalizedMessage() + " " + stackTraceToSingleLineString(e));
2174        }
2175
2176        if (rs.isConnected() || !infiniteTry)
2177        {
2178          break;
2179        }
2180      }
2181      try
2182      {
2183          Thread.sleep(500);
2184      }
2185      catch (InterruptedException ignored)
2186      {
2187        // ignore
2188      }
2189    }
2190
2191    if (logger.isTraceEnabled())
2192    {
2193      debugInfo("end restart : connected=" + rs.isConnected() + " with RS("
2194          + rs.getServerId() + ") genId=" + getGenerationID());
2195    }
2196  }
2197
2198  /**
2199   * Publish a message to the other servers.
2200   * @param msg the message to publish
2201   */
2202  public void publish(ReplicationMsg msg)
2203  {
2204    publish(msg, false, true);
2205  }
2206
2207  /**
2208   * Publish a message to the other servers.
2209   * @param msg            The message to publish.
2210   * @param retryOnFailure Whether reconnect should automatically be done.
2211   * @return               Whether publish succeeded.
2212   */
2213  boolean publish(ReplicationMsg msg, boolean retryOnFailure)
2214  {
2215    return publish(msg, false, retryOnFailure);
2216  }
2217
2218  /**
2219   * Publish a recovery message to the other servers.
2220   * @param msg the message to publish
2221   */
2222  public void publishRecovery(ReplicationMsg msg)
2223  {
2224    publish(msg, true, true);
2225  }
2226
2227  /**
2228   * Publish a message to the other servers.
2229   * @param msg the message to publish
2230   * @param recoveryMsg the message is a recovery LocalizableMessage
2231   * @param retryOnFailure whether retry should be done on failure
2232   * @return whether the message was successfully sent.
2233   */
2234  private boolean publish(ReplicationMsg msg, boolean recoveryMsg,
2235      boolean retryOnFailure)
2236  {
2237    boolean done = false;
2238
2239    while (!done && !shutdown)
2240    {
2241      if (connectionError)
2242      {
2243        /*
2244        It was not possible to connect to any replication server.
2245        Since the operation was already processed, we have no other
2246        choice than to return without sending the ReplicationMsg
2247        and relying on the resend procedure of the connect phase to
2248        fix the problem when we finally connect.
2249        */
2250
2251        if (logger.isTraceEnabled())
2252        {
2253          debugInfo("publish(): Publishing a message is not possible due to"
2254              + " existing connection error.");
2255        }
2256
2257        return false;
2258      }
2259
2260      try
2261      {
2262        /*
2263        save the session at the time when we acquire the
2264        sendwindow credit so that we can make sure later
2265        that the session did not change in between.
2266        This is necessary to make sure that we don't publish a message
2267        on a session with a credit that was acquired from a previous
2268        session.
2269        */
2270        Session currentSession;
2271        Semaphore currentWindowSemaphore;
2272        synchronized (connectPhaseLock)
2273        {
2274          currentSession = connectedRS.get().session;
2275          currentWindowSemaphore = sendWindow;
2276        }
2277
2278        /*
2279        If the Replication domain has decided that there is a need to
2280        recover some changes then it is not allowed to send this
2281        change but it will be the responsibility of the recovery thread to
2282        do it.
2283        */
2284        if (!recoveryMsg & connectRequiresRecovery)
2285        {
2286          return false;
2287        }
2288
2289        boolean credit;
2290        if (msg instanceof UpdateMsg)
2291        {
2292          /*
2293          Acquiring the window credit must be done outside of the
2294          connectPhaseLock because it can be blocking and we don't
2295          want to hold off reconnection in case the connection dropped.
2296          */
2297          credit =
2298            currentWindowSemaphore.tryAcquire(500, TimeUnit.MILLISECONDS);
2299        }
2300        else
2301        {
2302          credit = true;
2303        }
2304
2305        if (credit)
2306        {
2307          synchronized (connectPhaseLock)
2308          {
2309            /*
2310            session may have been set to null in the connection phase
2311            when restarting the broker for example.
2312            Check the session. If it has changed, some disconnection or
2313            reconnection happened and we need to restart from scratch.
2314            */
2315            final Session session = connectedRS.get().session;
2316            if (session != null && session == currentSession)
2317            {
2318              session.publish(msg);
2319              done = true;
2320            }
2321          }
2322        }
2323        if (!credit && currentWindowSemaphore.availablePermits() == 0)
2324        {
2325          synchronized (connectPhaseLock)
2326          {
2327            /*
2328            the window is still closed.
2329            Send a WindowProbeMsg message to wake up the receiver in case the
2330            window update message was lost somehow...
2331            then loop to check again if connection was closed.
2332            */
2333            Session session = connectedRS.get().session;
2334            if (session != null)
2335            {
2336              session.publish(new WindowProbeMsg());
2337            }
2338          }
2339        }
2340      }
2341      catch (IOException e)
2342      {
2343        if (logger.isTraceEnabled())
2344        {
2345          debugInfo("publish(): IOException caught: "
2346              + stackTraceToSingleLineString(e));
2347        }
2348        if (!retryOnFailure)
2349        {
2350          return false;
2351        }
2352
2353        // The receive threads should handle reconnection or
2354        // mark this broker in error. Just retry.
2355        synchronized (connectPhaseLock)
2356        {
2357          try
2358          {
2359            connectPhaseLock.wait(100);
2360          }
2361          catch (InterruptedException ignored)
2362          {
2363            if (logger.isTraceEnabled())
2364            {
2365              debugInfo("publish(): InterruptedException caught 1: "
2366                  + stackTraceToSingleLineString(ignored));
2367            }
2368          }
2369        }
2370      }
2371      catch (InterruptedException ignored)
2372      {
2373        // just loop.
2374        if (logger.isTraceEnabled())
2375        {
2376          debugInfo("publish(): InterruptedException caught 2: "
2377              + stackTraceToSingleLineString(ignored));
2378        }
2379      }
2380    }
2381    return true;
2382  }
2383
2384  /**
2385   * Receive a message.
2386   * This method is not thread-safe and should either always be
2387   * called in a single thread or protected by a locking mechanism
2388   * before being called. This is a wrapper to the method with a boolean version
2389   * so that we do not have to modify existing tests.
2390   *
2391   * @return the received message
2392   * @throws SocketTimeoutException if the timeout set by setSoTimeout
2393   *         has expired
2394   */
2395  public ReplicationMsg receive() throws SocketTimeoutException
2396  {
2397    return receive(false, true, false);
2398  }
2399
  /**
   * Receive a message.
   * This method is not thread-safe and should either always be
   * called in a single thread or protected by a locking mechanism
   * before being called.
   * <p>
   * Messages used for the broker's own bookkeeping are consumed here and the
   * loop goes on to the next message: WindowMsg (send window credits),
   * TopologyMsg (unless returnOnTopoChange is set), StopMsg (restart towards
   * a suitable RS) and MonitorMsg (replica and RS states, optional best RS
   * check). Any other message is returned to the caller.
   *
   * @param reconnectToTheBestRS Whether broker will automatically switch
   *                             to the best suitable RS. When set, the best
   *                             RS is recomputed once two MonitorMsgs have
   *                             been received without a TopologyMsg in
   *                             between.
   * @param reconnectOnFailure   Whether broker will automatically reconnect
   *                             on failure. When not set, a failure while
   *                             receiving ends the loop and null is
   *                             returned.
   * @param returnOnTopoChange   Whether broker should return TopologyMsg
   *                             received.
   * @return the received message, or null when the broker is shut down, when
   *         the current RS has no session, or when receiving failed and
   *         reconnectOnFailure is not set
   *
   * @throws SocketTimeoutException if the timeout set by setSoTimeout
   *         has expired
   */
  ReplicationMsg receive(boolean reconnectToTheBestRS,
      boolean reconnectOnFailure, boolean returnOnTopoChange)
    throws SocketTimeoutException
  {
    while (!shutdown)
    {
      ConnectedRS rs = connectedRS.get();
      if (reconnectOnFailure && !rs.isConnected())
      {
        // infinite try to reconnect
        reStart(null, true);
        continue;
      }

      if (rs.session == null)
      {
        // Must be shutting down.
        break;
      }

      // Save session information for later in case we need it for log messages
      // after the session has been closed and/or failed.
      final int serverId = getServerId();
      final DN baseDN = getBaseDN();
      final int previousRsServerID = rs.getServerId();
      try
      {
        ReplicationMsg msg = rs.session.receive();
        if (msg instanceof UpdateMsg)
        {
          // One more update received: one less slot in the receive window.
          synchronized (this)
          {
            rcvWindow--;
          }
        }
        if (msg instanceof WindowMsg)
        {
          // The RS acknowledges updates: give the credits back to the
          // send window.
          final WindowMsg windowMsg = (WindowMsg) msg;
          sendWindow.release(windowMsg.getNumAck());
        }
        else if (msg instanceof TopologyMsg)
        {
          final TopologyMsg topoMsg = (TopologyMsg) msg;
          receiveTopo(topoMsg, getRsServerId());
          if (reconnectToTheBestRS)
          {
            // Reset wait time before next computation of best server
            mustRunBestServerCheckingAlgorithm = 0;
          }

          // Caller wants to check what's changed
          if (returnOnTopoChange)
          {
            return msg;
          }
        }
        else if (msg instanceof StopMsg)
        {
          // RS performs a proper disconnection
          logger.warn(WARN_REPLICATION_SERVER_PROPERLY_DISCONNECTED, previousRsServerID, rs.replicationServer,
              serverId, baseDN);

          // Try to find a suitable RS
          reStart(rs.session, true);
        }
        else if (msg instanceof MonitorMsg)
        {
          // This is the response to a MonitorRequest that was sent earlier or
          // the regular message of the monitoring publisher of the RS.
          MonitorMsg monitorMsg = (MonitorMsg) msg;

          // Extract and store replicas ServerStates. The map is built aside
          // and then assigned in one go to replicaStates.
          final Map<Integer, ServerState> newReplicaStates = new HashMap<>();
          for (int srvId : toIterable(monitorMsg.ldapIterator()))
          {
            newReplicaStates.put(srvId, monitorMsg.getLDAPServerState(srvId));
          }
          replicaStates = newReplicaStates;

          // Notify the sender that the response was received.
          synchronized (monitorResponse)
          {
            monitorResponse.set(true);
            monitorResponse.notify();
          }

          // Update the replication servers ServerStates with new received info
          // (RSs not present in the current topology are skipped).
          Map<Integer, ReplicationServerInfo> rsInfos = topology.get().rsInfos;
          for (int srvId : toIterable(monitorMsg.rsIterator()))
          {
            final ReplicationServerInfo rsInfo = rsInfos.get(srvId);
            if (rsInfo != null)
            {
              rsInfo.update(monitorMsg.getRSServerState(srvId));
            }
          }

          /*
          Now if it is allowed, compute the best replication server to see if
          it is still the one we are currently connected to. If not,
          disconnect properly and let the connection algorithm re-connect to
          best replication server
          */
          if (reconnectToTheBestRS)
          {
            mustRunBestServerCheckingAlgorithm++;
            if (mustRunBestServerCheckingAlgorithm == 2)
            {
              // Stable topology (no topo msg since few seconds): proceed with
              // best server checking.
              final RSEvaluations evals = computeBestReplicationServer(
                  false, previousRsServerID, state,
                  rsInfos, serverId, getGroupId(), getGenerationID());
              final ReplicationServerInfo bestServerInfo = evals.getBestRS();
              if (previousRsServerID != -1
                  && (bestServerInfo == null
                      || bestServerInfo.getServerId() != previousRsServerID))
              {
                // The best replication server is no more the one we are
                // currently using. Disconnect properly then reconnect.
                LocalizableMessage message;
                if (bestServerInfo == null)
                {
                  // No best RS was designated by the evaluation.
                  message = NOTE_LOAD_BALANCE_REPLICATION_SERVER.get(
                      serverId, previousRsServerID, rs.replicationServer, baseDN);
                }
                else
                {
                  final int bestRsServerId = bestServerInfo.getServerId();
                  message = NOTE_NEW_BEST_REPLICATION_SERVER.get(
                      serverId, previousRsServerID, rs.replicationServer, bestRsServerId, baseDN,
                      evals.getEvaluation(previousRsServerID),
                      evals.getEvaluation(bestRsServerId));
                }
                logger.info(message);
                if (logger.isTraceEnabled())
                {
                  debugInfo("best replication servers evaluation results: " + evals);
                }
                reStart(true);
              }

              // Reset wait time before next computation of best server
              mustRunBestServerCheckingAlgorithm = 0;
            }
          }
        }
        else
        {
          // Not a message handled by the broker itself: hand it to the caller.
          return msg;
        }
      }
      catch (SocketTimeoutException e)
      {
        // Timeouts are reported to the caller as is.
        throw e;
      }
      catch (Exception e)
      {
        logger.traceException(e);

        if (!shutdown)
        {
          if (rs.session == null || !rs.session.closeInitiated())
          {
            // We did not initiate the close on our side, log an error message.
            logger.error(WARN_REPLICATION_SERVER_BADLY_DISCONNECTED,
                serverId, baseDN, previousRsServerID, rs.replicationServer);
          }

          if (!reconnectOnFailure)
          {
            break; // does not seem necessary to explicitly disconnect ..
          }

          reStart(rs.session, true);
        }
      }
    } // while !shutdown
    return null;
  }
2597
2598  /**
2599   * Gets the States of all the Replicas currently in the Topology. When this
2600   * method is called, a Monitoring message will be sent to the Replication
2601   * Server to which this domain is currently connected so that it computes a
2602   * table containing information about all Directory Servers in the topology.
2603   * This Computation involves communications will all the servers currently
2604   * connected and
2605   *
2606   * @return The States of all Replicas in the topology (except us)
2607   */
2608  public Map<Integer, ServerState> getReplicaStates()
2609  {
2610    monitorResponse.set(false);
2611
2612    // publish Monitor Request LocalizableMessage to the Replication Server
2613    publish(new MonitorRequestMsg(getServerId(), getRsServerId()));
2614
2615    // wait for Response up to 10 seconds.
2616    try
2617    {
2618      synchronized (monitorResponse)
2619      {
2620        if (!monitorResponse.get())
2621        {
2622          monitorResponse.wait(10000);
2623        }
2624      }
2625    } catch (InterruptedException e)
2626    {
2627      Thread.currentThread().interrupt();
2628    }
2629    return replicaStates;
2630  }
2631
2632  /**
2633   * This method allows to do the necessary computing for the window
2634   * management after treatment by the worker threads.
2635   *
2636   * This should be called once the replay thread have done their job
2637   * and the window can be open again.
2638   */
2639  public synchronized void updateWindowAfterReplay()
2640  {
2641    try
2642    {
2643      updateDoneCount++;
2644      final Session session = connectedRS.get().session;
2645      if (updateDoneCount >= halfRcvWindow && session != null)
2646      {
2647        session.publish(new WindowMsg(updateDoneCount));
2648        rcvWindow += updateDoneCount;
2649        updateDoneCount = 0;
2650      }
2651    } catch (IOException e)
2652    {
2653      // Any error on the socket will be handled by the thread calling receive()
2654      // just ignore.
2655    }
2656  }
2657
2658  /** Stop the server. */
2659  public void stop()
2660  {
2661    if (logger.isTraceEnabled() && !shutdown)
2662    {
2663      debugInfo("is stopping and will close the connection to RS(" + getRsServerId() + ")");
2664    }
2665
2666    synchronized (startStopLock)
2667    {
2668      if (shutdown)
2669      {
2670        return;
2671      }
2672      domain.publishReplicaOfflineMsg();
2673      shutdown = true;
2674      setConnectedRS(ConnectedRS.stopped());
2675      stopRSHeartBeatMonitoring();
2676      stopChangeTimeHeartBeatPublishing();
2677      deregisterReplicationMonitor();
2678    }
2679  }
2680
2681  /**
2682   * Set a timeout value.
2683   * With this option set to a non-zero value, calls to the receive() method
2684   * block for only this amount of time after which a
2685   * java.net.SocketTimeoutException is raised.
2686   * The Broker is valid and usable even after such an Exception is raised.
2687   *
2688   * @param timeout the specified timeout, in milliseconds.
2689   * @throws SocketException if there is an error in the underlying protocol,
2690   *         such as a TCP error.
2691   */
2692  public void setSoTimeout(int timeout) throws SocketException
2693  {
2694    this.timeout = timeout;
2695    final Session session = connectedRS.get().session;
2696    if (session != null)
2697    {
2698      session.setSoTimeout(timeout);
2699    }
2700  }
2701
2702  /**
2703   * Get the name of the replicationServer to which this broker is currently
2704   * connected.
2705   *
2706   * @return the name of the replicationServer to which this domain
2707   *         is currently connected.
2708   */
2709  public String getReplicationServer()
2710  {
2711    return connectedRS.get().replicationServer;
2712  }
2713
2714  /**
2715   * Get the maximum receive window size.
2716   *
2717   * @return The maximum receive window size.
2718   */
2719  public int getMaxRcvWindow()
2720  {
2721    return config.getWindowSize();
2722  }
2723
2724  /**
2725   * Get the current receive window size.
2726   *
2727   * @return The current receive window size.
2728   */
2729  public int getCurrentRcvWindow()
2730  {
2731    return rcvWindow;
2732  }
2733
2734  /**
2735   * Get the maximum send window size.
2736   *
2737   * @return The maximum send window size.
2738   */
2739  public int getMaxSendWindow()
2740  {
2741    return maxSendWindow;
2742  }
2743
2744  /**
2745   * Get the current send window size.
2746   *
2747   * @return The current send window size.
2748   */
2749  public int getCurrentSendWindow()
2750  {
2751    if (isConnected())
2752    {
2753      return sendWindow.availablePermits();
2754    }
2755    return 0;
2756  }
2757
2758  /**
2759   * Get the number of times the connection was lost.
2760   * @return The number of times the connection was lost.
2761   */
2762  public int getNumLostConnections()
2763  {
2764    return numLostConnections;
2765  }
2766
2767  /**
2768   * Change some configuration parameters.
2769   *
2770   * @param newConfig  The new config to use.
2771   * @return                    A boolean indicating if the changes
2772   *                            requires to restart the service.
2773   */
2774  boolean changeConfig(ReplicationDomainCfg newConfig)
2775  {
2776    // These parameters needs to be renegotiated with the ReplicationServer
2777    // so if they have changed, that requires restarting the session with
2778    // the ReplicationServer.
2779    // A new session is necessary only when information regarding
2780    // the connection is modified
2781    boolean needToRestartSession =
2782        !newConfig.getReplicationServer().equals(config.getReplicationServer())
2783        || newConfig.getWindowSize() != config.getWindowSize()
2784        || newConfig.getHeartbeatInterval() != config.getHeartbeatInterval()
2785        || newConfig.getGroupId() != config.getGroupId();
2786
2787    this.config = newConfig;
2788    this.rcvWindow = newConfig.getWindowSize();
2789    this.halfRcvWindow = this.rcvWindow / 2;
2790
2791    return needToRestartSession;
2792  }
2793
2794  /**
2795   * Get the version of the replication protocol.
2796   * @return The version of the replication protocol.
2797   */
2798  public short getProtocolVersion()
2799  {
2800    final Session session = connectedRS.get().session;
2801    if (session != null)
2802    {
2803      return session.getProtocolVersion();
2804    }
2805    return ProtocolVersion.getCurrentVersion();
2806  }
2807
2808  /**
2809   * Check if the broker is connected to a ReplicationServer and therefore
2810   * ready to received and send Replication Messages.
2811   *
2812   * @return true if the server is connected, false if not.
2813   */
2814  public boolean isConnected()
2815  {
2816    return connectedRS.get().isConnected();
2817  }
2818
2819  /**
2820   * Determine whether the connection to the replication server is encrypted.
2821   * @return true if the connection is encrypted, false otherwise.
2822   */
2823  public boolean isSessionEncrypted()
2824  {
2825    final Session session = connectedRS.get().session;
2826    return session != null ? session.isEncrypted() : false;
2827  }
2828
2829  /**
2830   * Signals the RS we just entered a new status.
2831   * @param newStatus The status the local DS just entered
2832   */
2833  public void signalStatusChange(ServerStatus newStatus)
2834  {
2835    try
2836    {
2837      connectedRS.get().session.publish(
2838          new ChangeStatusMsg(ServerStatus.INVALID_STATUS, newStatus));
2839    } catch (IOException ex)
2840    {
2841      logger.error(ERR_EXCEPTION_SENDING_CS, getBaseDN(), getServerId(),
2842          ex.getLocalizedMessage() + " " + stackTraceToSingleLineString(ex));
2843    }
2844  }
2845
2846  /**
2847   * Gets the info for DSs in the topology (except us).
2848   * @return The info for DSs in the topology (except us)
2849   */
2850  public Map<Integer, DSInfo> getReplicaInfos()
2851  {
2852    return topology.get().replicaInfos;
2853  }
2854
2855  /**
2856   * Gets the info for RSs in the topology (except the one we are connected
2857   * to).
2858   * @return The info for RSs in the topology (except the one we are connected
2859   * to)
2860   */
2861  public List<RSInfo> getRsInfos()
2862  {
2863    return toRSInfos(topology.get().rsInfos);
2864  }
2865
2866  private List<RSInfo> toRSInfos(Map<Integer, ReplicationServerInfo> rsInfos)
2867  {
2868    final List<RSInfo> result = new ArrayList<>();
2869    for (ReplicationServerInfo rsInfo : rsInfos.values())
2870    {
2871      result.add(rsInfo.toRSInfo());
2872    }
2873    return result;
2874  }
2875
2876  /**
2877   * Processes an incoming TopologyMsg.
2878   * Updates the structures for the local view of the topology.
2879   *
2880   * @param topoMsg
2881   *          The topology information received from RS.
2882   * @param rsServerId
2883   *          the serverId to use for the connectedDS
2884   */
2885  private void receiveTopo(TopologyMsg topoMsg, int rsServerId)
2886  {
2887    final Topology newTopo = computeNewTopology(topoMsg, rsServerId);
2888    for (DSInfo dsInfo : newTopo.replicaInfos.values())
2889    {
2890      domain.setEclIncludes(dsInfo.getDsId(), dsInfo.getEclIncludes(), dsInfo
2891          .getEclIncludesForDeletes());
2892    }
2893  }
2894
2895  private Topology computeNewTopology(TopologyMsg topoMsg, int rsServerId)
2896  {
2897    Topology oldTopo;
2898    Topology newTopo;
2899    do
2900    {
2901      oldTopo = topology.get();
2902      newTopo = new Topology(topoMsg, getServerId(), rsServerId,
2903              getReplicationServerUrls(), oldTopo.rsInfos);
2904    }
2905    while (!topology.compareAndSet(oldTopo, newTopo));
2906
2907    if (logger.isTraceEnabled())
2908    {
2909      final StringBuilder sb = topologyChange(rsServerId, oldTopo, newTopo);
2910      sb.append(" received TopologyMsg=").append(topoMsg);
2911      debugInfo(sb);
2912    }
2913    return newTopo;
2914  }
2915
2916  /**
2917   * Contains the last known state of the replication topology.
2918   */
2919  static final class Topology
2920  {
2921
2922    /**
2923     * The RS's serverId that this DS was connected to when this topology state
2924     * was computed.
2925     */
2926    private final int rsServerId;
2927    /**
2928     * Info for other DSs.
2929     * <p>
2930     * Warning: does not contain info for us (for our server id)
2931     */
2932    final Map<Integer, DSInfo> replicaInfos;
2933    /**
2934     * The map of replication server info initialized at connection time and
2935     * regularly updated. This is used to decide to which best suitable
2936     * replication server one wants to connect. Key: replication server id
2937     * Value: replication server info for the matching replication server id
2938     */
2939    final Map<Integer, ReplicationServerInfo> rsInfos;
2940
2941    private Topology()
2942    {
2943      this.rsServerId = -1;
2944      this.replicaInfos = Collections.emptyMap();
2945      this.rsInfos = Collections.emptyMap();
2946    }
2947
2948    /**
2949     * Constructor to use when only the RSInfos need to be recomputed.
2950     *
2951     * @param dsInfosToKeep
2952     *          the DSInfos that will be stored as is
2953     * @param newRSInfos
2954     *          the new RSInfos from which to compute the new topology
2955     * @param dsServerId
2956     *          the DS serverId
2957     * @param rsServerId
2958     *          the current connected RS serverId
2959     * @param configuredReplicationServerUrls
2960     *          the configured replication server URLs
2961     * @param previousRsInfos
2962     *          the RSInfos computed in the previous Topology object
2963     */
2964    Topology(Map<Integer, DSInfo> dsInfosToKeep, List<RSInfo> newRSInfos,
2965        int dsServerId, int rsServerId,
2966        Set<String> configuredReplicationServerUrls,
2967        Map<Integer, ReplicationServerInfo> previousRsInfos)
2968    {
2969      this.rsServerId = rsServerId;
2970      this.replicaInfos = dsInfosToKeep == null
2971          ? Collections.<Integer, DSInfo>emptyMap() : dsInfosToKeep;
2972      this.rsInfos = computeRSInfos(dsServerId, newRSInfos,
2973          previousRsInfos, configuredReplicationServerUrls);
2974    }
2975
2976    /**
2977     * Constructor to use when a new TopologyMsg has been received.
2978     *
2979     * @param topoMsg
2980     *          the topology message containing the new DSInfos and RSInfos from
2981     *          which to compute the new topology
2982     * @param dsServerId
2983     *          the DS serverId
2984     * @param rsServerId
2985     *          the current connected RS serverId
2986     * @param configuredReplicationServerUrls
2987     *          the configured replication server URLs
2988     * @param previousRsInfos
2989     *          the RSInfos computed in the previous Topology object
2990     */
2991    Topology(TopologyMsg topoMsg, int dsServerId,
2992        int rsServerId, Set<String> configuredReplicationServerUrls,
2993        Map<Integer, ReplicationServerInfo> previousRsInfos)
2994    {
2995      this.rsServerId = rsServerId;
2996      this.replicaInfos = removeThisDs(topoMsg.getReplicaInfos(), dsServerId);
2997      this.rsInfos = computeRSInfos(dsServerId, topoMsg.getRsInfos(),
2998          previousRsInfos, configuredReplicationServerUrls);
2999    }
3000
3001    private Map<Integer, DSInfo> removeThisDs(Map<Integer, DSInfo> dsInfos,
3002        int dsServerId)
3003    {
3004      final Map<Integer, DSInfo> copy = new HashMap<>(dsInfos);
3005      copy.remove(dsServerId);
3006      return Collections.unmodifiableMap(copy);
3007    }
3008
3009    private Map<Integer, ReplicationServerInfo> computeRSInfos(
3010        int dsServerId, List<RSInfo> newRsInfos,
3011        Map<Integer, ReplicationServerInfo> previousRsInfos,
3012        Set<String> configuredReplicationServerUrls)
3013    {
3014      final Map<Integer, ReplicationServerInfo> results = new HashMap<>(previousRsInfos);
3015
3016      // Update replication server info list with the received topology info
3017      final Set<Integer> rssToKeep = new HashSet<>();
3018      for (RSInfo newRSInfo : newRsInfos)
3019      {
3020        final int rsId = newRSInfo.getId();
3021        rssToKeep.add(rsId); // Mark this server as still existing
3022        Set<Integer> connectedDSs =
3023            computeDSsConnectedTo(rsId, dsServerId);
3024        ReplicationServerInfo rsInfo = results.get(rsId);
3025        if (rsInfo == null)
3026        {
3027          // New replication server, create info for it add it to the list
3028          rsInfo = new ReplicationServerInfo(newRSInfo, connectedDSs);
3029          setLocallyConfiguredFlag(rsInfo, configuredReplicationServerUrls);
3030          results.put(rsId, rsInfo);
3031        }
3032        else
3033        {
3034          // Update the existing info for the replication server
3035          rsInfo.update(newRSInfo, connectedDSs);
3036        }
3037      }
3038
3039      // Remove any replication server that may have disappeared from the
3040      // topology
3041      results.keySet().retainAll(rssToKeep);
3042
3043      return Collections.unmodifiableMap(results);
3044    }
3045
3046    /** Computes the list of DSs connected to a particular RS. */
3047    private Set<Integer> computeDSsConnectedTo(int rsId, int dsServerId)
3048    {
3049      final Set<Integer> connectedDSs = new HashSet<>();
3050      if (rsServerId == rsId)
3051      {
3052        /*
3053         * If we are computing connected DSs for the RS we are connected to, we
3054         * should count the local DS as the DSInfo of the local DS is not sent
3055         * by the replication server in the topology message. We must count
3056         * ourselves as a connected server.
3057         */
3058        connectedDSs.add(dsServerId);
3059      }
3060
3061      for (DSInfo dsInfo : replicaInfos.values())
3062      {
3063        if (dsInfo.getRsId() == rsId)
3064        {
3065          connectedDSs.add(dsInfo.getDsId());
3066        }
3067      }
3068
3069      return connectedDSs;
3070    }
3071
3072    /**
3073     * Sets the locally configured flag for the passed ReplicationServerInfo
3074     * object, analyzing the local configuration.
3075     *
3076     * @param rsInfo
3077     *          the Replication server to check and update
3078     * @param configuredReplicationServerUrls
3079     */
3080    private void setLocallyConfiguredFlag(ReplicationServerInfo rsInfo,
3081        Set<String> configuredReplicationServerUrls)
3082    {
3083      // Determine if the passed ReplicationServerInfo has a URL that is present
3084      // in the locally configured replication servers
3085      String rsUrl = rsInfo.getServerURL();
3086      if (rsUrl == null)
3087      {
3088        // The ReplicationServerInfo has been generated from a server with
3089        // no URL in TopologyMsg (i.e: with replication protocol version < 4):
3090        // ignore this server as we do not know how to connect to it
3091        rsInfo.setLocallyConfigured(false);
3092        return;
3093      }
3094      for (String serverUrl : configuredReplicationServerUrls)
3095      {
3096        if (isSameReplicationServerUrl(serverUrl, rsUrl))
3097        {
3098          // This RS is locally configured, mark this
3099          rsInfo.setLocallyConfigured(true);
3100          rsInfo.setServerURL(serverUrl);
3101          return;
3102        }
3103      }
3104      rsInfo.setLocallyConfigured(false);
3105    }
3106
3107    /** {@inheritDoc} */
3108    @Override
3109    public boolean equals(Object obj)
3110    {
3111      if (this == obj)
3112      {
3113        return true;
3114      }
3115      if (obj == null || getClass() != obj.getClass())
3116      {
3117        return false;
3118      }
3119      final Topology other = (Topology) obj;
3120      return rsServerId == other.rsServerId
3121          && Objects.equals(replicaInfos, other.replicaInfos)
3122          && Objects.equals(rsInfos, other.rsInfos)
3123          && urlsEqual1(replicaInfos, other.replicaInfos)
3124          && urlsEqual2(rsInfos, other.rsInfos);
3125    }
3126
3127    private boolean urlsEqual1(Map<Integer, DSInfo> replicaInfos1,
3128        Map<Integer, DSInfo> replicaInfos2)
3129    {
3130      for (Entry<Integer, DSInfo> entry : replicaInfos1.entrySet())
3131      {
3132        DSInfo dsInfo = replicaInfos2.get(entry.getKey());
3133        if (!Objects.equals(entry.getValue().getDsUrl(), dsInfo.getDsUrl()))
3134        {
3135          return false;
3136        }
3137      }
3138      return true;
3139    }
3140
3141    private boolean urlsEqual2(Map<Integer, ReplicationServerInfo> rsInfos1,
3142        Map<Integer, ReplicationServerInfo> rsInfos2)
3143    {
3144      for (Entry<Integer, ReplicationServerInfo> entry : rsInfos1.entrySet())
3145      {
3146        ReplicationServerInfo rsInfo = rsInfos2.get(entry.getKey());
3147        if (!Objects.equals(entry.getValue().getServerURL(), rsInfo.getServerURL()))
3148        {
3149          return false;
3150        }
3151      }
3152      return true;
3153    }
3154
3155    /** {@inheritDoc} */
3156    @Override
3157    public int hashCode()
3158    {
3159      final int prime = 31;
3160      int result = 1;
3161      result = prime * result + rsServerId;
3162      result = prime * result
3163          + (replicaInfos == null ? 0 : replicaInfos.hashCode());
3164      result = prime * result + (rsInfos == null ? 0 : rsInfos.hashCode());
3165      return result;
3166    }
3167
3168    /** {@inheritDoc} */
3169    @Override
3170    public String toString()
3171    {
3172      return getClass().getSimpleName()
3173          + " rsServerId=" + rsServerId
3174          + ", replicaInfos=" + replicaInfos.values()
3175          + ", rsInfos=" + rsInfos.values();
3176    }
3177  }
3178
3179  /**
3180   * Check if the broker could not find any Replication Server and therefore
3181   * connection attempt failed.
3182   *
3183   * @return true if the server could not connect to any Replication Server.
3184   */
3185  boolean hasConnectionError()
3186  {
3187    return connectionError;
3188  }
3189
3190  /**
3191   * Starts publishing to the RS the current timestamp used in this server.
3192   */
3193  private void startChangeTimeHeartBeatPublishing(ConnectedRS rs)
3194  {
3195    // Start a CSN heartbeat thread.
3196    long changeTimeHeartbeatInterval = config.getChangetimeHeartbeatInterval();
3197    if (changeTimeHeartbeatInterval > 0)
3198    {
3199      final String threadName = "Replica DS(" + getServerId()
3200              + ") change time heartbeat publisher for domain \"" + getBaseDN()
3201              + "\" to RS(" + rs.getServerId() + ") at " + rs.replicationServer;
3202
3203      ctHeartbeatPublisherThread = new CTHeartbeatPublisherThread(
3204          threadName, rs.session, changeTimeHeartbeatInterval, getServerId());
3205      ctHeartbeatPublisherThread.start();
3206    }
3207    else
3208    {
3209      if (logger.isTraceEnabled())
3210      {
3211        debugInfo("is not configured to send CSN heartbeat interval");
3212      }
3213    }
3214  }
3215
3216  /**
3217   * Stops publishing to the RS the current timestamp used in this server.
3218   */
3219  private synchronized void stopChangeTimeHeartBeatPublishing()
3220  {
3221    if (ctHeartbeatPublisherThread != null)
3222    {
3223      ctHeartbeatPublisherThread.shutdown();
3224      ctHeartbeatPublisherThread = null;
3225    }
3226  }
3227
3228  /**
3229   * Set the connectRequiresRecovery to the provided value.
3230   * This flag is used to indicate if a recovery of Update is necessary
3231   * after a reconnection to a RS.
3232   * It is the responsibility of the ReplicationDomain to set it during the
3233   * sessionInitiated phase.
3234   *
3235   * @param b the new value of the connectRequiresRecovery.
3236   */
3237  public void setRecoveryRequired(boolean b)
3238  {
3239    connectRequiresRecovery = b;
3240  }
3241
3242  /**
3243   * Returns whether the broker is shutting down.
3244   * @return whether the broker is shutting down.
3245   */
3246  boolean shuttingDown()
3247  {
3248    return shutdown;
3249  }
3250
3251  /**
3252   * Returns the local address of this replication domain, or the empty string
3253   * if it is not yet connected.
3254   *
3255   * @return The local address.
3256   */
3257  String getLocalUrl()
3258  {
3259    final Session session = connectedRS.get().session;
3260    return session != null ? session.getLocalUrl() : "";
3261  }
3262
3263  /**
3264   * Returns the replication monitor instance name associated with this broker.
3265   *
3266   * @return The replication monitor instance name.
3267   */
3268  String getReplicationMonitorInstanceName()
3269  {
3270    // Only invoked by replication domain so always non-null.
3271    return monitor.getMonitorInstanceName();
3272  }
3273
3274  private ConnectedRS setConnectedRS(final ConnectedRS newRS)
3275  {
3276    final ConnectedRS oldRS = connectedRS.getAndSet(newRS);
3277    if (!oldRS.equals(newRS) && oldRS.session != null)
3278    {
3279      // monitor name is changing, deregister before registering again
3280      deregisterReplicationMonitor();
3281      oldRS.session.close();
3282      registerReplicationMonitor();
3283    }
3284    return newRS;
3285  }
3286
3287  /**
3288   * Must be invoked each time the session changes because, the monitor name is
3289   * dynamically created with the session name, while monitor registration is
3290   * static.
3291   *
3292   * @see #monitor
3293   */
3294  private void registerReplicationMonitor()
3295  {
3296    // The monitor should not be registered if this is a unit test
3297    // because the replication domain is null.
3298    if (monitor != null)
3299    {
3300      DirectoryServer.registerMonitorProvider(monitor);
3301    }
3302  }
3303
3304  private void deregisterReplicationMonitor()
3305  {
3306    // The monitor should not be deregistered if this is a unit test
3307    // because the replication domain is null.
3308    if (monitor != null)
3309    {
3310      DirectoryServer.deregisterMonitorProvider(monitor);
3311    }
3312  }
3313
3314  /** {@inheritDoc} */
3315  @Override
3316  public String toString()
3317  {
3318    final StringBuilder sb = new StringBuilder();
3319    sb.append(getClass().getSimpleName())
3320      .append(" \"").append(getBaseDN()).append(" ")
3321      .append(getServerId()).append("\",")
3322      .append(" groupId=").append(getGroupId())
3323      .append(", genId=").append(getGenerationID())
3324      .append(", ");
3325    connectedRS.get().toString(sb);
3326    return sb.toString();
3327  }
3328
3329  private void debugInfo(CharSequence message)
3330  {
3331    logger.trace(getClass().getSimpleName() + " for baseDN=" + getBaseDN()
3332        + " and serverId=" + getServerId() + ": " + message);
3333  }
3334}