001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2006-2010 Sun Microsystems, Inc.
015 * Portions Copyright 2011-2016 ForgeRock AS.
016 */
017package org.opends.server.replication.server;
018
019import static org.opends.messages.ReplicationMessages.*;
020
021import java.io.IOException;
022import java.util.Random;
023import java.util.concurrent.Semaphore;
024import java.util.concurrent.TimeUnit;
025import java.util.concurrent.atomic.AtomicInteger;
026
027import org.forgerock.i18n.LocalizableMessage;
028import org.forgerock.i18n.slf4j.LocalizedLogger;
029import org.forgerock.opendj.config.server.ConfigException;
030import org.forgerock.opendj.ldap.ResultCode;
031import org.opends.server.admin.std.server.MonitorProviderCfg;
032import org.opends.server.api.MonitorData;
033import org.opends.server.core.DirectoryServer;
034import org.opends.server.replication.common.AssuredMode;
035import org.opends.server.replication.common.CSN;
036import org.opends.server.replication.common.RSInfo;
037import org.opends.server.replication.common.ServerStatus;
038import org.opends.server.replication.protocol.AckMsg;
039import org.opends.server.replication.protocol.ChangeTimeHeartbeatMsg;
040import org.opends.server.replication.protocol.HeartbeatThread;
041import org.opends.server.replication.protocol.MonitorMsg;
042import org.opends.server.replication.protocol.MonitorRequestMsg;
043import org.opends.server.replication.protocol.ProtocolVersion;
044import org.opends.server.replication.protocol.ReplServerStartMsg;
045import org.opends.server.replication.protocol.ReplicationMsg;
046import org.opends.server.replication.protocol.ResetGenerationIdMsg;
047import org.opends.server.replication.protocol.RoutableMsg;
048import org.opends.server.replication.protocol.Session;
049import org.opends.server.replication.protocol.StartMsg;
050import org.opends.server.replication.protocol.StartSessionMsg;
051import org.opends.server.replication.protocol.TopologyMsg;
052import org.opends.server.replication.protocol.UpdateMsg;
053import org.opends.server.replication.protocol.WindowMsg;
054import org.opends.server.replication.server.changelog.api.ChangelogException;
055import org.opends.server.types.DirectoryException;
056import org.opends.server.types.InitializationException;
057
058/**
059 * This class defines a server handler  :
060 * - that is a MessageHandler (see this class for more details)
061 * - that handles all interaction with a peer server (RS or DS).
062 */
063public abstract class ServerHandler extends MessageHandler
064{
065
  /** Logger used for the trace and error messages emitted by this handler. */
  private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass();

  /**
   * Maximum time, in milliseconds, that {@link #shutdown()} waits for the
   * writer thread, and then for the reader thread, to terminate.
   */
  private static final int SHUTDOWN_JOIN_TIMEOUT = 30000;

  /**
   * The serverId of the remote server.
   */
  protected int serverId;
  /**
   * The session opened with the remote server. Several methods of this class
   * guard against it being null.
   */
  protected final Session session;

  /**
   * The serverURL of the remote server.
   */
  protected String serverURL;
  /**
   * Number of updates received from the server in assured safe read mode.
   * Plain int: incremented without synchronization.
   */
  private int assuredSrReceivedUpdates;
  /**
   * Number of updates received from the server in assured safe read mode that
   * timed out.
   */
  private final AtomicInteger assuredSrReceivedUpdatesTimeout = new AtomicInteger();
  /**
   * Number of updates sent to the server in assured safe read mode.
   * Incremented when an assured safe read update is taken for sending.
   */
  private int assuredSrSentUpdates;
  /**
   * Number of updates sent to the server in assured safe read mode that timed
   * out.
   */
  private final AtomicInteger assuredSrSentUpdatesTimeout = new AtomicInteger();
  /**
   * Number of updates received from the server in assured safe data mode.
   * Plain int: incremented without synchronization.
   */
  private int assuredSdReceivedUpdates;
  /**
   * Number of updates received from the server in assured safe data mode that
   * timed out.
   */
  private final AtomicInteger assuredSdReceivedUpdatesTimeout = new AtomicInteger();
  /**
   * Number of updates sent to the server in assured safe data mode.
   * Only incremented when the remote server is not a data server.
   */
  private int assuredSdSentUpdates;

  /**
   * Number of updates sent to the server in assured safe data mode that timed out.
   */
  private final AtomicInteger assuredSdSentUpdatesTimeout = new AtomicInteger();

  /**
   * The associated ServerWriter that sends messages to the remote server.
   * Created and started by finalizeStart().
   */
  private ServerWriter writer;

  /**
   * The associated ServerReader that receives messages from the remote server.
   * Created and started by finalizeStart().
   */
  private ServerReader reader;

  /**
   * Remaining credit in the receive window: decremented for each received
   * update and replenished by checkWindow().
   */
  private int rcvWindow;
  /**
   * Half of the receive window size. Used both as the threshold below which
   * the window is replenished and as the credit granted in each WindowMsg.
   */
  private final int rcvWindowSizeHalf;

  /** The size of the receiving window. */
  protected final int maxRcvWindow;
  /**
   * Semaphore that the writer uses to control the flow to the remote server.
   * Only created by finalizeStart(), so it is null before that call.
   */
  private Semaphore sendWindow;
  /** The initial size of the sending window. */
  private int sendWindowSize;
  /** Remote generation id. */
  protected long generationId = -1;
  /** The generation id of the hosting RS. */
  protected long localGenerationId = -1;
  /** The generation id before processing a new start handshake. */
  protected long oldGenerationId = -1;
  /** Group id of this remote server. */
  protected byte groupId = -1;
  /** The SSL encryption after the negotiation with the peer. */
  protected boolean sslEncryption;
  /**
   * The time in milliseconds between heartbeats from the replication
   * server.  Zero means heartbeats are off. The heartbeat thread is created
   * with one third of this value.
   */
  protected long heartbeatInterval;

  /** The thread that will send heartbeats; null when heartbeats are off. */
  private HeartbeatThread heartbeatThread;

  /**
   * Set when ServerWriter is stopping. Raised by shutdown() so that a thread
   * waiting for a send-window permit stops waiting.
   */
  private volatile boolean shutdownWriter;

  /** Weight of this remote server. */
  protected int weight = 1;
168
169  /**
170   * Creates a new server handler instance with the provided socket.
171   *
172   * @param session The Session used by the ServerHandler to
173   *                 communicate with the remote entity.
174   * @param queueSize The maximum number of update that will be kept
175   *                  in memory by this ServerHandler.
176   * @param replicationServer The hosting replication server.
177   * @param rcvWindowSize The window size to receive from the remote server.
178   */
179  public ServerHandler(
180      Session session,
181      int queueSize,
182      ReplicationServer replicationServer,
183      int rcvWindowSize)
184  {
185    super(queueSize, replicationServer);
186    this.session = session;
187    this.rcvWindowSizeHalf = rcvWindowSize / 2;
188    this.maxRcvWindow = rcvWindowSize;
189    this.rcvWindow = rcvWindowSize;
190  }
191
192  /**
193   * Abort a start procedure currently establishing.
194   * @param reason The provided reason.
195   */
196  protected void abortStart(LocalizableMessage reason)
197  {
198    // We did not recognize the message, close session as what can happen after
199    // is undetermined and we do not want the server to be disturbed
200    Session localSession = session;
201    if (localSession != null)
202    {
203      if (reason != null)
204      {
205        if (logger.isTraceEnabled())
206        {
207         logger.trace("In " + this + " closing session with err=" + reason);
208        }
209        logger.error(reason);
210      }
211
212      // This method is only called when aborting a failing handshake and
213      // not StopMsg should be sent in such situation. StopMsg are only
214      // expected when full handshake has been performed, or at end of
215      // handshake phase 1, when DS was just gathering available RS info
216      localSession.close();
217    }
218
219    releaseDomainLock();
220
221    // If generation id of domain was changed, set it back to old value
222    // We may have changed it as it was -1 and we received a value >0 from peer
223    // server and the last topo message sent may have failed being sent: in that
224    // case retrieve old value of generation id for replication server domain
225    if (oldGenerationId != -100)
226    {
227      replicationServerDomain.changeGenerationId(oldGenerationId);
228    }
229  }
230
231  /**
232   * Releases the lock on the replication server domain if it was held.
233   */
234  protected void releaseDomainLock()
235  {
236    if (replicationServerDomain.hasLock())
237    {
238      replicationServerDomain.release();
239    }
240  }
241
242  /**
243   * Check the protocol window and send WindowMsg if necessary.
244   *
245   * @throws IOException when the session becomes unavailable.
246   */
247  public synchronized void checkWindow() throws IOException
248  {
249    if (rcvWindow < rcvWindowSizeHalf)
250    {
251      WindowMsg msg = new WindowMsg(rcvWindowSizeHalf);
252      session.publish(msg);
253      rcvWindow += rcvWindowSizeHalf;
254    }
255  }
256
257  /**
258   * Decrement the protocol window, then check if it is necessary
259   * to send a WindowMsg and send it.
260   *
261   * @throws IOException when the session becomes unavailable.
262   */
263  private synchronized void decAndCheckWindow() throws IOException
264  {
265    rcvWindow--;
266    checkWindow();
267  }
268
269  /**
270   * Finalize the initialization, create reader, writer, heartbeat system
271   * and monitoring system.
272   * @throws DirectoryException When an exception is raised.
273   */
274  protected void finalizeStart() throws DirectoryException
275  {
276    // FIXME:ECL We should refactor so that a SH always have a session
277    if (session != null)
278    {
279      try
280      {
281        // Disable timeout for next communications
282        session.setSoTimeout(0);
283      }
284      catch(Exception e)
285      { /* do nothing */
286      }
287
288      // sendWindow MUST be created before starting the writer
289      sendWindow = new Semaphore(sendWindowSize);
290
291      writer = new ServerWriter(session, this, replicationServerDomain,
292          replicationServer.getDSRSShutdownSync());
293      reader = new ServerReader(session, this);
294
295      session.setName("Replication server RS(" + getReplicationServerId()
296          + ") session thread to " + this + " at "
297          + session.getReadableRemoteAddress());
298      session.start();
299      try
300      {
301        session.waitForStartup();
302      }
303      catch (InterruptedException e)
304      {
305        final LocalizableMessage message =
306            ERR_SESSION_STARTUP_INTERRUPTED.get(session.getName());
307        throw new DirectoryException(ResultCode.OTHER, message, e);
308      }
309      reader.start();
310      writer.start();
311
312      // Create a thread to send heartbeat messages.
313      if (heartbeatInterval > 0)
314      {
315        String threadName = "Replication server RS(" + getReplicationServerId()
316            + ") heartbeat publisher to " + this + " at "
317            + session.getReadableRemoteAddress();
318        heartbeatThread = new HeartbeatThread(threadName, session,
319            heartbeatInterval / 3);
320        heartbeatThread.start();
321      }
322    }
323
324    DirectoryServer.deregisterMonitorProvider(this);
325    DirectoryServer.registerMonitorProvider(this);
326  }
327
328  /**
329   * Sends a message.
330   *
331   * @param msg
332   *          The message to be sent.
333   * @throws IOException
334   *           When it occurs while sending the message,
335   */
336  public void send(ReplicationMsg msg) throws IOException
337  {
338    // avoid logging anything for unit tests that include a null domain.
339    if (logger.isTraceEnabled())
340    {
341      logger.trace("In "
342          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
343          + this + " publishes message:\n" + msg);
344    }
345    session.publish(msg);
346  }
347
348  /**
349   * Get the age of the older change that has not yet been replicated
350   * to the server handled by this ServerHandler.
351   * @return The age if the older change has not yet been replicated
352   *         to the server handled by this ServerHandler.
353   */
354  public long getApproxFirstMissingDate()
355  {
356    // Get the older CSN received
357    CSN olderUpdateCSN = getOlderUpdateCSN();
358    if (olderUpdateCSN != null)
359    {
360      // If not present in the local RS db,
361      // then approximate with the older update time
362      return olderUpdateCSN.getTime();
363    }
364    return 0;
365  }
366
367  /**
368   * Get the number of updates received from the server in assured safe data
369   * mode.
370   * @return The number of updates received from the server in assured safe data
371   * mode
372   */
373  public int getAssuredSdReceivedUpdates()
374  {
375    return assuredSdReceivedUpdates;
376  }
377
378  /**
379   * Get the number of updates received from the server in assured safe data
380   * mode that timed out.
381   * @return The number of updates received from the server in assured safe data
382   * mode that timed out.
383   */
384  public AtomicInteger getAssuredSdReceivedUpdatesTimeout()
385  {
386    return assuredSdReceivedUpdatesTimeout;
387  }
388
389  /**
390   * Get the number of updates sent to the server in assured safe data mode.
391   * @return The number of updates sent to the server in assured safe data mode
392   */
393  public int getAssuredSdSentUpdates()
394  {
395    return assuredSdSentUpdates;
396  }
397
398  /**
399   * Get the number of updates sent to the server in assured safe data mode that
400   * timed out.
401   * @return The number of updates sent to the server in assured safe data mode
402   * that timed out.
403   */
404  public AtomicInteger getAssuredSdSentUpdatesTimeout()
405  {
406    return assuredSdSentUpdatesTimeout;
407  }
408
409  /**
410   * Get the number of updates received from the server in assured safe read
411   * mode.
412   * @return The number of updates received from the server in assured safe read
413   * mode
414   */
415  public int getAssuredSrReceivedUpdates()
416  {
417    return assuredSrReceivedUpdates;
418  }
419
420  /**
421   * Get the number of updates received from the server in assured safe read
422   * mode that timed out.
423   * @return The number of updates received from the server in assured safe read
424   * mode that timed out.
425   */
426  public AtomicInteger getAssuredSrReceivedUpdatesTimeout()
427  {
428    return assuredSrReceivedUpdatesTimeout;
429  }
430
431  /**
432   * Get the number of updates sent to the server in assured safe read mode.
433   * @return The number of updates sent to the server in assured safe read mode
434   */
435  public int getAssuredSrSentUpdates()
436  {
437    return assuredSrSentUpdates;
438  }
439
440  /**
441   * Get the number of updates sent to the server in assured safe read mode that
442   * timed out.
443   * @return The number of updates sent to the server in assured safe read mode
444   * that timed out.
445   */
446  public AtomicInteger getAssuredSrSentUpdatesTimeout()
447  {
448    return assuredSrSentUpdatesTimeout;
449  }
450
451  /**
452   * Returns the Replication Server Domain to which belongs this server handler.
453   *
454   * @return The replication server domain.
455   */
456  public ReplicationServerDomain getDomain()
457  {
458    return replicationServerDomain;
459  }
460
461  /**
462   * Returns the value of generationId for that handler.
463   * @return The value of the generationId.
464   */
465  public long getGenerationId()
466  {
467    return generationId;
468  }
469
470  /**
471   * Gets the group id of the server represented by this object.
472   * @return The group id of the server represented by this object.
473   */
474  public byte getGroupId()
475  {
476    return groupId;
477  }
478
479  /**
480   * Get our heartbeat interval.
481   * @return Our heartbeat interval.
482   */
483  public long getHeartbeatInterval()
484  {
485    return heartbeatInterval;
486  }
487
488  @Override
489  public MonitorData getMonitorData()
490  {
491    // Get the generic ones
492    MonitorData attributes = super.getMonitorData();
493
494    attributes.add("server-id", serverId);
495    attributes.add("domain-name", getBaseDN());
496
497    // Deprecated
498    attributes.add("max-waiting-changes", maxQueueSize);
499    attributes.add("sent-updates", getOutCount());
500    attributes.add("received-updates", getInCount());
501
502    // Assured counters
503    attributes.add("assured-sr-received-updates", getAssuredSrReceivedUpdates());
504    attributes.add("assured-sr-received-updates-timeout", getAssuredSrReceivedUpdatesTimeout());
505    attributes.add("assured-sr-sent-updates", getAssuredSrSentUpdates());
506    attributes.add("assured-sr-sent-updates-timeout", getAssuredSrSentUpdatesTimeout());
507    attributes.add("assured-sd-received-updates", getAssuredSdReceivedUpdates());
508    if (!isDataServer())
509    {
510      attributes.add("assured-sd-sent-updates", getAssuredSdSentUpdates());
511      attributes.add("assured-sd-sent-updates-timeout", getAssuredSdSentUpdatesTimeout());
512    } else
513    {
514      attributes.add("assured-sd-received-updates-timeout", getAssuredSdReceivedUpdatesTimeout());
515    }
516
517    // Window stats
518    attributes.add("max-send-window", sendWindowSize);
519    attributes.add("current-send-window", sendWindow.availablePermits());
520    attributes.add("max-rcv-window", maxRcvWindow);
521    attributes.add("current-rcv-window", rcvWindow);
522
523    // Encryption
524    attributes.add("ssl-encryption", session.isEncrypted());
525
526    // Data generation
527    attributes.add("generation-id", generationId);
528
529    return attributes;
530  }
531
  /**
   * Retrieves the name of this monitor provider.  It should be unique among all
   * monitor providers, including all instances of the same monitor provider.
   * <p>
   * Left abstract here: each concrete server handler supplies its own name.
   *
   * @return  The name of this monitor provider.
   */
  @Override
  public abstract String getMonitorInstanceName();
540
541  /**
542   * Gets the protocol version used with this remote server.
543   * @return The protocol version used with this remote server.
544   */
545  public short getProtocolVersion()
546  {
547    return session.getProtocolVersion();
548  }
549
550  /**
551   * Get the Server Id.
552   *
553   * @return the ID of the server to which this object is linked
554   */
555  public int getServerId()
556  {
557    return serverId;
558  }
559
560  /**
561   * Retrieves the URL for this server handler.
562   *
563   * @return  The URL for this server handler, in the form of an address and
564   *          port separated by a colon.
565   */
566  public String getServerURL()
567  {
568    return serverURL;
569  }
570
  /**
   * Returns the ServerStatus. Left abstract here: each concrete server
   * handler decides which status it reports.
   *
   * @return The server status.
   */
  protected abstract ServerStatus getStatus();
576
577  /**
578   * Increment the number of updates received from the server in assured safe
579   * data mode.
580   */
581  public void incrementAssuredSdReceivedUpdates()
582  {
583    assuredSdReceivedUpdates++;
584  }
585
586  /**
587   * Increment the number of updates received from the server in assured safe
588   * data mode that timed out.
589   */
590  public void incrementAssuredSdReceivedUpdatesTimeout()
591  {
592    assuredSdReceivedUpdatesTimeout.incrementAndGet();
593  }
594
595  /**
596   * Increment the number of updates sent to the server in assured safe data
597   * mode that timed out.
598   */
599  public void incrementAssuredSdSentUpdatesTimeout()
600  {
601    assuredSdSentUpdatesTimeout.incrementAndGet();
602  }
603
604  /**
605   * Increment the number of updates received from the server in assured safe
606   * read mode.
607   */
608  public void incrementAssuredSrReceivedUpdates()
609  {
610    assuredSrReceivedUpdates++;
611  }
612
613  /**
614   * Increment the number of updates received from the server in assured safe
615   * read mode that timed out.
616   */
617  public void incrementAssuredSrReceivedUpdatesTimeout()
618  {
619    assuredSrReceivedUpdatesTimeout.incrementAndGet();
620  }
621
622  /**
623   * Increment the number of updates sent to the server in assured safe read
624   * mode that timed out.
625   */
626  public void incrementAssuredSrSentUpdatesTimeout()
627  {
628    assuredSrSentUpdatesTimeout.incrementAndGet();
629  }
630
631  /** {@inheritDoc} */
632  @Override
633  public void initializeMonitorProvider(MonitorProviderCfg configuration)
634  throws ConfigException, InitializationException
635  {
636    // Nothing to do for now
637  }
638
  /**
   * Check if the server associated to this ServerHandler is a data server
   * in the topology. {@link #isReplicationServer()} is defined as the
   * negation of this method.
   *
   * @return true if the server is a data server.
   */
  public abstract boolean isDataServer();
645
646  /**
647   * Check if the server associated to this ServerHandler is a replication
648   * server.
649   * @return true if the server is a replication server.
650   */
651  public boolean isReplicationServer()
652  {
653    return !isDataServer();
654  }
655
656  // The handshake phase must be done by blocking any access to structures
657  // keeping info on connected servers, so that one can safely check for
658  // pre-existence of a server, send a coherent snapshot of known topology to
659  // peers, update the local view of the topology...
660  //
661  // For instance a kind of problem could be that while we connect with a
662  // peer RS, a DS is connecting at the same time and we could publish the
663  // connected DSs to the peer RS forgetting this last DS in the TopologyMsg.
664  //
  // This method, and every other one that needs to read or change the
  // structures holding the topology for the domain, should:
667  // - call ReplicationServerDomain.lock()
668  // - read/modify structures
669  // - call ReplicationServerDomain.release()
670  //
671  // More information is provided in comment of ReplicationServerDomain.lock()
672
673  /**
674   * Lock the domain without a timeout.
675   * <p>
676   * If domain already exists, lock it until handshake is finished otherwise it
677   * will be created and locked later in the method
678   *
679   * @throws DirectoryException
680   *           When an exception occurs.
681   * @throws InterruptedException
682   *           If the current thread was interrupted while waiting for the lock.
683   */
684  public void lockDomainNoTimeout() throws DirectoryException,
685      InterruptedException
686  {
687    if (!replicationServerDomain.hasLock())
688    {
689      replicationServerDomain.lock();
690    }
691  }
692
693  /**
694   * Lock the domain with a timeout.
695   * <p>
696   * Take the lock on the domain. WARNING: Here we try to acquire the lock with
697   * a timeout. This is for preventing a deadlock that may happen if there are
698   * cross connection attempts (for same domain) from this replication server
699   * and from a peer one.
700   * <p>
701   * Here is the scenario:
702   * <ol>
703   * <li>RS1 connect thread takes the domain lock and starts connection to RS2
704   * </li>
705   * <li>at the same time RS2 connect thread takes his domain lock and start
706   * connection to RS2</li>
707   * <li>RS2 listen thread starts processing received ReplServerStartMsg from
708   * RS1 and wants to acquire the lock on the domain (here) but cannot as RS2
709   * connect thread already has it</li>
710   * <li>RS1 listen thread starts processing received ReplServerStartMsg from
711   * RS2 and wants to acquire the lock on the domain (here) but cannot as RS1
712   * connect thread already has it</li>
713   * </ol>
714   * => Deadlock: 4 threads are locked.
715   * <p>
716   * To prevent threads locking in such situation, the listen threads here will
717   * both timeout trying to acquire the lock. The random time for the timeout
718   * should allow on connection attempt to be aborted whereas the other one
719   * should have time to finish in the same time.
720   * <p>
721   * Warning: the minimum time (3s) should be big enough to allow normal
722   * situation connections to terminate. The added random time should represent
723   * a big enough range so that the chance to have one listen thread timing out
724   * a lot before the peer one is great. When the first listen thread times out,
725   * the remote connect thread should release the lock and allow the peer listen
726   * thread to take the lock it was waiting for and process the connection
727   * attempt.
728   *
729   * @throws DirectoryException
730   *           When an exception occurs.
731   * @throws InterruptedException
732   *           If the current thread was interrupted while waiting for the lock.
733   */
734  public void lockDomainWithTimeout() throws DirectoryException,
735      InterruptedException
736  {
737    final Random random = new Random();
738    final int randomTime = random.nextInt(6); // Random from 0 to 5
739    // Wait at least 3 seconds + (0 to 5 seconds)
740    final long timeout = 3000 + randomTime * 1000;
741    final boolean lockAcquired = replicationServerDomain.tryLock(timeout);
742    if (!lockAcquired)
743    {
744      LocalizableMessage message = WARN_TIMEOUT_WHEN_CROSS_CONNECTION.get(
745          getBaseDN(), serverId, session.getReadableRemoteAddress(), getReplicationServerId());
746      throw new DirectoryException(ResultCode.OTHER, message);
747    }
748  }
749
750  /**
751   * Processes a routable message.
752   *
753   * @param msg The message to be processed.
754   */
755  void process(RoutableMsg msg)
756  {
757    if (logger.isTraceEnabled())
758    {
759      logger.trace("In "
760          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
761          + this + " processes routable msg received:" + msg);
762    }
763    replicationServerDomain.process(msg, this);
764  }
765
766  /**
767   * Responds to a monitor request message.
768   *
769   * @param msg
770   *          The monitor request message.
771   */
772  void processMonitorRequestMsg(MonitorRequestMsg msg)
773  {
774    replicationServerDomain.processMonitorRequestMsg(msg, this);
775  }
776
777  /**
778   * Responds to a monitor message.
779   *
780   * @param msg
781   *          The monitor message.
782   */
783  void processMonitorMsg(MonitorMsg msg)
784  {
785    replicationServerDomain.processMonitorMsg(msg, this);
786  }
787
788  /**
789   * Processes a change time heartbeat msg.
790   *
791   * @param msg
792   *          The message to be processed.
793   * @throws DirectoryException
794   *           When an exception is raised.
795   */
796  void process(ChangeTimeHeartbeatMsg msg) throws DirectoryException
797  {
798    if (logger.isTraceEnabled())
799    {
800      logger.trace("In "
801          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
802          + this + " processes received msg:\n" + msg);
803    }
804    replicationServerDomain.processChangeTimeHeartbeatMsg(this, msg);
805  }
806
807  /**
808   * Process the reception of a WindowProbeMsg message.
809   *
810   * @throws IOException
811   *           When the session becomes unavailable.
812   */
813  public void replyToWindowProbe() throws IOException
814  {
815    if (rcvWindow > 0)
816    {
817      // The LDAP server believes that its window is closed while it is not,
818      // this means that some problem happened in the window exchange procedure!
819      // lets update the LDAP server with out current window size and hope
820      // that everything will work better in the future.
821      // TODO also log an error message.
822      session.publish(new WindowMsg(rcvWindow));
823    }
824    else
825    {
826      // Both the LDAP server and the replication server believes that the
827      // window is closed. Lets check the flowcontrol in case we
828      // can now resume operations and send a windowMessage if necessary.
829      checkWindow();
830    }
831  }
832
833  /**
834   * Sends the provided TopologyMsg to the peer server.
835   *
836   * @param topoMsg
837   *          The TopologyMsg message to be sent.
838   * @throws IOException
839   *           When it occurs while sending the message,
840   */
841  public void sendTopoInfo(TopologyMsg topoMsg) throws IOException
842  {
843    // V1 Rs do not support the TopologyMsg
844    if (getProtocolVersion() > ProtocolVersion.REPLICATION_PROTOCOL_V1)
845    {
846      send(topoMsg);
847    }
848  }
849
850  /**
851   * Set a new generation ID.
852   *
853   * @param generationId The new generation ID
854   *
855   */
856  public void setGenerationId(long generationId)
857  {
858    this.generationId = generationId;
859  }
860
861  /**
862   * Sets the window size when used when sending to the remote.
863   * @param size The provided window size.
864   */
865  protected void setSendWindowSize(int size)
866  {
867    this.sendWindowSize = size;
868  }
869
870  /**
871   * Shutdown This ServerHandler.
872   */
873  @Override
874  public void shutdown()
875  {
876    shutdownWriter = true;
877    setConsumerActive(false);
878    super.shutdown();
879
880    if (session != null)
881    {
882      session.close();
883    }
884    if (heartbeatThread != null)
885    {
886      heartbeatThread.shutdown();
887    }
888
889    DirectoryServer.deregisterMonitorProvider(this);
890
891    /*
892     * Be sure to wait for ServerWriter and ServerReader death
893     * It does not matter if we try to stop a thread which is us (reader
894     * or writer), but we must not wait for our own thread death.
895     */
896    try
897    {
898      if (writer != null && !Thread.currentThread().equals(writer))
899      {
900        writer.join(SHUTDOWN_JOIN_TIMEOUT);
901      }
902      if (reader != null && !Thread.currentThread().equals(reader))
903      {
904        reader.join(SHUTDOWN_JOIN_TIMEOUT);
905      }
906    } catch (InterruptedException e)
907    {
908      // don't try anymore to join and return.
909    }
910    if (logger.isTraceEnabled())
911    {
912      logger.trace("SH.shutdowned(" + this + ")");
913    }
914  }
915
916  /**
917   * Select the next update that must be sent to the server managed by this
918   * ServerHandler.
919   *
920   * @return the next update that must be sent to the server managed by this
921   *         ServerHandler.
922   * @throws ChangelogException
923   *            If a problem occurs when reading the changelog
924   */
925  public UpdateMsg take() throws ChangelogException
926  {
927    final UpdateMsg msg = getNextMessage();
928
929    acquirePermitInSendWindow();
930
931    if (msg != null)
932    {
933      incrementOutCount();
934      if (msg.isAssured())
935      {
936        incrementAssuredStats(msg);
937      }
938      return msg;
939    }
940    return null;
941  }
942
943  private void acquirePermitInSendWindow()
944  {
945    boolean acquired = false;
946    boolean interrupted = true;
947    do
948    {
949      try
950      {
951        acquired = sendWindow.tryAcquire(500, TimeUnit.MILLISECONDS);
952        interrupted = false;
953      } catch (InterruptedException e)
954      {
955        // loop until not interrupted
956      }
957    } while ((interrupted || !acquired) && !shutdownWriter);
958  }
959
960  private void incrementAssuredStats(final UpdateMsg msg)
961  {
962    if (msg.getAssuredMode() == AssuredMode.SAFE_READ_MODE)
963    {
964      assuredSrSentUpdates++;
965    }
966    else if (!isDataServer())
967    {
968      assuredSdSentUpdates++;
969    }
970  }
971
972  /**
973   * Creates a RSInfo structure representing this remote RS.
974   * @return The RSInfo structure representing this remote RS
975   */
976  public RSInfo toRSInfo()
977  {
978    return new RSInfo(serverId, serverURL, generationId, groupId, weight);
979  }
980
981  /**
982   * Update the send window size based on the credit specified in the
983   * given window message.
984   *
985   * @param windowMsg The Window LocalizableMessage containing the information
986   *                  necessary for updating the window size.
987   */
988  public void updateWindow(WindowMsg windowMsg)
989  {
990    sendWindow.release(windowMsg.getNumAck());
991  }
992
993  /**
994   * Log the messages involved in the start handshake.
995   * @param inStartMsg The message received first.
996   * @param outStartMsg The message sent in response.
997   */
998  protected void logStartHandshakeRCVandSND(
999      StartMsg inStartMsg,
1000      StartMsg outStartMsg)
1001  {
1002    if (logger.isTraceEnabled())
1003    {
1004      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1005          + ", " + getClass().getSimpleName() + " " + this + ":"
1006          + "\nSH START HANDSHAKE RECEIVED:\n" + inStartMsg
1007          + "\nAND REPLIED:\n" + outStartMsg);
1008    }
1009  }
1010
1011  /**
1012   * Log the messages involved in the start handshake.
1013   * @param outStartMsg The message sent first.
1014   * @param inStartMsg The message received in response.
1015   */
1016  protected void logStartHandshakeSNDandRCV(
1017      StartMsg outStartMsg,
1018      StartMsg inStartMsg)
1019  {
1020    if (logger.isTraceEnabled())
1021    {
1022      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1023          + ", " + getClass().getSimpleName() + " " + this + ":"
1024          + "\nSH START HANDSHAKE SENT:\n" + outStartMsg + "\nAND RECEIVED:\n"
1025          + inStartMsg);
1026    }
1027  }
1028
1029  /**
1030   * Log the messages involved in the Topology handshake.
1031   * @param inTopoMsg The message received first.
1032   * @param outTopoMsg The message sent in response.
1033   */
1034  protected void logTopoHandshakeRCVandSND(
1035      TopologyMsg inTopoMsg,
1036      TopologyMsg outTopoMsg)
1037  {
1038    if (logger.isTraceEnabled())
1039    {
1040      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1041          + ", " + getClass().getSimpleName() + " " + this + ":"
1042          + "\nSH TOPO HANDSHAKE RECEIVED:\n" + inTopoMsg + "\nAND REPLIED:\n"
1043          + outTopoMsg);
1044    }
1045  }
1046
1047  /**
1048   * Log the messages involved in the Topology handshake.
1049   * @param outTopoMsg The message sent first.
1050   * @param inTopoMsg The message received in response.
1051   */
1052  protected void logTopoHandshakeSNDandRCV(
1053      TopologyMsg outTopoMsg,
1054      TopologyMsg inTopoMsg)
1055  {
1056    if (logger.isTraceEnabled())
1057    {
1058      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1059          + ", " + getClass().getSimpleName() + " " + this + ":"
1060          + "\nSH TOPO HANDSHAKE SENT:\n" + outTopoMsg + "\nAND RECEIVED:\n"
1061          + inTopoMsg);
1062    }
1063  }
1064
1065  /**
1066   * Log the messages involved in the Topology/StartSession handshake.
1067   * @param inStartSessionMsg The message received first.
1068   * @param outTopoMsg The message sent in response.
1069   */
1070  protected void logStartSessionHandshake(
1071      StartSessionMsg inStartSessionMsg,
1072      TopologyMsg outTopoMsg)
1073  {
1074    if (logger.isTraceEnabled())
1075    {
1076      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1077          + ", " + getClass().getSimpleName() + " " + this + " :"
1078          + "\nSH SESSION HANDSHAKE RECEIVED:\n" + inStartSessionMsg
1079          + "\nAND REPLIED:\n" + outTopoMsg);
1080    }
1081  }
1082
1083  /**
1084   * Log stop message has been received.
1085   */
1086  protected void logStopReceived()
1087  {
1088    if (logger.isTraceEnabled())
1089    {
1090      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
1091          + ", " + getClass().getSimpleName() + " " + this + " :"
1092          + "\nSH SESSION HANDSHAKE RECEIVED A STOP MESSAGE");
1093    }
1094  }
1095
1096  /**
1097   * Process a Ack message received.
1098   * @param ack the message received.
1099   */
1100  void processAck(AckMsg ack)
1101  {
1102    replicationServerDomain.processAck(ack, this);
1103  }
1104
1105  /**
1106   * Get the reference generation id (associated with the changes in the db).
1107   * @return the reference generation id.
1108   */
1109  public long getReferenceGenId()
1110  {
1111    return replicationServerDomain.getGenerationId();
1112  }
1113
1114  /**
1115   * Process a ResetGenerationIdMsg message received.
1116   * @param msg the message received.
1117   */
1118  void processResetGenId(ResetGenerationIdMsg msg)
1119  {
1120    replicationServerDomain.resetGenerationId(this, msg);
1121  }
1122
1123  /**
1124   * Put a new update message received.
1125   * @param update the update message received.
1126   * @throws IOException when it occurs.
1127   */
1128  public void put(UpdateMsg update) throws IOException
1129  {
1130    decAndCheckWindow();
1131    replicationServerDomain.put(update, this);
1132  }
1133
1134  /**
1135   * Stop this handler.
1136   */
1137  public void doStop()
1138  {
1139    replicationServerDomain.stopServer(this, false);
1140  }
1141
1142  /**
1143   * Creates a ReplServerStartMsg for the current ServerHandler.
1144   *
1145   * @return a new ReplServerStartMsg for the current ServerHandler.
1146   */
1147  protected ReplServerStartMsg createReplServerStartMsg()
1148  {
1149    return new ReplServerStartMsg(getReplicationServerId(),
1150        getReplicationServerURL(), getBaseDN(), maxRcvWindow,
1151        replicationServerDomain.getLatestServerState(), localGenerationId,
1152        sslEncryption, getLocalGroupId(),
1153        replicationServer.getDegradedStatusThreshold());
1154  }
1155
1156  /**
1157   * Returns a "badly disconnected" error message for this server handler.
1158   *
1159   * @return a "badly disconnected" error message for this server handler
1160   */
1161  public LocalizableMessage getBadlyDisconnectedErrorMessage()
1162  {
1163    if (isDataServer())
1164    {
1165      return ERR_DS_BADLY_DISCONNECTED.get(getReplicationServerId(),
1166          getServerId(), session.getReadableRemoteAddress(), getBaseDN());
1167    }
1168    return ERR_RS_BADLY_DISCONNECTED.get(getReplicationServerId(),
1169        getServerId(), session.getReadableRemoteAddress(), getBaseDN());
1170  }
1171}