/*
 * The contents of this file are subject to the terms of the Common Development and
 * Distribution License (the License). You may not use this file except in compliance with the
 * License.
 *
 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
 * specific language governing permission and limitations under the License.
 *
 * When distributing Covered Software, include this CDDL Header Notice in each file and include
 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
 * Header, with the fields enclosed by brackets [] replaced by your own identifying
 * information: "Portions Copyright [year] [name of copyright owner]".
 *
 * Copyright 2006-2010 Sun Microsystems, Inc.
 * Portions Copyright 2011-2016 ForgeRock AS.
 */
package org.opends.server.replication.server;

import static org.opends.messages.ReplicationMessages.*;

import java.io.IOException;
import java.util.Random;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.forgerock.i18n.LocalizableMessage;
import org.forgerock.i18n.slf4j.LocalizedLogger;
import org.forgerock.opendj.config.server.ConfigException;
import org.forgerock.opendj.ldap.ResultCode;
import org.opends.server.admin.std.server.MonitorProviderCfg;
import org.opends.server.api.MonitorData;
import org.opends.server.core.DirectoryServer;
import org.opends.server.replication.common.AssuredMode;
import org.opends.server.replication.common.CSN;
import org.opends.server.replication.common.RSInfo;
import org.opends.server.replication.common.ServerStatus;
import org.opends.server.replication.protocol.AckMsg;
import org.opends.server.replication.protocol.ChangeTimeHeartbeatMsg;
import org.opends.server.replication.protocol.HeartbeatThread;
import org.opends.server.replication.protocol.MonitorMsg;
import org.opends.server.replication.protocol.MonitorRequestMsg;
import org.opends.server.replication.protocol.ProtocolVersion;
import org.opends.server.replication.protocol.ReplServerStartMsg;
import org.opends.server.replication.protocol.ReplicationMsg;
import org.opends.server.replication.protocol.ResetGenerationIdMsg;
import org.opends.server.replication.protocol.RoutableMsg;
import org.opends.server.replication.protocol.Session;
import org.opends.server.replication.protocol.StartMsg;
import org.opends.server.replication.protocol.StartSessionMsg;
import org.opends.server.replication.protocol.TopologyMsg;
import org.opends.server.replication.protocol.UpdateMsg;
import org.opends.server.replication.protocol.WindowMsg;
import org.opends.server.replication.server.changelog.api.ChangelogException;
import org.opends.server.types.DirectoryException;
import org.opends.server.types.InitializationException;

/**
 * This class defines a server handler :
 * - that is a MessageHandler (see this class for more details)
 * - that handles all interaction with a peer server (RS or DS).
 */
public abstract class ServerHandler extends MessageHandler
{

  private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass();

  /**
   * Maximum time, in milliseconds, that {@link #shutdown()} waits for each of
   * the writer and reader threads to terminate.
   */
  private static final int SHUTDOWN_JOIN_TIMEOUT = 30000;

  /** The serverId of the remote server. */
  protected int serverId;
  /** The session opened with the remote server. */
  protected final Session session;

  /** The serverURL of the remote server. */
  protected String serverURL;
  /** Number of updates received from the server in assured safe read mode. */
  private int assuredSrReceivedUpdates;
  /**
   * Number of updates received from the server in assured safe read mode that
   * timed out.
   */
  private final AtomicInteger assuredSrReceivedUpdatesTimeout = new AtomicInteger();
  /** Number of updates sent to the server in assured safe read mode. */
  private int assuredSrSentUpdates;
  /**
   * Number of updates sent to the server in assured safe read mode that timed
   * out.
   */
  private final AtomicInteger assuredSrSentUpdatesTimeout = new AtomicInteger();
  /** Number of updates received from the server in assured safe data mode. */
  private int assuredSdReceivedUpdates;
  /**
   * Number of updates received from the server in assured safe data mode that
   * timed out.
   */
  private final AtomicInteger assuredSdReceivedUpdatesTimeout = new AtomicInteger();
  /** Number of updates sent to the server in assured safe data mode. */
  private int assuredSdSentUpdates;

  /**
   * Number of updates sent to the server in assured safe data mode that timed out.
   */
  private final AtomicInteger assuredSdSentUpdatesTimeout = new AtomicInteger();

  /** The associated ServerWriter that sends messages to the remote server. */
  private ServerWriter writer;

  /**
   * The associated ServerReader that receives messages from the remote server.
   */
  private ServerReader reader;

  /** Current receive window; modified under this object's monitor. */
  private int rcvWindow;
  /** Half of the receive window size: both the refill threshold and the refill credit. */
  private final int rcvWindowSizeHalf;

  /** The size of the receiving window. */
  protected final int maxRcvWindow;
  /**
   * Semaphore that the writer uses to control the flow to the remote server.
   * Only created by {@link #finalizeStart()} when a session exists.
   */
  private Semaphore sendWindow;
  /** The initial size of the sending window. */
  private int sendWindowSize;
  /** Remote generation id. */
  protected long generationId = -1;
  /** The generation id of the hosting RS. */
  protected long localGenerationId = -1;
  /** The generation id before processing a new start handshake. */
  protected long oldGenerationId = -1;
  /** Group id of this remote server. */
  protected byte groupId = -1;
  /** The SSL encryption after the negotiation with the peer. */
  protected boolean sslEncryption;
  /**
   * The time in milliseconds between heartbeats from the replication
   * server. Zero means heartbeats are off.
   */
  protected long heartbeatInterval;

  /** The thread that will send heartbeats. */
  private HeartbeatThread heartbeatThread;

  /** Set when ServerWriter is stopping. */
  private volatile boolean shutdownWriter;

  /** Weight of this remote server. */
  protected int weight = 1;

  /**
   * Creates a new server handler instance with the provided session.
   *
   * @param session The Session used by the ServerHandler to
   *                 communicate with the remote entity.
   * @param queueSize The maximum number of update that will be kept
   *                  in memory by this ServerHandler.
   * @param replicationServer The hosting replication server.
   * @param rcvWindowSize The window size to receive from the remote server.
   */
  public ServerHandler(
      Session session,
      int queueSize,
      ReplicationServer replicationServer,
      int rcvWindowSize)
  {
    super(queueSize, replicationServer);
    this.session = session;
    this.rcvWindowSizeHalf = rcvWindowSize / 2;
    this.maxRcvWindow = rcvWindowSize;
    this.rcvWindow = rcvWindowSize;
  }

  /**
   * Abort a start procedure currently establishing: logs the reason, closes
   * the session (when there is one), releases the domain lock and restores the
   * generation id of the domain.
   *
   * @param reason The provided reason, logged as an error when not null.
   */
  protected void abortStart(LocalizableMessage reason)
  {
    // We did not recognize the message, close session as what can happen after
    // is undetermined and we do not want the server to be disturbed
    Session localSession = session;
    if (localSession != null)
    {
      if (reason != null)
      {
        if (logger.isTraceEnabled())
        {
          logger.trace("In " + this + " closing session with err=" + reason);
        }
        logger.error(reason);
      }

      // This method is only called when aborting a failing handshake and
      // no StopMsg should be sent in such situation. StopMsg are only
      // expected when full handshake has been performed, or at end of
      // handshake phase 1, when DS was just gathering available RS info
      localSession.close();
    }

    releaseDomainLock();

    // If generation id of domain was changed, set it back to old value
    // We may have changed it as it was -1 and we received a value >0 from peer
    // server and the last topo message sent may have failed being sent: in that
    // case retrieve old value of generation id for replication server domain
    //
    // NOTE(review): oldGenerationId is initialized to -1 in this class, so the
    // -100 sentinel is never matched unless a subclass assigns it explicitly.
    // Confirm the intended sentinel before changing either value.
    if (oldGenerationId != -100)
    {
      replicationServerDomain.changeGenerationId(oldGenerationId);
    }
  }

  /**
   * Releases the lock on the replication server domain if it was held.
   */
  protected void releaseDomainLock()
  {
    if (replicationServerDomain.hasLock())
    {
      replicationServerDomain.release();
    }
  }

  /**
   * Check the protocol window and send WindowMsg if necessary.
   * <p>
   * When the receive window has dropped below half of its size, a credit of
   * half the window size is published to the peer and added to the local
   * window.
   *
   * @throws IOException when the session becomes unavailable.
   */
  public synchronized void checkWindow() throws IOException
  {
    if (rcvWindow < rcvWindowSizeHalf)
    {
      WindowMsg msg = new WindowMsg(rcvWindowSizeHalf);
      session.publish(msg);
      rcvWindow += rcvWindowSizeHalf;
    }
  }

  /**
   * Decrement the protocol window, then check if it is necessary
   * to send a WindowMsg and send it.
   *
   * @throws IOException when the session becomes unavailable.
   */
  private synchronized void decAndCheckWindow() throws IOException
  {
    rcvWindow--;
    checkWindow();
  }

  /**
   * Finalize the initialization, create reader, writer, heartbeat system
   * and monitoring system.
   * <p>
   * The monitor provider is (re)registered even when there is no session.
   *
   * @throws DirectoryException When the startup of the session thread is
   *           interrupted.
   */
  protected void finalizeStart() throws DirectoryException
  {
    // FIXME:ECL We should refactor so that a SH always have a session
    if (session != null)
    {
      try
      {
        // Disable timeout for next communications
        session.setSoTimeout(0);
      }
      catch (Exception e)
      {
        // Best effort: the handshake carries on with the previous timeout,
        // but leave a trace so that the failure is not completely silent.
        if (logger.isTraceEnabled())
        {
          logger.trace("In " + this
              + " could not disable the session timeout: " + e);
        }
      }

      // sendWindow MUST be created before starting the writer
      sendWindow = new Semaphore(sendWindowSize);

      writer = new ServerWriter(session, this, replicationServerDomain,
          replicationServer.getDSRSShutdownSync());
      reader = new ServerReader(session, this);

      session.setName("Replication server RS(" + getReplicationServerId()
          + ") session thread to " + this + " at "
          + session.getReadableRemoteAddress());
      session.start();
      try
      {
        session.waitForStartup();
      }
      catch (InterruptedException e)
      {
        final LocalizableMessage message =
            ERR_SESSION_STARTUP_INTERRUPTED.get(session.getName());
        throw new DirectoryException(ResultCode.OTHER, message, e);
      }
      reader.start();
      writer.start();

      // Create a thread to send heartbeat messages.
      if (heartbeatInterval > 0)
      {
        String threadName = "Replication server RS(" + getReplicationServerId()
            + ") heartbeat publisher to " + this + " at "
            + session.getReadableRemoteAddress();
        // Heartbeats are published at a third of the negotiated interval.
        heartbeatThread = new HeartbeatThread(threadName, session,
            heartbeatInterval / 3);
        heartbeatThread.start();
      }
    }

    // Deregister first so that a previous registration of this handler
    // is replaced rather than duplicated.
    DirectoryServer.deregisterMonitorProvider(this);
    DirectoryServer.registerMonitorProvider(this);
  }

  /**
   * Sends a message.
   *
   * @param msg
   *          The message to be sent.
   * @throws IOException
   *           When it occurs while sending the message,
   */
  public void send(ReplicationMsg msg) throws IOException
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In "
          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
          + this + " publishes message:\n" + msg);
    }
    session.publish(msg);
  }

  /**
   * Get the age of the older change that has not yet been replicated
   * to the server handled by this ServerHandler.
   *
   * @return The time of the older update CSN known for this handler, or 0
   *         when there is none.
   */
  public long getApproxFirstMissingDate()
  {
    // Get the older CSN received
    CSN olderUpdateCSN = getOlderUpdateCSN();
    if (olderUpdateCSN != null)
    {
      // If not present in the local RS db,
      // then approximate with the older update time
      return olderUpdateCSN.getTime();
    }
    return 0;
  }

  /**
   * Get the number of updates received from the server in assured safe data
   * mode.
   * @return The number of updates received from the server in assured safe data
   * mode
   */
  public int getAssuredSdReceivedUpdates()
  {
    return assuredSdReceivedUpdates;
  }

  /**
   * Get the number of updates received from the server in assured safe data
   * mode that timed out.
   * @return The number of updates received from the server in assured safe data
   * mode that timed out.
   */
  public AtomicInteger getAssuredSdReceivedUpdatesTimeout()
  {
    return assuredSdReceivedUpdatesTimeout;
  }

  /**
   * Get the number of updates sent to the server in assured safe data mode.
   * @return The number of updates sent to the server in assured safe data mode
   */
  public int getAssuredSdSentUpdates()
  {
    return assuredSdSentUpdates;
  }

  /**
   * Get the number of updates sent to the server in assured safe data mode that
   * timed out.
   * @return The number of updates sent to the server in assured safe data mode
   * that timed out.
   */
  public AtomicInteger getAssuredSdSentUpdatesTimeout()
  {
    return assuredSdSentUpdatesTimeout;
  }

  /**
   * Get the number of updates received from the server in assured safe read
   * mode.
   * @return The number of updates received from the server in assured safe read
   * mode
   */
  public int getAssuredSrReceivedUpdates()
  {
    return assuredSrReceivedUpdates;
  }

  /**
   * Get the number of updates received from the server in assured safe read
   * mode that timed out.
   * @return The number of updates received from the server in assured safe read
   * mode that timed out.
   */
  public AtomicInteger getAssuredSrReceivedUpdatesTimeout()
  {
    return assuredSrReceivedUpdatesTimeout;
  }

  /**
   * Get the number of updates sent to the server in assured safe read mode.
   * @return The number of updates sent to the server in assured safe read mode
   */
  public int getAssuredSrSentUpdates()
  {
    return assuredSrSentUpdates;
  }

  /**
   * Get the number of updates sent to the server in assured safe read mode that
   * timed out.
   * @return The number of updates sent to the server in assured safe read mode
   * that timed out.
   */
  public AtomicInteger getAssuredSrSentUpdatesTimeout()
  {
    return assuredSrSentUpdatesTimeout;
  }

  /**
   * Returns the Replication Server Domain to which belongs this server handler.
   *
   * @return The replication server domain.
   */
  public ReplicationServerDomain getDomain()
  {
    return replicationServerDomain;
  }

  /**
   * Returns the value of generationId for that handler.
   * @return The value of the generationId.
   */
  public long getGenerationId()
  {
    return generationId;
  }

  /**
   * Gets the group id of the server represented by this object.
   * @return The group id of the server represented by this object.
   */
  public byte getGroupId()
  {
    return groupId;
  }

  /**
   * Get our heartbeat interval.
   * @return Our heartbeat interval.
   */
  public long getHeartbeatInterval()
  {
    return heartbeatInterval;
  }

  /**
   * Returns the monitoring attributes of this handler: the generic ones
   * provided by the super class, plus identity, assured replication counters,
   * window statistics, encryption state and generation id.
   * <p>
   * The monitor provider may be registered although no session exists (see
   * {@link #finalizeStart()}); in that case the send window was never created
   * and is reported as 0, and the encryption is reported as false.
   *
   * @return The monitoring attributes of this handler.
   */
  @Override
  public MonitorData getMonitorData()
  {
    // Get the generic ones
    MonitorData attributes = super.getMonitorData();

    attributes.add("server-id", serverId);
    attributes.add("domain-name", getBaseDN());

    // Deprecated
    attributes.add("max-waiting-changes", maxQueueSize);
    attributes.add("sent-updates", getOutCount());
    attributes.add("received-updates", getInCount());

    // Assured counters
    attributes.add("assured-sr-received-updates", getAssuredSrReceivedUpdates());
    attributes.add("assured-sr-received-updates-timeout", getAssuredSrReceivedUpdatesTimeout());
    attributes.add("assured-sr-sent-updates", getAssuredSrSentUpdates());
    attributes.add("assured-sr-sent-updates-timeout", getAssuredSrSentUpdatesTimeout());
    attributes.add("assured-sd-received-updates", getAssuredSdReceivedUpdates());
    if (!isDataServer())
    {
      attributes.add("assured-sd-sent-updates", getAssuredSdSentUpdates());
      attributes.add("assured-sd-sent-updates-timeout", getAssuredSdSentUpdatesTimeout());
    } else
    {
      attributes.add("assured-sd-received-updates-timeout", getAssuredSdReceivedUpdatesTimeout());
    }

    // Window stats
    // Read the field once: it stays null until finalizeStart() has created it.
    final Semaphore currentSendWindow = sendWindow;
    final int availablePermits =
        currentSendWindow != null ? currentSendWindow.availablePermits() : 0;
    attributes.add("max-send-window", sendWindowSize);
    attributes.add("current-send-window", availablePermits);
    attributes.add("max-rcv-window", maxRcvWindow);
    attributes.add("current-rcv-window", rcvWindow);

    // Encryption
    final boolean encrypted = session != null && session.isEncrypted();
    attributes.add("ssl-encryption", encrypted);

    // Data generation
    attributes.add("generation-id", generationId);

    return attributes;
  }

  /**
   * Retrieves the name of this monitor provider. It should be unique among all
   * monitor providers, including all instances of the same monitor provider.
   *
   * @return The name of this monitor provider.
   */
  @Override
  public abstract String getMonitorInstanceName();

  /**
   * Gets the protocol version used with this remote server.
   * @return The protocol version used with this remote server.
   */
  public short getProtocolVersion()
  {
    return session.getProtocolVersion();
  }

  /**
   * Get the Server Id.
   *
   * @return the ID of the server to which this object is linked
   */
  public int getServerId()
  {
    return serverId;
  }

  /**
   * Retrieves the URL for this server handler.
   *
   * @return The URL for this server handler, in the form of an address and
   *         port separated by a colon.
   */
  public String getServerURL()
  {
    return serverURL;
  }

  /**
   * Return the ServerStatus.
   * @return The server status.
   */
  protected abstract ServerStatus getStatus();

  /**
   * Increment the number of updates received from the server in assured safe
   * data mode.
   */
  public void incrementAssuredSdReceivedUpdates()
  {
    assuredSdReceivedUpdates++;
  }

  /**
   * Increment the number of updates received from the server in assured safe
   * data mode that timed out.
   */
  public void incrementAssuredSdReceivedUpdatesTimeout()
  {
    assuredSdReceivedUpdatesTimeout.incrementAndGet();
  }

  /**
   * Increment the number of updates sent to the server in assured safe data
   * mode that timed out.
   */
  public void incrementAssuredSdSentUpdatesTimeout()
  {
    assuredSdSentUpdatesTimeout.incrementAndGet();
  }

  /**
   * Increment the number of updates received from the server in assured safe
   * read mode.
   */
  public void incrementAssuredSrReceivedUpdates()
  {
    assuredSrReceivedUpdates++;
  }

  /**
   * Increment the number of updates received from the server in assured safe
   * read mode that timed out.
   */
  public void incrementAssuredSrReceivedUpdatesTimeout()
  {
    assuredSrReceivedUpdatesTimeout.incrementAndGet();
  }

  /**
   * Increment the number of updates sent to the server in assured safe read
   * mode that timed out.
   */
  public void incrementAssuredSrSentUpdatesTimeout()
  {
    assuredSrSentUpdatesTimeout.incrementAndGet();
  }

  /** {@inheritDoc} */
  @Override
  public void initializeMonitorProvider(MonitorProviderCfg configuration)
      throws ConfigException, InitializationException
  {
    // Nothing to do for now
  }

  /**
   * Check if the server associated to this ServerHandler is a data server
   * in the topology.
   * @return true if the server is a data server.
   */
  public abstract boolean isDataServer();

  /**
   * Check if the server associated to this ServerHandler is a replication
   * server.
   * @return true if the server is a replication server.
   */
  public boolean isReplicationServer()
  {
    return !isDataServer();
  }

  // The handshake phase must be done by blocking any access to structures
  // keeping info on connected servers, so that one can safely check for
  // pre-existence of a server, send a coherent snapshot of known topology to
  // peers, update the local view of the topology...
  //
  // For instance a kind of problem could be that while we connect with a
  // peer RS, a DS is connecting at the same time and we could publish the
  // connected DSs to the peer RS forgetting this last DS in the TopologyMsg.
  //
  // This method and every others that need to read/make changes to the
  // structures holding topology for the domain should:
  // - call ReplicationServerDomain.lock()
  // - read/modify structures
  // - call ReplicationServerDomain.release()
  //
  // More information is provided in comment of ReplicationServerDomain.lock()

  /**
   * Lock the domain without a timeout.
   * <p>
   * If domain already exists, lock it until handshake is finished otherwise it
   * will be created and locked later in the method
   *
   * @throws DirectoryException
   *           When an exception occurs.
   * @throws InterruptedException
   *           If the current thread was interrupted while waiting for the lock.
   */
  public void lockDomainNoTimeout() throws DirectoryException,
      InterruptedException
  {
    if (!replicationServerDomain.hasLock())
    {
      replicationServerDomain.lock();
    }
  }

  /**
   * Lock the domain with a timeout.
   * <p>
   * Take the lock on the domain. WARNING: Here we try to acquire the lock with
   * a timeout. This is for preventing a deadlock that may happen if there are
   * cross connection attempts (for same domain) from this replication server
   * and from a peer one.
   * <p>
   * Here is the scenario:
   * <ol>
   * <li>RS1 connect thread takes the domain lock and starts connection to RS2
   * </li>
   * <li>at the same time RS2 connect thread takes its domain lock and starts
   * connection to RS1</li>
   * <li>RS2 listen thread starts processing received ReplServerStartMsg from
   * RS1 and wants to acquire the lock on the domain (here) but cannot as RS2
   * connect thread already has it</li>
   * <li>RS1 listen thread starts processing received ReplServerStartMsg from
   * RS2 and wants to acquire the lock on the domain (here) but cannot as RS1
   * connect thread already has it</li>
   * </ol>
   * => Deadlock: 4 threads are locked.
   * <p>
   * To prevent threads locking in such situation, the listen threads here will
   * both timeout trying to acquire the lock. The random time for the timeout
   * should allow one connection attempt to be aborted whereas the other one
   * should have time to finish in the same time.
   * <p>
   * Warning: the minimum time (3s) should be big enough to allow normal
   * situation connections to terminate. The added random time should represent
   * a big enough range so that the chance to have one listen thread timing out
   * a lot before the peer one is great. When the first listen thread times out,
   * the remote connect thread should release the lock and allow the peer listen
   * thread to take the lock it was waiting for and process the connection
   * attempt.
   *
   * @throws DirectoryException
   *           When the lock could not be acquired before the timeout.
   * @throws InterruptedException
   *           If the current thread was interrupted while waiting for the lock.
   */
  public void lockDomainWithTimeout() throws DirectoryException,
      InterruptedException
  {
    final Random random = new Random();
    final int randomTime = random.nextInt(6); // Random from 0 to 5
    // Wait at least 3 seconds + (0 to 5 seconds)
    final long timeout = 3000 + randomTime * 1000;
    final boolean lockAcquired = replicationServerDomain.tryLock(timeout);
    if (!lockAcquired)
    {
      LocalizableMessage message = WARN_TIMEOUT_WHEN_CROSS_CONNECTION.get(
          getBaseDN(), serverId, session.getReadableRemoteAddress(), getReplicationServerId());
      throw new DirectoryException(ResultCode.OTHER, message);
    }
  }

  /**
   * Processes a routable message.
   *
   * @param msg The message to be processed.
   */
  void process(RoutableMsg msg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In "
          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
          + this + " processes routable msg received:" + msg);
    }
    replicationServerDomain.process(msg, this);
  }

  /**
   * Responds to a monitor request message.
   *
   * @param msg
   *          The monitor request message.
   */
  void processMonitorRequestMsg(MonitorRequestMsg msg)
  {
    replicationServerDomain.processMonitorRequestMsg(msg, this);
  }

  /**
   * Responds to a monitor message.
   *
   * @param msg
   *          The monitor message.
   */
  void processMonitorMsg(MonitorMsg msg)
  {
    replicationServerDomain.processMonitorMsg(msg, this);
  }

  /**
   * Processes a change time heartbeat msg.
   *
   * @param msg
   *          The message to be processed.
   * @throws DirectoryException
   *           When an exception is raised.
   */
  void process(ChangeTimeHeartbeatMsg msg) throws DirectoryException
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In "
          + replicationServerDomain.getLocalRSMonitorInstanceName() + " "
          + this + " processes received msg:\n" + msg);
    }
    replicationServerDomain.processChangeTimeHeartbeatMsg(this, msg);
  }

  /**
   * Process the reception of a WindowProbeMsg message.
   *
   * @throws IOException
   *           When the session becomes unavailable.
   */
  public void replyToWindowProbe() throws IOException
  {
    if (rcvWindow > 0)
    {
      // The LDAP server believes that its window is closed while it is not,
      // this means that some problem happened in the window exchange procedure!
      // lets update the LDAP server with our current window size and hope
      // that everything will work better in the future.
      // TODO also log an error message.
      session.publish(new WindowMsg(rcvWindow));
    }
    else
    {
      // Both the LDAP server and the replication server believes that the
      // window is closed. Lets check the flowcontrol in case we
      // can now resume operations and send a windowMessage if necessary.
      checkWindow();
    }
  }

  /**
   * Sends the provided TopologyMsg to the peer server.
   * <p>
   * Nothing is sent when the peer uses the V1 replication protocol.
   *
   * @param topoMsg
   *          The TopologyMsg message to be sent.
   * @throws IOException
   *           When it occurs while sending the message,
   */
  public void sendTopoInfo(TopologyMsg topoMsg) throws IOException
  {
    // V1 Rs do not support the TopologyMsg
    if (getProtocolVersion() > ProtocolVersion.REPLICATION_PROTOCOL_V1)
    {
      send(topoMsg);
    }
  }

  /**
   * Set a new generation ID.
   *
   * @param generationId The new generation ID
   */
  public void setGenerationId(long generationId)
  {
    this.generationId = generationId;
  }

  /**
   * Sets the window size used when sending to the remote.
   * @param size The provided window size.
   */
  protected void setSendWindowSize(int size)
  {
    this.sendWindowSize = size;
  }

  /**
   * Shutdown This ServerHandler.
   * <p>
   * Closes the session, stops the heartbeat thread, deregisters the monitor
   * provider and waits, for a bounded time, for the writer and reader threads
   * to terminate. If the calling thread is interrupted while waiting, the
   * wait is abandoned and the interrupt status of the thread is restored.
   */
  @Override
  public void shutdown()
  {
    shutdownWriter = true;
    setConsumerActive(false);
    super.shutdown();

    if (session != null)
    {
      session.close();
    }
    if (heartbeatThread != null)
    {
      heartbeatThread.shutdown();
    }

    DirectoryServer.deregisterMonitorProvider(this);

    /*
     * Be sure to wait for ServerWriter and ServerReader death
     * It does not matter if we try to stop a thread which is us (reader
     * or writer), but we must not wait for our own thread death.
     */
    try
    {
      if (writer != null && !Thread.currentThread().equals(writer))
      {
        writer.join(SHUTDOWN_JOIN_TIMEOUT);
      }
      if (reader != null && !Thread.currentThread().equals(reader))
      {
        reader.join(SHUTDOWN_JOIN_TIMEOUT);
      }
    } catch (InterruptedException e)
    {
      // Don't try anymore to join, but keep the interrupt status so that
      // the caller can still observe the interruption.
      Thread.currentThread().interrupt();
    }
    if (logger.isTraceEnabled())
    {
      logger.trace("SH.shutdowned(" + this + ")");
    }
  }

  /**
   * Select the next update that must be sent to the server managed by this
   * ServerHandler.
   * <p>
   * A permit of the send window is acquired before the update is returned,
   * and the sent counters are updated.
   *
   * @return the next update that must be sent to the server managed by this
   *         ServerHandler, or null when no message was obtained.
   * @throws ChangelogException
   *           If a problem occurs when reading the changelog
   */
  public UpdateMsg take() throws ChangelogException
  {
    final UpdateMsg msg = getNextMessage();

    acquirePermitInSendWindow();

    if (msg != null)
    {
      incrementOutCount();
      if (msg.isAssured())
      {
        incrementAssuredStats(msg);
      }
      return msg;
    }
    return null;
  }

  /**
   * Waits until a permit is available in the send window or the writer is
   * being shut down. The wait is done by slices of 500 ms so that the
   * shutdown flag is checked regularly; interruptions are deliberately
   * ignored.
   */
  private void acquirePermitInSendWindow()
  {
    boolean acquired = false;
    boolean interrupted = true;
    do
    {
      try
      {
        acquired = sendWindow.tryAcquire(500, TimeUnit.MILLISECONDS);
        interrupted = false;
      } catch (InterruptedException e)
      {
        // loop until not interrupted
      }
    } while ((interrupted || !acquired) && !shutdownWriter);
  }

  /**
   * Updates the assured "sent" counters for the provided assured update: safe
   * read updates are always counted, other updates are only counted when the
   * peer is not a data server.
   *
   * @param msg The assured update that is about to be sent.
   */
  private void incrementAssuredStats(final UpdateMsg msg)
  {
    if (msg.getAssuredMode() == AssuredMode.SAFE_READ_MODE)
    {
      assuredSrSentUpdates++;
    }
    else if (!isDataServer())
    {
      assuredSdSentUpdates++;
    }
  }

  /**
   * Creates a RSInfo structure representing this remote RS.
   * @return The RSInfo structure representing this remote RS
   */
  public RSInfo toRSInfo()
  {
    return new RSInfo(serverId, serverURL, generationId, groupId, weight);
  }

  /**
   * Update the send window size based on the credit specified in the
   * given window message.
   *
   * @param windowMsg The Window message containing the information
   *                  necessary for updating the window size.
   */
  public void updateWindow(WindowMsg windowMsg)
  {
    sendWindow.release(windowMsg.getNumAck());
  }

  /**
   * Log the messages involved in the start handshake.
   * @param inStartMsg The message received first.
   * @param outStartMsg The message sent in response.
   */
  protected void logStartHandshakeRCVandSND(
      StartMsg inStartMsg,
      StartMsg outStartMsg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + ":"
          + "\nSH START HANDSHAKE RECEIVED:\n" + inStartMsg
          + "\nAND REPLIED:\n" + outStartMsg);
    }
  }

  /**
   * Log the messages involved in the start handshake.
   * @param outStartMsg The message sent first.
   * @param inStartMsg The message received in response.
   */
  protected void logStartHandshakeSNDandRCV(
      StartMsg outStartMsg,
      StartMsg inStartMsg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + ":"
          + "\nSH START HANDSHAKE SENT:\n" + outStartMsg + "\nAND RECEIVED:\n"
          + inStartMsg);
    }
  }

  /**
   * Log the messages involved in the Topology handshake.
   * @param inTopoMsg The message received first.
   * @param outTopoMsg The message sent in response.
   */
  protected void logTopoHandshakeRCVandSND(
      TopologyMsg inTopoMsg,
      TopologyMsg outTopoMsg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + ":"
          + "\nSH TOPO HANDSHAKE RECEIVED:\n" + inTopoMsg + "\nAND REPLIED:\n"
          + outTopoMsg);
    }
  }

  /**
   * Log the messages involved in the Topology handshake.
   * @param outTopoMsg The message sent first.
   * @param inTopoMsg The message received in response.
   */
  protected void logTopoHandshakeSNDandRCV(
      TopologyMsg outTopoMsg,
      TopologyMsg inTopoMsg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + ":"
          + "\nSH TOPO HANDSHAKE SENT:\n" + outTopoMsg + "\nAND RECEIVED:\n"
          + inTopoMsg);
    }
  }

  /**
   * Log the messages involved in the Topology/StartSession handshake.
   * @param inStartSessionMsg The message received first.
   * @param outTopoMsg The message sent in response.
   */
  protected void logStartSessionHandshake(
      StartSessionMsg inStartSessionMsg,
      TopologyMsg outTopoMsg)
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + " :"
          + "\nSH SESSION HANDSHAKE RECEIVED:\n" + inStartSessionMsg
          + "\nAND REPLIED:\n" + outTopoMsg);
    }
  }

  /**
   * Log stop message has been received.
   */
  protected void logStopReceived()
  {
    if (logger.isTraceEnabled())
    {
      logger.trace("In " + this.replicationServer.getMonitorInstanceName()
          + ", " + getClass().getSimpleName() + " " + this + " :"
          + "\nSH SESSION HANDSHAKE RECEIVED A STOP MESSAGE");
    }
  }

  /**
   * Process a Ack message received.
   * @param ack the message received.
   */
  void processAck(AckMsg ack)
  {
    replicationServerDomain.processAck(ack, this);
  }

  /**
   * Get the reference generation id (associated with the changes in the db).
   * @return the reference generation id.
   */
  public long getReferenceGenId()
  {
    return replicationServerDomain.getGenerationId();
  }

  /**
   * Process a ResetGenerationIdMsg message received.
   * @param msg the message received.
   */
  void processResetGenId(ResetGenerationIdMsg msg)
  {
    replicationServerDomain.resetGenerationId(this, msg);
  }

  /**
   * Put a new update message received.
   * <p>
   * The receive window is decremented (and refilled if needed) before the
   * update is handed to the domain.
   *
   * @param update the update message received.
   * @throws IOException when it occurs.
   */
  public void put(UpdateMsg update) throws IOException
  {
    decAndCheckWindow();
    replicationServerDomain.put(update, this);
  }

  /**
   * Stop this handler.
   */
  public void doStop()
  {
    replicationServerDomain.stopServer(this, false);
  }

  /**
   * Creates a ReplServerStartMsg for the current ServerHandler.
   *
   * @return a new ReplServerStartMsg for the current ServerHandler.
   */
  protected ReplServerStartMsg createReplServerStartMsg()
  {
    return new ReplServerStartMsg(getReplicationServerId(),
        getReplicationServerURL(), getBaseDN(), maxRcvWindow,
        replicationServerDomain.getLatestServerState(), localGenerationId,
        sslEncryption, getLocalGroupId(),
        replicationServer.getDegradedStatusThreshold());
  }

  /**
   * Returns a "badly disconnected" error message for this server handler.
   *
   * @return a "badly disconnected" error message for this server handler
   */
  public LocalizableMessage getBadlyDisconnectedErrorMessage()
  {
    if (isDataServer())
    {
      return ERR_DS_BADLY_DISCONNECTED.get(getReplicationServerId(),
          getServerId(), session.getReadableRemoteAddress(), getBaseDN());
    }
    return ERR_RS_BADLY_DISCONNECTED.get(getReplicationServerId(),
        getServerId(), session.getReadableRemoteAddress(), getBaseDN());
  }
}