001/* 002 * The contents of this file are subject to the terms of the Common Development and 003 * Distribution License (the License). You may not use this file except in compliance with the 004 * License. 005 * 006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the 007 * specific language governing permission and limitations under the License. 008 * 009 * When distributing Covered Software, include this CDDL Header Notice in each file and include 010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL 011 * Header, with the fields enclosed by brackets [] replaced by your own identifying 012 * information: "Portions Copyright [year] [name of copyright owner]". 013 * 014 * Copyright 2006-2010 Sun Microsystems, Inc. 015 * Portions Copyright 2011-2016 ForgeRock AS. 016 */ 017package org.opends.server.replication.service; 018 019import java.io.IOException; 020import java.math.BigDecimal; 021import java.math.MathContext; 022import java.math.RoundingMode; 023import java.net.*; 024import java.util.*; 025import java.util.Map.Entry; 026import java.util.concurrent.ConcurrentSkipListMap; 027import java.util.concurrent.Semaphore; 028import java.util.concurrent.TimeUnit; 029import java.util.concurrent.atomic.AtomicBoolean; 030import java.util.concurrent.atomic.AtomicReference; 031 032import net.jcip.annotations.GuardedBy; 033import net.jcip.annotations.Immutable; 034 035import org.forgerock.i18n.LocalizableMessage; 036import org.forgerock.i18n.slf4j.LocalizedLogger; 037import org.forgerock.util.Utils; 038import org.opends.server.admin.std.server.ReplicationDomainCfg; 039import org.opends.server.core.DirectoryServer; 040import org.opends.server.replication.common.*; 041import org.opends.server.replication.plugin.MultimasterReplication; 042import org.opends.server.replication.protocol.*; 043import org.forgerock.opendj.ldap.DN; 044import org.opends.server.types.HostPort; 045 046import static 
org.opends.messages.ReplicationMessages.*; 047import static org.opends.server.replication.protocol.ProtocolVersion.*; 048import static org.opends.server.replication.server.ReplicationServer.*; 049import static org.opends.server.util.StaticUtils.*; 050 051/** 052 * The broker for Multi-master Replication. 053 */ 054public class ReplicationBroker 055{ 056 057 /** 058 * Immutable class containing information about whether the broker is 059 * connected to an RS and data associated to this connected RS. 060 */ 061 @Immutable 062 private static final class ConnectedRS 063 { 064 065 private static final ConnectedRS NO_CONNECTED_RS = new ConnectedRS( 066 NO_CONNECTED_SERVER); 067 068 /** The info of the RS we are connected to. */ 069 private final ReplicationServerInfo rsInfo; 070 /** Contains a connected session to the RS if any exist, null otherwise. */ 071 private final Session session; 072 private final String replicationServer; 073 074 private ConnectedRS(String replicationServer) 075 { 076 this.rsInfo = null; 077 this.session = null; 078 this.replicationServer = replicationServer; 079 } 080 081 private ConnectedRS(ReplicationServerInfo rsInfo, Session session) 082 { 083 this.rsInfo = rsInfo; 084 this.session = session; 085 this.replicationServer = session != null ? 086 session.getReadableRemoteAddress() 087 : NO_CONNECTED_SERVER; 088 } 089 090 private static ConnectedRS stopped() 091 { 092 return new ConnectedRS("stopped"); 093 } 094 095 private static ConnectedRS noConnectedRS() 096 { 097 return NO_CONNECTED_RS; 098 } 099 100 public int getServerId() 101 { 102 return rsInfo != null ? rsInfo.getServerId() : -1; 103 } 104 105 private byte getGroupId() 106 { 107 return rsInfo != null ? 
rsInfo.getGroupId() : -1; 108 } 109 110 private boolean isConnected() 111 { 112 return session != null; 113 } 114 115 /** {@inheritDoc} */ 116 @Override 117 public String toString() 118 { 119 final StringBuilder sb = new StringBuilder(); 120 toString(sb); 121 return sb.toString(); 122 } 123 124 public void toString(StringBuilder sb) 125 { 126 sb.append("connected=").append(isConnected()).append(", "); 127 if (!isConnected()) 128 { 129 sb.append("no connectedRS"); 130 } 131 else 132 { 133 sb.append("connectedRS(serverId=").append(rsInfo.getServerId()) 134 .append(", serverUrl=").append(rsInfo.getServerURL()) 135 .append(", groupId=").append(rsInfo.getGroupId()) 136 .append(")"); 137 } 138 } 139 140 } 141 private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass(); 142 private volatile boolean shutdown; 143 private final Object startStopLock = new Object(); 144 private volatile ReplicationDomainCfg config; 145 /** 146 * String reported under CSN=monitor when there is no connected RS. 147 */ 148 static final String NO_CONNECTED_SERVER = "Not connected"; 149 private final ServerState state; 150 private Semaphore sendWindow; 151 private int maxSendWindow; 152 private int rcvWindow = 100; 153 private int halfRcvWindow = rcvWindow / 2; 154 private int timeout; 155 private final ReplSessionSecurity replSessionSecurity; 156 /** 157 * The RS this DS is currently connected to. 158 * <p> 159 * Always use {@link #setConnectedRS(ConnectedRS)} to set a new 160 * connected RS. 161 */ 162 // @NotNull // for the reference 163 private final AtomicReference<ConnectedRS> connectedRS = new AtomicReference<>(ConnectedRS.noConnectedRS()); 164 /** Our replication domain. */ 165 private final ReplicationDomain domain; 166 /** 167 * This object is used as a conditional event to be notified about 168 * the reception of monitor information from the Replication Server. 
169 */ 170 private final AtomicBoolean monitorResponse = new AtomicBoolean(false); 171 /** 172 * A Map containing the ServerStates of all the replicas in the topology 173 * as seen by the ReplicationServer the last time it was polled or the last 174 * time it published monitoring information. 175 */ 176 private Map<Integer, ServerState> replicaStates = new HashMap<>(); 177 /** A thread to monitor heartbeats on the session. */ 178 private HeartbeatMonitor heartbeatMonitor; 179 /** The number of times the connection was lost. */ 180 private int numLostConnections; 181 /** 182 * When the broker cannot connect to any replication server 183 * it log an error and keeps continuing every second. 184 * This boolean is set when the first failure happens and is used 185 * to avoid repeating the error message for further failure to connect 186 * and to know that it is necessary to print a new message when the broker 187 * finally succeed to connect. 188 */ 189 private volatile boolean connectionError; 190 private final Object connectPhaseLock = new Object(); 191 /** 192 * The thread that publishes messages to the RS containing the current 193 * change time of this DS. 194 */ 195 private CTHeartbeatPublisherThread ctHeartbeatPublisherThread; 196 /* 197 * Properties for the last topology info received from the network. 198 */ 199 /** Contains the last known state of the replication topology. */ 200 private final AtomicReference<Topology> topology = new AtomicReference<>(new Topology()); 201 @GuardedBy("this") 202 private volatile int updateDoneCount; 203 private volatile boolean connectRequiresRecovery; 204 205 /** 206 * This integer defines when the best replication server checking algorithm 207 * should be engaged. 208 * Every time a monitoring message (each monitoring publisher period) is 209 * received, it is incremented. When it reaches 2, we run the checking 210 * algorithm to see if we must reconnect to another best replication server. 211 * Then we reset the value to 0. 
But when a topology message is received, the 212 * integer is reset to 0. This ensures that we wait at least one monitoring 213 * publisher period before running the algorithm, but also that we wait at 214 * least for a monitoring period after the last received topology message 215 * (topology stabilization). 216 */ 217 private int mustRunBestServerCheckingAlgorithm; 218 219 /** 220 * The monitor provider for this replication domain. 221 * <p> 222 * The name of the monitor includes the local address and must therefore be 223 * re-registered every time the session is re-established or destroyed. The 224 * monitor provider can only be created (i.e. non-null) if there is a 225 * replication domain, which is not the case in unit tests. 226 */ 227 private final ReplicationMonitor monitor; 228 229 /** 230 * Creates a new ReplicationServer Broker for a particular ReplicationDomain. 231 * 232 * @param replicationDomain The replication domain that is creating us. 233 * @param state The ServerState that should be used by this broker 234 * when negotiating the session with the replicationServer. 235 * @param config The configuration to use. 236 * @param replSessionSecurity The session security configuration. 237 */ 238 public ReplicationBroker(ReplicationDomain replicationDomain, 239 ServerState state, ReplicationDomainCfg config, 240 ReplSessionSecurity replSessionSecurity) 241 { 242 this.domain = replicationDomain; 243 this.state = state; 244 this.config = config; 245 this.replSessionSecurity = replSessionSecurity; 246 this.rcvWindow = getMaxRcvWindow(); 247 this.halfRcvWindow = rcvWindow / 2; 248 this.shutdown = true; 249 250 /* 251 * Only create a monitor if there is a replication domain (this is not the 252 * case in some unit tests). 253 */ 254 this.monitor = replicationDomain != null ? new ReplicationMonitor( 255 replicationDomain) : null; 256 registerReplicationMonitor(); 257 } 258 259 /** 260 * Start the ReplicationBroker. 
261 */ 262 public void start() 263 { 264 synchronized (startStopLock) 265 { 266 if (!shutdown) 267 { 268 return; 269 } 270 shutdown = false; 271 this.rcvWindow = getMaxRcvWindow(); 272 connectAsDataServer(); 273 } 274 } 275 276 /** 277 * Gets the group id of the RS we are connected to. 278 * @return The group id of the RS we are connected to 279 */ 280 public byte getRsGroupId() 281 { 282 return connectedRS.get().getGroupId(); 283 } 284 285 /** 286 * Gets the server id of the RS we are connected to. 287 * @return The server id of the RS we are connected to 288 */ 289 public int getRsServerId() 290 { 291 return connectedRS.get().getServerId(); 292 } 293 294 /** 295 * Gets the server id. 296 * @return The server id 297 */ 298 public int getServerId() 299 { 300 return config.getServerId(); 301 } 302 303 private DN getBaseDN() 304 { 305 return config.getBaseDN(); 306 } 307 308 private Set<String> getReplicationServerUrls() 309 { 310 return config.getReplicationServer(); 311 } 312 313 private byte getGroupId() 314 { 315 return (byte) config.getGroupId(); 316 } 317 318 /** 319 * Gets the server id. 320 * @return The server id 321 */ 322 private long getGenerationID() 323 { 324 return domain.getGenerationID(); 325 } 326 327 /** 328 * Set the generation id - for test purpose. 329 * @param generationID The generation id 330 */ 331 public void setGenerationID(long generationID) 332 { 333 domain.setGenerationID(generationID); 334 } 335 336 /** 337 * Compares 2 replication servers addresses and returns true if they both 338 * represent the same replication server instance. 339 * @param rs1Url Replication server 1 address 340 * @param rs2Url Replication server 2 address 341 * @return True if both replication server addresses represent the same 342 * replication server instance, false otherwise. 
343 */ 344 private static boolean isSameReplicationServerUrl(String rs1Url, 345 String rs2Url) 346 { 347 try 348 { 349 final HostPort hp1 = HostPort.valueOf(rs1Url); 350 final HostPort hp2 = HostPort.valueOf(rs2Url); 351 return hp1.isEquivalentTo(hp2); 352 } 353 catch (RuntimeException ex) 354 { 355 // Not a RS url or not a valid port number: should not happen 356 return false; 357 } 358 } 359 360 /** 361 * Bag class for keeping info we get from a replication server in order to 362 * compute the best one to connect to. This is in fact a wrapper to a 363 * ReplServerStartMsg (V3) or a ReplServerStartDSMsg (V4). This can also be 364 * updated with a info coming from received topology messages or monitoring 365 * messages. 366 */ 367 static class ReplicationServerInfo 368 { 369 private RSInfo rsInfo; 370 private final short protocolVersion; 371 private final DN baseDN; 372 private final int windowSize; 373 // @NotNull 374 private final ServerState serverState; 375 private final boolean sslEncryption; 376 private final int degradedStatusThreshold; 377 /** Keeps the 0 value if created with a ReplServerStartMsg. */ 378 private int connectedDSNumber; 379 // @NotNull 380 private Set<Integer> connectedDSs; 381 /** 382 * Is this RS locally configured? (the RS is recognized as a usable server). 383 */ 384 private boolean locallyConfigured = true; 385 386 /** 387 * Create a new instance of ReplicationServerInfo wrapping the passed 388 * message. 389 * @param msg LocalizableMessage to wrap. 390 * @param newServerURL Override serverURL. 391 * @return The new instance wrapping the passed message. 392 * @throws IllegalArgumentException If the passed message has an unexpected 393 * type. 
394 */ 395 private static ReplicationServerInfo newInstance( 396 ReplicationMsg msg, String newServerURL) throws IllegalArgumentException 397 { 398 final ReplicationServerInfo rsInfo = newInstance(msg); 399 rsInfo.setServerURL(newServerURL); 400 return rsInfo; 401 } 402 403 /** 404 * Create a new instance of ReplicationServerInfo wrapping the passed 405 * message. 406 * @param msg LocalizableMessage to wrap. 407 * @return The new instance wrapping the passed message. 408 * @throws IllegalArgumentException If the passed message has an unexpected 409 * type. 410 */ 411 static ReplicationServerInfo newInstance(ReplicationMsg msg) 412 throws IllegalArgumentException 413 { 414 if (msg instanceof ReplServerStartMsg) 415 { 416 // RS uses protocol V3 or lower 417 return new ReplicationServerInfo((ReplServerStartMsg) msg); 418 } 419 else if (msg instanceof ReplServerStartDSMsg) 420 { 421 // RS uses protocol V4 or higher 422 return new ReplicationServerInfo((ReplServerStartDSMsg) msg); 423 } 424 425 // Unsupported message type: should not happen 426 throw new IllegalArgumentException("Unexpected PDU type: " 427 + msg.getClass().getName() + ":\n" + msg); 428 } 429 430 /** 431 * Constructs a ReplicationServerInfo object wrapping a 432 * {@link ReplServerStartMsg}. 433 * 434 * @param msg 435 * The {@link ReplServerStartMsg} this object will wrap. 436 */ 437 private ReplicationServerInfo(ReplServerStartMsg msg) 438 { 439 this.protocolVersion = msg.getVersion(); 440 this.rsInfo = new RSInfo(msg.getServerId(), msg.getServerURL(), 441 msg.getGenerationId(), msg.getGroupId(), 1); 442 this.baseDN = msg.getBaseDN(); 443 this.windowSize = msg.getWindowSize(); 444 final ServerState ss = msg.getServerState(); 445 this.serverState = ss != null ? 
ss : new ServerState(); 446 this.sslEncryption = msg.getSSLEncryption(); 447 this.degradedStatusThreshold = msg.getDegradedStatusThreshold(); 448 } 449 450 /** 451 * Constructs a ReplicationServerInfo object wrapping a 452 * {@link ReplServerStartDSMsg}. 453 * 454 * @param msg 455 * The {@link ReplServerStartDSMsg} this object will wrap. 456 */ 457 private ReplicationServerInfo(ReplServerStartDSMsg msg) 458 { 459 this.rsInfo = new RSInfo(msg.getServerId(), msg.getServerURL(), 460 msg.getGenerationId(), msg.getGroupId(), msg.getWeight()); 461 this.protocolVersion = msg.getVersion(); 462 this.baseDN = msg.getBaseDN(); 463 this.windowSize = msg.getWindowSize(); 464 final ServerState ss = msg.getServerState(); 465 this.serverState = ss != null ? ss : new ServerState(); 466 this.sslEncryption = msg.getSSLEncryption(); 467 this.degradedStatusThreshold = msg.getDegradedStatusThreshold(); 468 this.connectedDSNumber = msg.getConnectedDSNumber(); 469 } 470 471 /** 472 * Constructs a new replication server info with the passed RSInfo internal 473 * values and the passed connected DSs. 474 * 475 * @param rsInfo 476 * The RSinfo to use for the update 477 * @param connectedDSs 478 * The new connected DSs 479 */ 480 ReplicationServerInfo(RSInfo rsInfo, Set<Integer> connectedDSs) 481 { 482 this.rsInfo = 483 new RSInfo(rsInfo.getId(), rsInfo.getServerUrl(), rsInfo 484 .getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight()); 485 this.protocolVersion = 0; 486 this.baseDN = null; 487 this.windowSize = 0; 488 this.connectedDSs = connectedDSs; 489 this.connectedDSNumber = connectedDSs.size(); 490 this.sslEncryption = false; 491 this.degradedStatusThreshold = -1; 492 this.serverState = new ServerState(); 493 } 494 495 /** 496 * Get the server state. 497 * @return The server state 498 */ 499 public ServerState getServerState() 500 { 501 return serverState; 502 } 503 504 /** 505 * Get the group id. 
506 * @return The group id 507 */ 508 public byte getGroupId() 509 { 510 return rsInfo.getGroupId(); 511 } 512 513 /** 514 * Get the server protocol version. 515 * @return the protocolVersion 516 */ 517 public short getProtocolVersion() 518 { 519 return protocolVersion; 520 } 521 522 /** 523 * Get the generation id. 524 * @return the generationId 525 */ 526 public long getGenerationId() 527 { 528 return rsInfo.getGenerationId(); 529 } 530 531 /** 532 * Get the server id. 533 * @return the serverId 534 */ 535 public int getServerId() 536 { 537 return rsInfo.getId(); 538 } 539 540 /** 541 * Get the server URL. 542 * @return the serverURL 543 */ 544 public String getServerURL() 545 { 546 return rsInfo.getServerUrl(); 547 } 548 549 /** 550 * Get the base DN. 551 * 552 * @return the base DN 553 */ 554 public DN getBaseDN() 555 { 556 return baseDN; 557 } 558 559 /** 560 * Get the window size. 561 * @return the windowSize 562 */ 563 public int getWindowSize() 564 { 565 return windowSize; 566 } 567 568 /** 569 * Get the ssl encryption. 570 * @return the sslEncryption 571 */ 572 public boolean isSslEncryption() 573 { 574 return sslEncryption; 575 } 576 577 /** 578 * Get the degraded status threshold. 579 * @return the degradedStatusThreshold 580 */ 581 public int getDegradedStatusThreshold() 582 { 583 return degradedStatusThreshold; 584 } 585 586 /** 587 * Get the weight. 588 * @return the weight. Null if this object is a wrapper for 589 * a ReplServerStartMsg. 590 */ 591 public int getWeight() 592 { 593 return rsInfo.getWeight(); 594 } 595 596 /** 597 * Get the connected DS number. 598 * @return the connectedDSNumber. Null if this object is a wrapper for 599 * a ReplServerStartMsg. 600 */ 601 public int getConnectedDSNumber() 602 { 603 return connectedDSNumber; 604 } 605 606 /** 607 * Converts the object to a RSInfo object. 608 * @return The RSInfo object matching this object. 
609 */ 610 RSInfo toRSInfo() 611 { 612 return rsInfo; 613 } 614 615 /** 616 * Updates replication server info with the passed RSInfo internal values 617 * and the passed connected DSs. 618 * @param rsInfo The RSinfo to use for the update 619 * @param connectedDSs The new connected DSs 620 */ 621 private void update(RSInfo rsInfo, Set<Integer> connectedDSs) 622 { 623 this.rsInfo = new RSInfo(this.rsInfo.getId(), this.rsInfo.getServerUrl(), 624 rsInfo.getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight()); 625 this.connectedDSs = connectedDSs; 626 this.connectedDSNumber = connectedDSs.size(); 627 } 628 629 private void setServerURL(String newServerURL) 630 { 631 rsInfo = new RSInfo(rsInfo.getId(), newServerURL, 632 rsInfo.getGenerationId(), rsInfo.getGroupId(), rsInfo.getWeight()); 633 } 634 635 /** 636 * Updates replication server info with the passed server state. 637 * @param serverState The ServerState to use for the update 638 */ 639 private void update(ServerState serverState) 640 { 641 this.serverState.update(serverState); 642 } 643 644 /** 645 * Get the getConnectedDSs. 646 * @return the getConnectedDSs 647 */ 648 public Set<Integer> getConnectedDSs() 649 { 650 return connectedDSs; 651 } 652 653 /** 654 * Gets the locally configured status for this RS. 655 * @return the locallyConfigured 656 */ 657 public boolean isLocallyConfigured() 658 { 659 return locallyConfigured; 660 } 661 662 /** 663 * Sets the locally configured status for this RS. 664 * @param locallyConfigured the locallyConfigured to set 665 */ 666 public void setLocallyConfigured(boolean locallyConfigured) 667 { 668 this.locallyConfigured = locallyConfigured; 669 } 670 671 /** 672 * Returns a string representation of this object. 673 * @return A string representation of this object. 
674 */ 675 @Override 676 public String toString() 677 { 678 return "ReplServerInfo Url:" + getServerURL() 679 + " ServerId:" + getServerId() 680 + " GroupId:" + getGroupId() 681 + " connectedDSs:" + connectedDSs; 682 } 683 } 684 685 /** 686 * Contacts all replication servers to get information from them and being 687 * able to choose the more suitable. 688 * @return the collected information. 689 */ 690 private Map<Integer, ReplicationServerInfo> collectReplicationServersInfo() 691 { 692 final Map<Integer, ReplicationServerInfo> rsInfos = new ConcurrentSkipListMap<>(); 693 694 for (String serverUrl : getReplicationServerUrls()) 695 { 696 // Connect to server + get and store info about it 697 final ConnectedRS rs = performPhaseOneHandshake(serverUrl, false); 698 final ReplicationServerInfo rsInfo = rs.rsInfo; 699 if (rsInfo != null) 700 { 701 rsInfos.put(rsInfo.getServerId(), rsInfo); 702 } 703 } 704 705 return rsInfos; 706 } 707 708 /** 709 * Connect to a ReplicationServer. 710 * 711 * Handshake sequences between a DS and a RS is divided into 2 logical 712 * consecutive phases (phase 1 and phase 2). DS always initiates connection 713 * and always sends first message: 714 * 715 * DS<->RS: 716 * ------- 717 * 718 * phase 1: 719 * DS --- ServerStartMsg ---> RS 720 * DS <--- ReplServerStartDSMsg --- RS 721 * phase 2: 722 * DS --- StartSessionMsg ---> RS 723 * DS <--- TopologyMsg --- RS 724 * 725 * Before performing a full handshake sequence, DS searches for best suitable 726 * RS by making only phase 1 handshake to every RS he knows then closing 727 * connection. This allows to gather information on available RSs and then 728 * decide with which RS the full handshake (phase 1 then phase 2) will be 729 * finally performed. 730 * 731 * @throws NumberFormatException address was invalid 732 */ 733 private void connectAsDataServer() 734 { 735 /* 736 * If a first connect or a connection failure occur, we go through here. 
737 * force status machine to NOT_CONNECTED_STATUS so that monitoring can see 738 * that we are not connected. 739 */ 740 domain.toNotConnectedStatus(); 741 742 /* 743 Stop any existing heartbeat monitor and changeTime publisher 744 from a previous session. 745 */ 746 stopRSHeartBeatMonitoring(); 747 stopChangeTimeHeartBeatPublishing(); 748 mustRunBestServerCheckingAlgorithm = 0; 749 750 synchronized (connectPhaseLock) 751 { 752 final int serverId = getServerId(); 753 final DN baseDN = getBaseDN(); 754 755 /* 756 * Connect to each replication server and get their ServerState then find 757 * out which one is the best to connect to. 758 */ 759 if (logger.isTraceEnabled()) 760 { 761 debugInfo("phase 1 : will perform PhaseOneH with each RS in order to elect the preferred one"); 762 } 763 764 // Get info from every available replication servers 765 Map<Integer, ReplicationServerInfo> rsInfos = 766 collectReplicationServersInfo(); 767 computeNewTopology(toRSInfos(rsInfos)); 768 769 if (rsInfos.isEmpty()) 770 { 771 setConnectedRS(ConnectedRS.noConnectedRS()); 772 } 773 else 774 { 775 // At least one server answered, find the best one. 
776 RSEvaluations evals = computeBestReplicationServer(true, -1, state, 777 rsInfos, serverId, getGroupId(), getGenerationID()); 778 779 // Best found, now initialize connection to this one (handshake phase 1) 780 if (logger.isTraceEnabled()) 781 { 782 debugInfo("phase 2 : will perform PhaseOneH with the preferred RS=" + evals.getBestRS()); 783 } 784 785 final ConnectedRS electedRS = performPhaseOneHandshake( 786 evals.getBestRS().getServerURL(), true); 787 final ReplicationServerInfo electedRsInfo = electedRS.rsInfo; 788 if (electedRsInfo != null) 789 { 790 /* 791 Update replication server info with potentially more up to date 792 data (server state for instance may have changed) 793 */ 794 rsInfos.put(electedRsInfo.getServerId(), electedRsInfo); 795 796 // Handshake phase 1 exchange went well 797 798 // Compute in which status we are starting the session to tell the RS 799 final ServerStatus initStatus = computeInitialServerStatus( 800 electedRsInfo.getGenerationId(), electedRsInfo.getServerState(), 801 electedRsInfo.getDegradedStatusThreshold(), getGenerationID()); 802 803 // Perform session start (handshake phase 2) 804 final TopologyMsg topologyMsg = 805 performPhaseTwoHandshake(electedRS, initStatus); 806 807 if (topologyMsg != null) // Handshake phase 2 exchange went well 808 { 809 connectToReplicationServer(electedRS, initStatus, topologyMsg); 810 } // Could perform handshake phase 2 with best 811 } // Could perform handshake phase 1 with best 812 } 813 814 // connectedRS has been updated by calls above, reload it 815 final ConnectedRS rs = connectedRS.get(); 816 if (rs.isConnected()) 817 { 818 connectPhaseLock.notify(); 819 820 final long rsGenId = rs.rsInfo.getGenerationId(); 821 final int rsServerId = rs.rsInfo.getServerId(); 822 if (rsGenId == getGenerationID() || rsGenId == -1) 823 { 824 logger.info(NOTE_NOW_FOUND_SAME_GENERATION_CHANGELOG, serverId, rsServerId, baseDN, 825 rs.replicationServer, getGenerationID()); 826 } 827 else 828 { 829 
logger.warn(WARN_NOW_FOUND_BAD_GENERATION_CHANGELOG, serverId, rsServerId, baseDN, 830 rs.replicationServer, getGenerationID(), rsGenId); 831 } 832 } 833 else 834 { 835 // This server could not find any replicationServer. 836 // It's going to start in degraded mode. Log a message. 837 if (!connectionError) 838 { 839 connectionError = true; 840 connectPhaseLock.notify(); 841 842 if (!rsInfos.isEmpty()) 843 { 844 logger.warn(WARN_COULD_NOT_FIND_CHANGELOG, serverId, baseDN, 845 Utils.joinAsString(", ", rsInfos.keySet())); 846 } 847 else 848 { 849 logger.warn(WARN_NO_AVAILABLE_CHANGELOGS, serverId, baseDN); 850 } 851 } 852 } 853 } 854 } 855 856 private void computeNewTopology(List<RSInfo> newRSInfos) 857 { 858 final int rsServerId = getRsServerId(); 859 860 Topology oldTopo; 861 Topology newTopo; 862 do 863 { 864 oldTopo = topology.get(); 865 newTopo = new Topology(oldTopo.replicaInfos, newRSInfos, getServerId(), 866 rsServerId, getReplicationServerUrls(), oldTopo.rsInfos); 867 } 868 while (!topology.compareAndSet(oldTopo, newTopo)); 869 870 if (logger.isTraceEnabled()) 871 { 872 debugInfo(topologyChange(rsServerId, oldTopo, newTopo)); 873 } 874 } 875 876 private StringBuilder topologyChange(int rsServerId, Topology oldTopo, 877 Topology newTopo) 878 { 879 final StringBuilder sb = new StringBuilder(); 880 sb.append("rsServerId=").append(rsServerId); 881 if (newTopo.equals(oldTopo)) 882 { 883 sb.append(", unchangedTopology=").append(newTopo); 884 } 885 else 886 { 887 sb.append(", oldTopology=").append(oldTopo); 888 sb.append(", newTopology=").append(newTopo); 889 } 890 return sb; 891 } 892 893 /** 894 * Connects to a replication server. 
895 * 896 * @param rs 897 * the Replication Server to connect to 898 * @param initStatus 899 * The status to enter the state machine with 900 * @param topologyMsg 901 * the message containing the topology information 902 */ 903 private void connectToReplicationServer(ConnectedRS rs, 904 ServerStatus initStatus, TopologyMsg topologyMsg) 905 { 906 final DN baseDN = getBaseDN(); 907 final ReplicationServerInfo rsInfo = rs.rsInfo; 908 909 boolean connectCompleted = false; 910 try 911 { 912 maxSendWindow = rsInfo.getWindowSize(); 913 914 receiveTopo(topologyMsg, rs.getServerId()); 915 916 /* 917 Log a message to let the administrator know that the failure was resolved. 918 Wake up all the thread that were waiting on the window 919 on the previous connection. 920 */ 921 connectionError = false; 922 if (sendWindow != null) 923 { 924 /* 925 * Fix (hack) for OPENDJ-401: we want to ensure that no threads holding 926 * this semaphore will get blocked when they acquire it. However, we 927 * also need to make sure that we don't overflow the semaphore by 928 * releasing too many permits. 929 */ 930 final int MAX_PERMITS = Integer.MAX_VALUE >>> 2; 931 if (sendWindow.availablePermits() < MAX_PERMITS) 932 { 933 /* 934 * At least 2^29 acquisitions would need to occur for this to be 935 * insufficient. In addition, at least 2^30 releases would need to 936 * occur for this to potentially overflow. Hopefully this is unlikely 937 * to happen. 938 */ 939 sendWindow.release(MAX_PERMITS); 940 } 941 } 942 sendWindow = new Semaphore(maxSendWindow); 943 rcvWindow = getMaxRcvWindow(); 944 945 domain.sessionInitiated(initStatus, rsInfo.getServerState()); 946 947 final byte groupId = getGroupId(); 948 if (rs.getGroupId() != groupId) 949 { 950 /* 951 Connected to replication server with wrong group id: 952 warn user and start heartbeat monitor to recover when a server 953 with the right group id shows up. 
954 */ 955 logger.warn(WARN_CONNECTED_TO_SERVER_WITH_WRONG_GROUP_ID, 956 groupId, rs.getServerId(), rsInfo.getServerURL(), rs.getGroupId(), baseDN, getServerId()); 957 } 958 startRSHeartBeatMonitoring(rs); 959 if (rsInfo.getProtocolVersion() >= 960 ProtocolVersion.REPLICATION_PROTOCOL_V3) 961 { 962 startChangeTimeHeartBeatPublishing(rs); 963 } 964 connectCompleted = true; 965 } 966 catch (Exception e) 967 { 968 logger.error(ERR_COMPUTING_FAKE_OPS, baseDN, rsInfo.getServerURL(), 969 e.getLocalizedMessage() + " " + stackTraceToSingleLineString(e)); 970 } 971 finally 972 { 973 if (!connectCompleted) 974 { 975 setConnectedRS(ConnectedRS.noConnectedRS()); 976 } 977 } 978 } 979 980 /** 981 * Determines the status we are starting with according to our state and the 982 * RS state. 983 * 984 * @param rsGenId The generation id of the RS 985 * @param rsState The server state of the RS 986 * @param degradedStatusThreshold The degraded status threshold of the RS 987 * @param dsGenId The local generation id 988 * @return The initial status 989 */ 990 private ServerStatus computeInitialServerStatus(long rsGenId, 991 ServerState rsState, int degradedStatusThreshold, long dsGenId) 992 { 993 if (rsGenId == -1) 994 { 995 // RS has no generation id 996 return ServerStatus.NORMAL_STATUS; 997 } 998 else if (rsGenId != dsGenId) 999 { 1000 // DS and RS do not have same generation id 1001 return ServerStatus.BAD_GEN_ID_STATUS; 1002 } 1003 else 1004 { 1005 /* 1006 DS and RS have same generation id 1007 1008 Determine if we are late or not to replay changes. RS uses a 1009 threshold value for pending changes to be replayed by a DS to 1010 determine if the DS is in normal status or in degraded status. 
1011 Let's compare the local and remote server state using this threshold 1012 value to determine if we are late or not 1013 */ 1014 1015 int nChanges = ServerState.diffChanges(rsState, state); 1016 if (logger.isTraceEnabled()) 1017 { 1018 debugInfo("computed " + nChanges + " changes late."); 1019 } 1020 1021 /* 1022 Check status to know if it is relevant to change the status. Do not 1023 take RSD lock to test. If we attempt to change the status whereas 1024 we are in a status that do not allows that, this will be noticed by 1025 the changeStatusFromStatusAnalyzer method. This allows to take the 1026 lock roughly only when needed versus every sleep time timeout. 1027 */ 1028 if (degradedStatusThreshold > 0 && nChanges >= degradedStatusThreshold) 1029 { 1030 return ServerStatus.DEGRADED_STATUS; 1031 } 1032 // degradedStatusThreshold value of '0' means no degrading system used 1033 // (no threshold): force normal status 1034 return ServerStatus.NORMAL_STATUS; 1035 } 1036 } 1037 1038 1039 1040 /** 1041 * Connect to the provided server performing the first phase handshake (start 1042 * messages exchange) and return the reply message from the replication 1043 * server, wrapped in a ReplicationServerInfo object. 1044 * 1045 * @param serverURL 1046 * Server to connect to. 1047 * @param keepSession 1048 * Do we keep session opened or not after handshake. Use true if want 1049 * to perform handshake phase 2 with the same session and keep the 1050 * session to create as the current one. 1051 * @return The answer from the server . Null if could not get an answer. 1052 */ 1053 private ConnectedRS performPhaseOneHandshake(String serverURL, boolean keepSession) 1054 { 1055 Session newSession = null; 1056 Socket socket = null; 1057 boolean hasConnected = false; 1058 LocalizableMessage errorMessage = null; 1059 1060 try 1061 { 1062 // Open a socket connection to the next candidate. 
1063 socket = new Socket(); 1064 socket.setReceiveBufferSize(1000000); 1065 socket.setTcpNoDelay(true); 1066 if (config.getSourceAddress() != null) 1067 { 1068 InetSocketAddress local = new InetSocketAddress(config.getSourceAddress(), 0); 1069 socket.bind(local); 1070 } 1071 int timeoutMS = MultimasterReplication.getConnectionTimeoutMS(); 1072 socket.connect(HostPort.valueOf(serverURL).toInetSocketAddress(), timeoutMS); 1073 newSession = replSessionSecurity.createClientSession(socket, timeoutMS); 1074 boolean isSslEncryption = replSessionSecurity.isSslEncryption(); 1075 1076 // Send our ServerStartMsg. 1077 final HostPort hp = new HostPort( 1078 socket.getLocalAddress().getHostName(), socket.getLocalPort()); 1079 final String url = hp.toString(); 1080 final StartMsg serverStartMsg = new ServerStartMsg(getServerId(), url, getBaseDN(), 1081 getMaxRcvWindow(), config.getHeartbeatInterval(), state, 1082 getGenerationID(), isSslEncryption, getGroupId()); 1083 newSession.publish(serverStartMsg); 1084 1085 // Read the ReplServerStartMsg or ReplServerStartDSMsg that should 1086 // come back. 1087 ReplicationMsg msg = newSession.receive(); 1088 if (logger.isTraceEnabled()) 1089 { 1090 debugInfo("RB HANDSHAKE SENT:\n" + serverStartMsg + "\nAND RECEIVED:\n" 1091 + msg); 1092 } 1093 1094 // Wrap received message in a server info object 1095 final ReplicationServerInfo replServerInfo = 1096 ReplicationServerInfo.newInstance(msg, serverURL); 1097 1098 // Sanity check 1099 final DN repDN = replServerInfo.getBaseDN(); 1100 if (!getBaseDN().equals(repDN)) 1101 { 1102 errorMessage = ERR_DS_DN_DOES_NOT_MATCH.get(repDN, getBaseDN()); 1103 return setConnectedRS(ConnectedRS.noConnectedRS()); 1104 } 1105 1106 /* 1107 * We have sent our own protocol version to the replication server. The 1108 * replication server will use the same one (or an older one if it is an 1109 * old replication server). 
1110 */ 1111 newSession.setProtocolVersion( 1112 getCompatibleVersion(replServerInfo.getProtocolVersion())); 1113 1114 if (!isSslEncryption) 1115 { 1116 newSession.stopEncryption(); 1117 } 1118 1119 hasConnected = true; 1120 1121 if (keepSession) 1122 { 1123 // cannot store it yet, 1124 // only store after a successful phase two handshake 1125 return new ConnectedRS(replServerInfo, newSession); 1126 } 1127 return new ConnectedRS(replServerInfo, null); 1128 } 1129 catch (ConnectException e) 1130 { 1131 logger.traceException(e); 1132 errorMessage = WARN_NO_CHANGELOG_SERVER_LISTENING.get(getServerId(), serverURL, getBaseDN()); 1133 } 1134 catch (SocketTimeoutException e) 1135 { 1136 logger.traceException(e); 1137 errorMessage = WARN_TIMEOUT_CONNECTING_TO_RS.get(getServerId(), serverURL, getBaseDN()); 1138 } 1139 catch (Exception e) 1140 { 1141 logger.traceException(e); 1142 errorMessage = WARN_EXCEPTION_STARTING_SESSION_PHASE.get( 1143 getServerId(), serverURL, getBaseDN(), stackTraceToSingleLineString(e)); 1144 } 1145 finally 1146 { 1147 if (!hasConnected || !keepSession) 1148 { 1149 close(newSession); 1150 close(socket); 1151 } 1152 1153 if (!hasConnected && errorMessage != null && !connectionError) 1154 { 1155 // There was no server waiting on this host:port 1156 // Log a notice and will try the next replicationServer in the list 1157 if (keepSession) // Log error message only for final connection 1158 { 1159 // log the error message only once to avoid overflowing the error log 1160 logger.error(errorMessage); 1161 } 1162 1163 logger.trace(errorMessage); 1164 } 1165 } 1166 return setConnectedRS(ConnectedRS.noConnectedRS()); 1167 } 1168 1169 /** 1170 * Performs the second phase handshake (send StartSessionMsg and receive 1171 * TopologyMsg messages exchange) and return the reply message from the 1172 * replication server. 1173 * 1174 * @param electedRS Server we are connecting with. 
1175 * @param initStatus The status we are starting with 1176 * @return The ReplServerStartMsg the server replied. Null if could not 1177 * get an answer. 1178 */ 1179 private TopologyMsg performPhaseTwoHandshake(ConnectedRS electedRS, 1180 ServerStatus initStatus) 1181 { 1182 try 1183 { 1184 // Send our StartSessionMsg. 1185 final StartSessionMsg startSessionMsg; 1186 startSessionMsg = new StartSessionMsg( 1187 initStatus, 1188 domain.getRefUrls(), 1189 domain.isAssured(), 1190 domain.getAssuredMode(), 1191 domain.getAssuredSdLevel()); 1192 startSessionMsg.setEclIncludes( 1193 domain.getEclIncludes(domain.getServerId()), 1194 domain.getEclIncludesForDeletes(domain.getServerId())); 1195 final Session session = electedRS.session; 1196 session.publish(startSessionMsg); 1197 1198 // Read the TopologyMsg that should come back. 1199 final TopologyMsg topologyMsg = (TopologyMsg) session.receive(); 1200 1201 if (logger.isTraceEnabled()) 1202 { 1203 debugInfo("RB HANDSHAKE SENT:\n" + startSessionMsg 1204 + "\nAND RECEIVED:\n" + topologyMsg); 1205 } 1206 1207 // Alright set the timeout to the desired value 1208 session.setSoTimeout(timeout); 1209 setConnectedRS(electedRS); 1210 return topologyMsg; 1211 } 1212 catch (Exception e) 1213 { 1214 logger.error(WARN_EXCEPTION_STARTING_SESSION_PHASE, 1215 getServerId(), electedRS.rsInfo.getServerURL(), getBaseDN(), stackTraceToSingleLineString(e)); 1216 1217 setConnectedRS(ConnectedRS.noConnectedRS()); 1218 return null; 1219 } 1220 } 1221 1222 /** 1223 * Class holding evaluation results for electing the best replication server 1224 * for the local directory server. 1225 */ 1226 static class RSEvaluations 1227 { 1228 private final int localServerId; 1229 private Map<Integer, ReplicationServerInfo> bestRSs; 1230 private final Map<Integer, LocalizableMessage> rsEvals = new HashMap<>(); 1231 1232 /** 1233 * Ctor. 
1234 * 1235 * @param localServerId 1236 * the serverId for the local directory server 1237 * @param rsInfos 1238 * a Map of serverId => {@link ReplicationServerInfo} with all the 1239 * candidate replication servers 1240 */ 1241 RSEvaluations(int localServerId, 1242 Map<Integer, ReplicationServerInfo> rsInfos) 1243 { 1244 this.localServerId = localServerId; 1245 this.bestRSs = rsInfos; 1246 } 1247 1248 private boolean keepBest(LocalEvaluation eval) 1249 { 1250 if (eval.hasAcceptedAny()) 1251 { 1252 bestRSs = eval.getAccepted(); 1253 rsEvals.putAll(eval.getRejected()); 1254 return true; 1255 } 1256 return false; 1257 } 1258 1259 /** 1260 * Sets the elected best replication server, rejecting all the other 1261 * replication servers with the supplied evaluation. 1262 * 1263 * @param bestRsId 1264 * the serverId of the elected replication server 1265 * @param rejectedRSsEval 1266 * the evaluation for all the rejected replication servers 1267 */ 1268 private void setBestRS(int bestRsId, LocalizableMessage rejectedRSsEval) 1269 { 1270 for (Iterator<Entry<Integer, ReplicationServerInfo>> it = 1271 this.bestRSs.entrySet().iterator(); it.hasNext();) 1272 { 1273 final Entry<Integer, ReplicationServerInfo> entry = it.next(); 1274 final Integer rsId = entry.getKey(); 1275 final ReplicationServerInfo rsInfo = entry.getValue(); 1276 if (rsInfo.getServerId() != bestRsId) 1277 { 1278 it.remove(); 1279 } 1280 rsEvals.put(rsId, rejectedRSsEval); 1281 } 1282 } 1283 1284 private void discardAll(LocalizableMessage eval) 1285 { 1286 for (Integer rsId : bestRSs.keySet()) 1287 { 1288 rsEvals.put(rsId, eval); 1289 } 1290 } 1291 1292 private boolean foundBestRS() 1293 { 1294 return bestRSs.size() == 1; 1295 } 1296 1297 /** 1298 * Returns the {@link ReplicationServerInfo} for the best replication 1299 * server. 
1300 * 1301 * @return the {@link ReplicationServerInfo} for the best replication server 1302 */ 1303 ReplicationServerInfo getBestRS() 1304 { 1305 if (foundBestRS()) 1306 { 1307 return bestRSs.values().iterator().next(); 1308 } 1309 return null; 1310 } 1311 1312 /** 1313 * Returns the evaluations for all the candidate replication servers. 1314 * 1315 * @return a Map of serverId => LocalizableMessage containing the evaluation for each 1316 * candidate replication servers. 1317 */ 1318 Map<Integer, LocalizableMessage> getEvaluations() 1319 { 1320 if (foundBestRS()) 1321 { 1322 final Integer bestRSServerId = getBestRS().getServerId(); 1323 if (rsEvals.get(bestRSServerId) == null) 1324 { 1325 final LocalizableMessage eval = NOTE_BEST_RS.get(bestRSServerId, localServerId); 1326 rsEvals.put(bestRSServerId, eval); 1327 } 1328 } 1329 return Collections.unmodifiableMap(rsEvals); 1330 } 1331 1332 /** 1333 * Returns the evaluation for the supplied replication server Id. 1334 * <p> 1335 * Note: "unknown RS" message is returned if the supplied replication server 1336 * was not part of the candidate replication servers. 
1337 * 1338 * @param rsServerId 1339 * the supplied replication server Id 1340 * @return the evaluation {@link LocalizableMessage} for the supplied replication 1341 * server Id 1342 */ 1343 private LocalizableMessage getEvaluation(int rsServerId) 1344 { 1345 final LocalizableMessage evaluation = getEvaluations().get(rsServerId); 1346 if (evaluation != null) 1347 { 1348 return evaluation; 1349 } 1350 return NOTE_UNKNOWN_RS.get(rsServerId, localServerId); 1351 } 1352 1353 /** {@inheritDoc} */ 1354 @Override 1355 public String toString() 1356 { 1357 return "Current best replication server Ids: " + bestRSs.keySet() 1358 + ", Evaluation of connected replication servers" 1359 + " (ServerId => Evaluation): " + rsEvals.keySet() 1360 + ", Any replication server not appearing here" 1361 + " could not be contacted."; 1362 } 1363 } 1364 1365 /** 1366 * Evaluation local to one filter. 1367 */ 1368 private static class LocalEvaluation 1369 { 1370 private final Map<Integer, ReplicationServerInfo> accepted = new HashMap<>(); 1371 private final Map<ReplicationServerInfo, LocalizableMessage> rsEvals = new HashMap<>(); 1372 1373 private void accept(Integer rsId, ReplicationServerInfo rsInfo) 1374 { 1375 // forget previous eval, including undoing reject 1376 this.rsEvals.remove(rsInfo); 1377 this.accepted.put(rsId, rsInfo); 1378 } 1379 1380 private void reject(ReplicationServerInfo rsInfo, LocalizableMessage reason) 1381 { 1382 this.accepted.remove(rsInfo.getServerId()); // undo accept 1383 this.rsEvals.put(rsInfo, reason); 1384 } 1385 1386 private Map<Integer, ReplicationServerInfo> getAccepted() 1387 { 1388 return accepted; 1389 } 1390 1391 private ReplicationServerInfo[] getAcceptedRSInfos() 1392 { 1393 return accepted.values().toArray( 1394 new ReplicationServerInfo[accepted.size()]); 1395 } 1396 1397 public Map<Integer, LocalizableMessage> getRejected() 1398 { 1399 final Map<Integer, LocalizableMessage> result = new HashMap<>(); 1400 for (Entry<ReplicationServerInfo, 
LocalizableMessage> entry : rsEvals.entrySet()) 1401 { 1402 result.put(entry.getKey().getServerId(), entry.getValue()); 1403 } 1404 return result; 1405 } 1406 1407 private boolean hasAcceptedAny() 1408 { 1409 return !accepted.isEmpty(); 1410 } 1411 1412 } 1413 1414 /** 1415 * Returns the replication server that best fits our need so that we can 1416 * connect to it or determine if we must disconnect from current one to 1417 * re-connect to best server. 1418 * <p> 1419 * Note: this method is static for test purpose (access from unit tests) 1420 * 1421 * @param firstConnection True if we run this method for the very first 1422 * connection of the broker. False if we run this method to determine if the 1423 * replication server we are currently connected to is still the best or not. 1424 * @param rsServerId The id of the replication server we are currently 1425 * connected to. Only used when firstConnection is false. 1426 * @param myState The local server state. 1427 * @param rsInfos The list of available replication servers and their 1428 * associated information (choice will be made among them). 1429 * @param localServerId The server id for the suffix we are working for. 1430 * @param groupId The groupId we prefer being connected to if possible 1431 * @param generationId The generation id we are using 1432 * @return The computed best replication server. If the returned value is 1433 * null, the best replication server is undetermined but the local server must 1434 * disconnect (so the best replication server is another one than the current 1435 * one). Null can only be returned when firstConnection is false. 
1436 */ 1437 static RSEvaluations computeBestReplicationServer( 1438 boolean firstConnection, int rsServerId, ServerState myState, 1439 Map<Integer, ReplicationServerInfo> rsInfos, int localServerId, 1440 byte groupId, long generationId) 1441 { 1442 final RSEvaluations evals = new RSEvaluations(localServerId, rsInfos); 1443 // Shortcut, if only one server, this is the best 1444 if (evals.foundBestRS()) 1445 { 1446 return evals; 1447 } 1448 1449 /** 1450 * Apply some filtering criteria to determine the best servers list from 1451 * the available ones. The ordered list of criteria is (from more important 1452 * to less important): 1453 * - replication server has the same group id as the local DS one 1454 * - replication server has the same generation id as the local DS one 1455 * - replication server is up to date regarding changes generated by the 1456 * local DS 1457 * - replication server in the same VM as local DS one 1458 */ 1459 /* 1460 The list of best replication servers is filtered with each criteria. At 1461 each criteria, the list is replaced with the filtered one if there 1462 are some servers from the filtering, otherwise, the list is left as is 1463 and the new filtering for the next criteria is applied and so on. 1464 1465 Use only servers locally configured: those are servers declared in 1466 the local configuration. When the current method is called, for 1467 sure, at least one server from the list is locally configured 1468 */ 1469 filterServersLocallyConfigured(evals, localServerId); 1470 // Some servers with same group id ? 1471 filterServersWithSameGroupId(evals, localServerId, groupId); 1472 // Some servers with same generation id ? 
1473 final boolean rssWithSameGenerationIdExist = 1474 filterServersWithSameGenerationId(evals, localServerId, generationId); 1475 if (rssWithSameGenerationIdExist) 1476 { 1477 // If some servers with the right generation id this is useful to 1478 // run the local DS change criteria 1479 filterServersWithAllLocalDSChanges(evals, myState, localServerId); 1480 } 1481 // Some servers in the local VM or local host? 1482 filterServersOnSameHost(evals, localServerId); 1483 1484 if (evals.foundBestRS()) 1485 { 1486 return evals; 1487 } 1488 1489 /** 1490 * Now apply the choice based on the weight to the best servers list 1491 */ 1492 if (firstConnection) 1493 { 1494 // We are not connected to a server yet 1495 computeBestServerForWeight(evals, -1, -1); 1496 } 1497 else 1498 { 1499 /* 1500 * We are already connected to a RS: compute the best RS as far as the 1501 * weights is concerned. If this is another one, some DS must disconnect. 1502 */ 1503 computeBestServerForWeight(evals, rsServerId, localServerId); 1504 } 1505 return evals; 1506 } 1507 1508 /** 1509 * Creates a new list that contains only replication servers that are locally 1510 * configured. 1511 * @param evals The evaluation object 1512 */ 1513 private static void filterServersLocallyConfigured(RSEvaluations evals, 1514 int localServerId) 1515 { 1516 final LocalEvaluation eval = new LocalEvaluation(); 1517 for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet()) 1518 { 1519 final Integer rsId = entry.getKey(); 1520 final ReplicationServerInfo rsInfo = entry.getValue(); 1521 if (rsInfo.isLocallyConfigured()) 1522 { 1523 eval.accept(rsId, rsInfo); 1524 } 1525 else 1526 { 1527 eval.reject(rsInfo, 1528 NOTE_RS_NOT_LOCALLY_CONFIGURED.get(rsId, localServerId)); 1529 } 1530 } 1531 evals.keepBest(eval); 1532 } 1533 1534 /** 1535 * Creates a new list that contains only replication servers that have the 1536 * passed group id, from a passed replication server list. 
1537 * @param evals The evaluation object 1538 * @param groupId The group id that must match 1539 */ 1540 private static void filterServersWithSameGroupId(RSEvaluations evals, 1541 int localServerId, byte groupId) 1542 { 1543 final LocalEvaluation eval = new LocalEvaluation(); 1544 for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet()) 1545 { 1546 final Integer rsId = entry.getKey(); 1547 final ReplicationServerInfo rsInfo = entry.getValue(); 1548 if (rsInfo.getGroupId() == groupId) 1549 { 1550 eval.accept(rsId, rsInfo); 1551 } 1552 else 1553 { 1554 eval.reject(rsInfo, NOTE_RS_HAS_DIFFERENT_GROUP_ID_THAN_DS.get( 1555 rsId, rsInfo.getGroupId(), localServerId, groupId)); 1556 } 1557 } 1558 evals.keepBest(eval); 1559 } 1560 1561 /** 1562 * Creates a new list that contains only replication servers that have the 1563 * provided generation id, from a provided replication server list. 1564 * When the selected replication servers have no change (empty serverState) 1565 * then the 'empty'(generationId==-1) replication servers are also included 1566 * in the result list. 
1567 * 1568 * @param evals The evaluation object 1569 * @param generationId The generation id that must match 1570 * @return whether some replication server passed the filter 1571 */ 1572 private static boolean filterServersWithSameGenerationId( 1573 RSEvaluations evals, long localServerId, long generationId) 1574 { 1575 final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs; 1576 final LocalEvaluation eval = new LocalEvaluation(); 1577 boolean emptyState = true; 1578 1579 for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet()) 1580 { 1581 final Integer rsId = entry.getKey(); 1582 final ReplicationServerInfo rsInfo = entry.getValue(); 1583 if (rsInfo.getGenerationId() == generationId) 1584 { 1585 eval.accept(rsId, rsInfo); 1586 if (!rsInfo.serverState.isEmpty()) 1587 { 1588 emptyState = false; 1589 } 1590 } 1591 else if (rsInfo.getGenerationId() == -1) 1592 { 1593 eval.reject(rsInfo, NOTE_RS_HAS_NO_GENERATION_ID.get(rsId, 1594 generationId, localServerId)); 1595 } 1596 else 1597 { 1598 eval.reject(rsInfo, NOTE_RS_HAS_DIFFERENT_GENERATION_ID_THAN_DS.get( 1599 rsId, rsInfo.getGenerationId(), localServerId, generationId)); 1600 } 1601 } 1602 1603 if (emptyState) 1604 { 1605 // If the RS with a generationId have all an empty state, 1606 // then the 'empty'(genId=-1) RSes are also candidate 1607 for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet()) 1608 { 1609 ReplicationServerInfo rsInfo = entry.getValue(); 1610 if (rsInfo.getGenerationId() == -1) 1611 { 1612 // will undo the reject of previously rejected RSs 1613 eval.accept(entry.getKey(), rsInfo); 1614 } 1615 } 1616 } 1617 1618 return evals.keepBest(eval); 1619 } 1620 1621 /** 1622 * Creates a new list that contains only replication servers that have the 1623 * latest changes from the passed DS, from a passed replication server list. 
1624 * @param evals The evaluation object 1625 * @param localState The state of the local DS 1626 * @param localServerId The server id to consider for the changes 1627 */ 1628 private static void filterServersWithAllLocalDSChanges( 1629 RSEvaluations evals, ServerState localState, int localServerId) 1630 { 1631 // Extract the CSN of the latest change generated by the local server 1632 final CSN localCSN = getCSN(localState, localServerId); 1633 1634 /** 1635 * Find replication servers that are up to date (or more up to date than us, 1636 * if for instance we failed and restarted, having sent some changes to the 1637 * RS but without having time to store our own state) regarding our own 1638 * server id. If some servers are more up to date, prefer this list but take 1639 * only the latest CSN. 1640 */ 1641 final LocalEvaluation mostUpToDateEval = new LocalEvaluation(); 1642 boolean foundRSMoreUpToDateThanLocalDS = false; 1643 CSN latestRsCSN = null; 1644 for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet()) 1645 { 1646 final Integer rsId = entry.getKey(); 1647 final ReplicationServerInfo rsInfo = entry.getValue(); 1648 final CSN rsCSN = getCSN(rsInfo.getServerState(), localServerId); 1649 1650 // Has this replication server the latest local change ? 
1651 if (rsCSN.isOlderThan(localCSN)) 1652 { 1653 mostUpToDateEval.reject(rsInfo, NOTE_RS_LATER_THAN_LOCAL_DS.get( 1654 rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI())); 1655 } 1656 else if (rsCSN.equals(localCSN)) 1657 { 1658 // This replication server has exactly the latest change from the 1659 // local server 1660 if (!foundRSMoreUpToDateThanLocalDS) 1661 { 1662 mostUpToDateEval.accept(rsId, rsInfo); 1663 } 1664 else 1665 { 1666 mostUpToDateEval.reject(rsInfo, 1667 NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get( 1668 rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI())); 1669 } 1670 } 1671 else if (rsCSN.isNewerThan(localCSN)) 1672 { 1673 // This replication server is even more up to date than the local server 1674 if (latestRsCSN == null) 1675 { 1676 foundRSMoreUpToDateThanLocalDS = true; 1677 // all previous results are now outdated, reject them all 1678 rejectAllWithRSIsLaterThanBestRS(mostUpToDateEval, localServerId, 1679 localCSN); 1680 // Initialize the latest CSN 1681 latestRsCSN = rsCSN; 1682 } 1683 1684 if (rsCSN.equals(latestRsCSN)) 1685 { 1686 mostUpToDateEval.accept(rsId, rsInfo); 1687 } 1688 else if (rsCSN.isNewerThan(latestRsCSN)) 1689 { 1690 // This RS is even more up to date, reject all previously accepted RSs 1691 // and store this new RS 1692 rejectAllWithRSIsLaterThanBestRS(mostUpToDateEval, localServerId, 1693 localCSN); 1694 mostUpToDateEval.accept(rsId, rsInfo); 1695 latestRsCSN = rsCSN; 1696 } 1697 else 1698 { 1699 mostUpToDateEval.reject(rsInfo, 1700 NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get( 1701 rsId, rsCSN.toStringUI(), localServerId, localCSN.toStringUI())); 1702 } 1703 } 1704 } 1705 evals.keepBest(mostUpToDateEval); 1706 } 1707 1708 private static CSN getCSN(ServerState state, int serverId) 1709 { 1710 final CSN csn = state.getCSN(serverId); 1711 if (csn != null) 1712 { 1713 return csn; 1714 } 1715 return new CSN(0, 0, serverId); 1716 } 1717 1718 private static void 
rejectAllWithRSIsLaterThanBestRS( 1719 final LocalEvaluation eval, int localServerId, CSN localCSN) 1720 { 1721 for (ReplicationServerInfo rsInfo : eval.getAcceptedRSInfos()) 1722 { 1723 final String rsCSN = 1724 getCSN(rsInfo.getServerState(), localServerId).toStringUI(); 1725 final LocalizableMessage reason = 1726 NOTE_RS_LATER_THAN_ANOTHER_RS_MORE_UP_TO_DATE_THAN_LOCAL_DS.get( 1727 rsInfo.getServerId(), rsCSN, localServerId, localCSN.toStringUI()); 1728 eval.reject(rsInfo, reason); 1729 } 1730 } 1731 1732 /** 1733 * Creates a new list that contains only replication servers that are on the 1734 * same host as the local DS, from a passed replication server list. This 1735 * method will gives priority to any replication server which is in the same 1736 * VM as this DS. 1737 * 1738 * @param evals The evaluation object 1739 */ 1740 private static void filterServersOnSameHost(RSEvaluations evals, 1741 int localServerId) 1742 { 1743 /* 1744 * Initially look for all servers on the same host. If we find one in the 1745 * same VM, then narrow the search. 1746 */ 1747 boolean foundRSInSameVM = false; 1748 final LocalEvaluation eval = new LocalEvaluation(); 1749 for (Entry<Integer, ReplicationServerInfo> entry : evals.bestRSs.entrySet()) 1750 { 1751 final Integer rsId = entry.getKey(); 1752 final ReplicationServerInfo rsInfo = entry.getValue(); 1753 final HostPort hp = HostPort.valueOf(rsInfo.getServerURL()); 1754 if (hp.isLocalAddress()) 1755 { 1756 if (isLocalReplicationServerPort(hp.getPort())) 1757 { 1758 if (!foundRSInSameVM) 1759 { 1760 // An RS in the same VM will always have priority. 1761 // Narrow the search to only include servers in this VM. 
1762 rejectAllWithRSOnDifferentVMThanDS(eval, localServerId); 1763 foundRSInSameVM = true; 1764 } 1765 eval.accept(rsId, rsInfo); 1766 } 1767 else if (!foundRSInSameVM) 1768 { 1769 // OK, accept RSs on the same machine because we have not found an RS 1770 // in the same VM yet 1771 eval.accept(rsId, rsInfo); 1772 } 1773 else 1774 { 1775 // Skip: we have found some RSs in the same VM, but this RS is not. 1776 eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_VM_THAN_DS.get(rsId, 1777 localServerId)); 1778 } 1779 } 1780 else 1781 { 1782 eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_HOST_THAN_DS.get(rsId, 1783 localServerId)); 1784 } 1785 } 1786 evals.keepBest(eval); 1787 } 1788 1789 private static void rejectAllWithRSOnDifferentVMThanDS(LocalEvaluation eval, 1790 int localServerId) 1791 { 1792 for (ReplicationServerInfo rsInfo : eval.getAcceptedRSInfos()) 1793 { 1794 eval.reject(rsInfo, NOTE_RS_ON_DIFFERENT_VM_THAN_DS.get( 1795 rsInfo.getServerId(), localServerId)); 1796 } 1797 } 1798 1799 /** 1800 * Computes the best replication server the local server should be connected 1801 * to so that the load is correctly spread across the topology, following the 1802 * weights guidance. 1803 * Warning: This method is expected to be called with at least 2 servers in 1804 * bestServers 1805 * Note: this method is static for test purpose (access from unit tests) 1806 * @param evals The evaluation object 1807 * @param currentRsServerId The replication server the local server is 1808 * currently connected to. -1 if the local server is not yet connected 1809 * to any replication server. 1810 * @param localServerId The server id of the local server. 
This is not used 1811 * when it is not connected to a replication server 1812 * (currentRsServerId = -1) 1813 */ 1814 static void computeBestServerForWeight(RSEvaluations evals, 1815 int currentRsServerId, int localServerId) 1816 { 1817 final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs; 1818 /* 1819 * - Compute the load goal of each RS, deducing it from the weights affected 1820 * to them. 1821 * - Compute the current load of each RS, deducing it from the DSs 1822 * currently connected to them. 1823 * - Compute the differences between the load goals and the current loads of 1824 * the RSs. 1825 */ 1826 // Sum of the weights 1827 int sumOfWeights = 0; 1828 // Sum of the connected DSs 1829 int sumOfConnectedDSs = 0; 1830 for (ReplicationServerInfo rsInfo : bestServers.values()) 1831 { 1832 sumOfWeights += rsInfo.getWeight(); 1833 sumOfConnectedDSs += rsInfo.getConnectedDSNumber(); 1834 } 1835 1836 // Distance (difference) of the current loads to the load goals of each RS: 1837 // key:server id, value: distance 1838 Map<Integer, BigDecimal> loadDistances = new HashMap<>(); 1839 // Precision for the operations (number of digits after the dot) 1840 final MathContext mathContext = new MathContext(32, RoundingMode.HALF_UP); 1841 for (Entry<Integer, ReplicationServerInfo> entry : bestServers.entrySet()) 1842 { 1843 final Integer rsId = entry.getKey(); 1844 final ReplicationServerInfo rsInfo = entry.getValue(); 1845 1846 // load goal = rs weight / sum of weights 1847 BigDecimal loadGoalBd = BigDecimal.valueOf(rsInfo.getWeight()).divide( 1848 BigDecimal.valueOf(sumOfWeights), mathContext); 1849 BigDecimal currentLoadBd = BigDecimal.ZERO; 1850 if (sumOfConnectedDSs != 0) 1851 { 1852 // current load = number of connected DSs / total number of DSs 1853 int connectedDSs = rsInfo.getConnectedDSNumber(); 1854 currentLoadBd = BigDecimal.valueOf(connectedDSs).divide( 1855 BigDecimal.valueOf(sumOfConnectedDSs), mathContext); 1856 } 1857 // load distance = load 
goal - current load 1858 BigDecimal loadDistanceBd = 1859 loadGoalBd.subtract(currentLoadBd, mathContext); 1860 loadDistances.put(rsId, loadDistanceBd); 1861 } 1862 1863 if (currentRsServerId == -1) 1864 { 1865 // The local server is not connected yet, find best server to connect to, 1866 // taking the weights into account. 1867 computeBestServerWhenNotConnected(evals, loadDistances, localServerId); 1868 } 1869 else 1870 { 1871 // The local server is currently connected to a RS, let's see if it must 1872 // disconnect or not, taking the weights into account. 1873 computeBestServerWhenConnected(evals, loadDistances, localServerId, 1874 currentRsServerId, sumOfWeights, sumOfConnectedDSs); 1875 } 1876 } 1877 1878 private static void computeBestServerWhenNotConnected(RSEvaluations evals, 1879 Map<Integer, BigDecimal> loadDistances, int localServerId) 1880 { 1881 final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs; 1882 /* 1883 * Find the server with the current highest distance to its load goal and 1884 * choose it. 
Make an exception if every server is correctly balanced, 1885 * that is every current load distances are equal to 0, in that case, 1886 * choose the server with the highest weight 1887 */ 1888 int bestRsId = 0; // If all server equal, return the first one 1889 float highestDistance = Float.NEGATIVE_INFINITY; 1890 boolean allRsWithZeroDistance = true; 1891 int highestWeightRsId = -1; 1892 int highestWeight = -1; 1893 for (Integer rsId : bestServers.keySet()) 1894 { 1895 float loadDistance = loadDistances.get(rsId).floatValue(); 1896 if (loadDistance > highestDistance) 1897 { 1898 // This server is far more from its balance point 1899 bestRsId = rsId; 1900 highestDistance = loadDistance; 1901 } 1902 if (loadDistance != 0) 1903 { 1904 allRsWithZeroDistance = false; 1905 } 1906 int weight = bestServers.get(rsId).getWeight(); 1907 if (weight > highestWeight) 1908 { 1909 // This server has a higher weight 1910 highestWeightRsId = rsId; 1911 highestWeight = weight; 1912 } 1913 } 1914 // All servers with a 0 distance ? 
1915 if (allRsWithZeroDistance) 1916 { 1917 // Choose server with the highest weight 1918 bestRsId = highestWeightRsId; 1919 } 1920 evals.setBestRS(bestRsId, NOTE_BIGGEST_WEIGHT_RS.get(localServerId, 1921 bestRsId)); 1922 } 1923 1924 private static void computeBestServerWhenConnected(RSEvaluations evals, 1925 Map<Integer, BigDecimal> loadDistances, int localServerId, 1926 int currentRsServerId, int sumOfWeights, int sumOfConnectedDSs) 1927 { 1928 final Map<Integer, ReplicationServerInfo> bestServers = evals.bestRSs; 1929 final MathContext mathContext = new MathContext(32, RoundingMode.HALF_UP); 1930 float currentLoadDistance = 1931 loadDistances.get(currentRsServerId).floatValue(); 1932 if (currentLoadDistance < 0) 1933 { 1934 /* 1935 Too much DSs connected to the current RS, compared with its load 1936 goal: 1937 Determine the potential number of DSs to disconnect from the current 1938 RS and see if the local DS is part of them: the DSs that must 1939 disconnect are those with the lowest server id. 1940 Compute the sum of the distances of the load goals of the other RSs 1941 */ 1942 BigDecimal sumOfLoadDistancesOfOtherRSsBd = BigDecimal.ZERO; 1943 for (Integer rsId : bestServers.keySet()) 1944 { 1945 if (rsId != currentRsServerId) 1946 { 1947 sumOfLoadDistancesOfOtherRSsBd = sumOfLoadDistancesOfOtherRSsBd.add( 1948 loadDistances.get(rsId), mathContext); 1949 } 1950 } 1951 1952 if (sumOfLoadDistancesOfOtherRSsBd.floatValue() > 0) 1953 { 1954 /* 1955 The average distance of the other RSs shows a lack of DSs. 1956 Compute the number of DSs to disconnect from the current RS, 1957 rounding to the nearest integer number. Do only this if there is 1958 no risk of yoyo effect: when the exact balance cannot be 1959 established due to the current number of DSs connected, do not 1960 disconnect a DS. 
A simple example where the balance cannot be 1961 reached is: 1962 - RS1 has weight 1 and 2 DSs 1963 - RS2 has weight 1 and 1 DS 1964 => disconnecting a DS from RS1 to reconnect it to RS2 would have no 1965 sense as this would lead to the reverse situation. In that case, 1966 the perfect balance cannot be reached and we must stick to the 1967 current situation, otherwise the DS would keep move between the 2 1968 RSs 1969 */ 1970 float notRoundedOverloadingDSsNumber = sumOfLoadDistancesOfOtherRSsBd. 1971 multiply(BigDecimal.valueOf(sumOfConnectedDSs), mathContext) 1972 .floatValue(); 1973 int overloadingDSsNumber = Math.round(notRoundedOverloadingDSsNumber); 1974 1975 // Avoid yoyo effect 1976 if (overloadingDSsNumber == 1) 1977 { 1978 // What would be the new load distance for the current RS if 1979 // we disconnect some DSs ? 1980 ReplicationServerInfo currentReplicationServerInfo = 1981 bestServers.get(currentRsServerId); 1982 1983 int currentRsWeight = currentReplicationServerInfo.getWeight(); 1984 BigDecimal currentRsWeightBd = BigDecimal.valueOf(currentRsWeight); 1985 BigDecimal sumOfWeightsBd = BigDecimal.valueOf(sumOfWeights); 1986 BigDecimal currentRsLoadGoalBd = 1987 currentRsWeightBd.divide(sumOfWeightsBd, mathContext); 1988 BigDecimal potentialCurrentRsNewLoadBd = BigDecimal.ZERO; 1989 if (sumOfConnectedDSs != 0) 1990 { 1991 int connectedDSs = currentReplicationServerInfo. 1992 getConnectedDSNumber(); 1993 BigDecimal potentialNewConnectedDSsBd = 1994 BigDecimal.valueOf(connectedDSs - 1); 1995 BigDecimal sumOfConnectedDSsBd = 1996 BigDecimal.valueOf(sumOfConnectedDSs); 1997 potentialCurrentRsNewLoadBd = 1998 potentialNewConnectedDSsBd.divide(sumOfConnectedDSsBd, 1999 mathContext); 2000 } 2001 BigDecimal potentialCurrentRsNewLoadDistanceBd = 2002 currentRsLoadGoalBd.subtract(potentialCurrentRsNewLoadBd, 2003 mathContext); 2004 2005 // What would be the new load distance for the other RSs ? 
2006 BigDecimal additionalDsLoadBd = 2007 BigDecimal.ONE.divide( 2008 BigDecimal.valueOf(sumOfConnectedDSs), mathContext); 2009 BigDecimal potentialNewSumOfLoadDistancesOfOtherRSsBd = 2010 sumOfLoadDistancesOfOtherRSsBd.subtract(additionalDsLoadBd, 2011 mathContext); 2012 2013 /* 2014 Now compare both values: we must not disconnect the DS if this 2015 is for going in a situation where the load distance of the other 2016 RSs is the opposite of the future load distance of the local RS 2017 or we would evaluate that we should disconnect just after being 2018 arrived on the new RS. But we should disconnect if we reach the 2019 perfect balance (both values are 0). 2020 */ 2021 if (mustAvoidYoyoEffect(potentialCurrentRsNewLoadDistanceBd, 2022 potentialNewSumOfLoadDistancesOfOtherRSsBd)) 2023 { 2024 // Avoid the yoyo effect, and keep the local DS connected to its 2025 // current RS 2026 evals.setBestRS(currentRsServerId, 2027 NOTE_AVOID_YOYO_EFFECT.get(localServerId, currentRsServerId)); 2028 return; 2029 } 2030 } 2031 2032 ReplicationServerInfo currentRsInfo = 2033 bestServers.get(currentRsServerId); 2034 if (isServerOverloadingRS(localServerId, currentRsInfo, 2035 overloadingDSsNumber)) 2036 { 2037 // The local server is part of the DSs to disconnect 2038 evals.discardAll(NOTE_DISCONNECT_DS_FROM_OVERLOADED_RS.get( 2039 localServerId, currentRsServerId)); 2040 } 2041 else 2042 { 2043 // The local server is not part of the servers to disconnect from the 2044 // current RS. 2045 evals.setBestRS(currentRsServerId, 2046 NOTE_DO_NOT_DISCONNECT_DS_FROM_OVERLOADED_RS.get(localServerId, 2047 currentRsServerId)); 2048 } 2049 } else { 2050 // The average distance of the other RSs does not show a lack of DSs: 2051 // no need to disconnect any DS from the current RS. 
2052 evals.setBestRS(currentRsServerId, 2053 NOTE_NO_NEED_TO_REBALANCE_DSS_BETWEEN_RSS.get(localServerId, 2054 currentRsServerId)); 2055 } 2056 } else { 2057 // The RS load goal is reached or there are not enough DSs connected to 2058 // it to reach it: do not disconnect from this RS and return rsInfo for 2059 // this RS 2060 evals.setBestRS(currentRsServerId, 2061 NOTE_DO_NOT_DISCONNECT_DS_FROM_ACCEPTABLE_LOAD_RS.get(localServerId, 2062 currentRsServerId)); 2063 } 2064 } 2065 2066 private static boolean mustAvoidYoyoEffect(BigDecimal rsNewLoadDistance, 2067 BigDecimal otherRSsNewSumOfLoadDistances) 2068 { 2069 final MathContext roundCtx = new MathContext(6, RoundingMode.DOWN); 2070 final BigDecimal rsLoadDistance = rsNewLoadDistance.round(roundCtx); 2071 final BigDecimal otherRSsSumOfLoadDistances = 2072 otherRSsNewSumOfLoadDistances.round(roundCtx); 2073 2074 return rsLoadDistance.compareTo(BigDecimal.ZERO) != 0 2075 && rsLoadDistance.compareTo(otherRSsSumOfLoadDistances.negate()) == 0; 2076 } 2077 2078 /** 2079 * Returns whether the local DS is overloading the RS. 2080 * <p> 2081 * There are an "overloadingDSsNumber" of DS overloading the RS. The list of 2082 * DSs connected to this RS is ordered by serverId to use a consistent 2083 * ordering across all nodes in the topology. The serverIds which index in the 2084 * List are lower than "overloadingDSsNumber" will be evicted first. 2085 * <p> 2086 * This ordering is unfair since nodes with the lower serverIds will be 2087 * evicted more often than nodes with higher serverIds. However, it is a 2088 * consistent and reliable ordering applicable anywhere in the topology. 
2089 */ 2090 private static boolean isServerOverloadingRS(int localServerId, 2091 ReplicationServerInfo currentRsInfo, int overloadingDSsNumber) 2092 { 2093 List<Integer> serversConnectedToCurrentRS = new ArrayList<>(currentRsInfo.getConnectedDSs()); 2094 Collections.sort(serversConnectedToCurrentRS); 2095 2096 final int idx = serversConnectedToCurrentRS.indexOf(localServerId); 2097 return idx != -1 && idx < overloadingDSsNumber; 2098 } 2099 2100 /** 2101 * Start the heartbeat monitor thread. 2102 */ 2103 private void startRSHeartBeatMonitoring(ConnectedRS rs) 2104 { 2105 final long heartbeatInterval = config.getHeartbeatInterval(); 2106 if (heartbeatInterval > 0) 2107 { 2108 heartbeatMonitor = new HeartbeatMonitor(getServerId(), rs.getServerId(), 2109 getBaseDN().toString(), rs.session, heartbeatInterval); 2110 heartbeatMonitor.start(); 2111 } 2112 } 2113 2114 /** 2115 * Stop the heartbeat monitor thread. 2116 */ 2117 private synchronized void stopRSHeartBeatMonitoring() 2118 { 2119 if (heartbeatMonitor != null) 2120 { 2121 heartbeatMonitor.shutdown(); 2122 heartbeatMonitor = null; 2123 } 2124 } 2125 2126 /** 2127 * Restart the ReplicationBroker. 2128 * @param infiniteTry the socket which failed 2129 */ 2130 public void reStart(boolean infiniteTry) 2131 { 2132 reStart(connectedRS.get().session, infiniteTry); 2133 } 2134 2135 /** 2136 * Restart the ReplicationServer broker after a failure. 
2137 * 2138 * @param failingSession the socket which failed 2139 * @param infiniteTry the socket which failed 2140 */ 2141 private void reStart(Session failingSession, boolean infiniteTry) 2142 { 2143 if (failingSession != null) 2144 { 2145 failingSession.close(); 2146 numLostConnections++; 2147 } 2148 2149 ConnectedRS rs = connectedRS.get(); 2150 if (failingSession == rs.session && !rs.equals(ConnectedRS.noConnectedRS())) 2151 { 2152 rs = setConnectedRS(ConnectedRS.noConnectedRS()); 2153 } 2154 2155 while (true) 2156 { 2157 // Synchronize inside the loop in order to allow shutdown. 2158 synchronized (startStopLock) 2159 { 2160 if (rs.isConnected() || shutdown) 2161 { 2162 break; 2163 } 2164 2165 try 2166 { 2167 connectAsDataServer(); 2168 rs = connectedRS.get(); 2169 } 2170 catch (Exception e) 2171 { 2172 logger.error(NOTE_EXCEPTION_RESTARTING_SESSION, 2173 getBaseDN(), e.getLocalizedMessage() + " " + stackTraceToSingleLineString(e)); 2174 } 2175 2176 if (rs.isConnected() || !infiniteTry) 2177 { 2178 break; 2179 } 2180 } 2181 try 2182 { 2183 Thread.sleep(500); 2184 } 2185 catch (InterruptedException ignored) 2186 { 2187 // ignore 2188 } 2189 } 2190 2191 if (logger.isTraceEnabled()) 2192 { 2193 debugInfo("end restart : connected=" + rs.isConnected() + " with RS(" 2194 + rs.getServerId() + ") genId=" + getGenerationID()); 2195 } 2196 } 2197 2198 /** 2199 * Publish a message to the other servers. 2200 * @param msg the message to publish 2201 */ 2202 public void publish(ReplicationMsg msg) 2203 { 2204 publish(msg, false, true); 2205 } 2206 2207 /** 2208 * Publish a message to the other servers. 2209 * @param msg The message to publish. 2210 * @param retryOnFailure Whether reconnect should automatically be done. 2211 * @return Whether publish succeeded. 2212 */ 2213 boolean publish(ReplicationMsg msg, boolean retryOnFailure) 2214 { 2215 return publish(msg, false, retryOnFailure); 2216 } 2217 2218 /** 2219 * Publish a recovery message to the other servers. 
2220 * @param msg the message to publish 2221 */ 2222 public void publishRecovery(ReplicationMsg msg) 2223 { 2224 publish(msg, true, true); 2225 } 2226 2227 /** 2228 * Publish a message to the other servers. 2229 * @param msg the message to publish 2230 * @param recoveryMsg the message is a recovery LocalizableMessage 2231 * @param retryOnFailure whether retry should be done on failure 2232 * @return whether the message was successfully sent. 2233 */ 2234 private boolean publish(ReplicationMsg msg, boolean recoveryMsg, 2235 boolean retryOnFailure) 2236 { 2237 boolean done = false; 2238 2239 while (!done && !shutdown) 2240 { 2241 if (connectionError) 2242 { 2243 /* 2244 It was not possible to connect to any replication server. 2245 Since the operation was already processed, we have no other 2246 choice than to return without sending the ReplicationMsg 2247 and relying on the resend procedure of the connect phase to 2248 fix the problem when we finally connect. 2249 */ 2250 2251 if (logger.isTraceEnabled()) 2252 { 2253 debugInfo("publish(): Publishing a message is not possible due to" 2254 + " existing connection error."); 2255 } 2256 2257 return false; 2258 } 2259 2260 try 2261 { 2262 /* 2263 save the session at the time when we acquire the 2264 sendwindow credit so that we can make sure later 2265 that the session did not change in between. 2266 This is necessary to make sure that we don't publish a message 2267 on a session with a credit that was acquired from a previous 2268 session. 2269 */ 2270 Session currentSession; 2271 Semaphore currentWindowSemaphore; 2272 synchronized (connectPhaseLock) 2273 { 2274 currentSession = connectedRS.get().session; 2275 currentWindowSemaphore = sendWindow; 2276 } 2277 2278 /* 2279 If the Replication domain has decided that there is a need to 2280 recover some changes then it is not allowed to send this 2281 change but it will be the responsibility of the recovery thread to 2282 do it. 
2283 */ 2284 if (!recoveryMsg & connectRequiresRecovery) 2285 { 2286 return false; 2287 } 2288 2289 boolean credit; 2290 if (msg instanceof UpdateMsg) 2291 { 2292 /* 2293 Acquiring the window credit must be done outside of the 2294 connectPhaseLock because it can be blocking and we don't 2295 want to hold off reconnection in case the connection dropped. 2296 */ 2297 credit = 2298 currentWindowSemaphore.tryAcquire(500, TimeUnit.MILLISECONDS); 2299 } 2300 else 2301 { 2302 credit = true; 2303 } 2304 2305 if (credit) 2306 { 2307 synchronized (connectPhaseLock) 2308 { 2309 /* 2310 session may have been set to null in the connection phase 2311 when restarting the broker for example. 2312 Check the session. If it has changed, some disconnection or 2313 reconnection happened and we need to restart from scratch. 2314 */ 2315 final Session session = connectedRS.get().session; 2316 if (session != null && session == currentSession) 2317 { 2318 session.publish(msg); 2319 done = true; 2320 } 2321 } 2322 } 2323 if (!credit && currentWindowSemaphore.availablePermits() == 0) 2324 { 2325 synchronized (connectPhaseLock) 2326 { 2327 /* 2328 the window is still closed. 2329 Send a WindowProbeMsg message to wake up the receiver in case the 2330 window update message was lost somehow... 2331 then loop to check again if connection was closed. 2332 */ 2333 Session session = connectedRS.get().session; 2334 if (session != null) 2335 { 2336 session.publish(new WindowProbeMsg()); 2337 } 2338 } 2339 } 2340 } 2341 catch (IOException e) 2342 { 2343 if (logger.isTraceEnabled()) 2344 { 2345 debugInfo("publish(): IOException caught: " 2346 + stackTraceToSingleLineString(e)); 2347 } 2348 if (!retryOnFailure) 2349 { 2350 return false; 2351 } 2352 2353 // The receive threads should handle reconnection or 2354 // mark this broker in error. Just retry. 
2355 synchronized (connectPhaseLock) 2356 { 2357 try 2358 { 2359 connectPhaseLock.wait(100); 2360 } 2361 catch (InterruptedException ignored) 2362 { 2363 if (logger.isTraceEnabled()) 2364 { 2365 debugInfo("publish(): InterruptedException caught 1: " 2366 + stackTraceToSingleLineString(ignored)); 2367 } 2368 } 2369 } 2370 } 2371 catch (InterruptedException ignored) 2372 { 2373 // just loop. 2374 if (logger.isTraceEnabled()) 2375 { 2376 debugInfo("publish(): InterruptedException caught 2: " 2377 + stackTraceToSingleLineString(ignored)); 2378 } 2379 } 2380 } 2381 return true; 2382 } 2383 2384 /** 2385 * Receive a message. 2386 * This method is not thread-safe and should either always be 2387 * called in a single thread or protected by a locking mechanism 2388 * before being called. This is a wrapper to the method with a boolean version 2389 * so that we do not have to modify existing tests. 2390 * 2391 * @return the received message 2392 * @throws SocketTimeoutException if the timeout set by setSoTimeout 2393 * has expired 2394 */ 2395 public ReplicationMsg receive() throws SocketTimeoutException 2396 { 2397 return receive(false, true, false); 2398 } 2399 2400 /** 2401 * Receive a message. 2402 * This method is not thread-safe and should either always be 2403 * called in a single thread or protected by a locking mechanism 2404 * before being called. 2405 * 2406 * @param reconnectToTheBestRS Whether broker will automatically switch 2407 * to the best suitable RS. 2408 * @param reconnectOnFailure Whether broker will automatically reconnect 2409 * on failure. 2410 * @param returnOnTopoChange Whether broker should return TopologyMsg 2411 * received. 
2412 * @return the received message 2413 * 2414 * @throws SocketTimeoutException if the timeout set by setSoTimeout 2415 * has expired 2416 */ 2417 ReplicationMsg receive(boolean reconnectToTheBestRS, 2418 boolean reconnectOnFailure, boolean returnOnTopoChange) 2419 throws SocketTimeoutException 2420 { 2421 while (!shutdown) 2422 { 2423 ConnectedRS rs = connectedRS.get(); 2424 if (reconnectOnFailure && !rs.isConnected()) 2425 { 2426 // infinite try to reconnect 2427 reStart(null, true); 2428 continue; 2429 } 2430 2431 // Save session information for later in case we need it for log messages 2432 // after the session has been closed and/or failed. 2433 if (rs.session == null) 2434 { 2435 // Must be shutting down. 2436 break; 2437 } 2438 2439 final int serverId = getServerId(); 2440 final DN baseDN = getBaseDN(); 2441 final int previousRsServerID = rs.getServerId(); 2442 try 2443 { 2444 ReplicationMsg msg = rs.session.receive(); 2445 if (msg instanceof UpdateMsg) 2446 { 2447 synchronized (this) 2448 { 2449 rcvWindow--; 2450 } 2451 } 2452 if (msg instanceof WindowMsg) 2453 { 2454 final WindowMsg windowMsg = (WindowMsg) msg; 2455 sendWindow.release(windowMsg.getNumAck()); 2456 } 2457 else if (msg instanceof TopologyMsg) 2458 { 2459 final TopologyMsg topoMsg = (TopologyMsg) msg; 2460 receiveTopo(topoMsg, getRsServerId()); 2461 if (reconnectToTheBestRS) 2462 { 2463 // Reset wait time before next computation of best server 2464 mustRunBestServerCheckingAlgorithm = 0; 2465 } 2466 2467 // Caller wants to check what's changed 2468 if (returnOnTopoChange) 2469 { 2470 return msg; 2471 } 2472 } 2473 else if (msg instanceof StopMsg) 2474 { 2475 // RS performs a proper disconnection 2476 logger.warn(WARN_REPLICATION_SERVER_PROPERLY_DISCONNECTED, previousRsServerID, rs.replicationServer, 2477 serverId, baseDN); 2478 2479 // Try to find a suitable RS 2480 reStart(rs.session, true); 2481 } 2482 else if (msg instanceof MonitorMsg) 2483 { 2484 // This is the response to a 
MonitorRequest that was sent earlier or 2485 // the regular message of the monitoring publisher of the RS. 2486 MonitorMsg monitorMsg = (MonitorMsg) msg; 2487 2488 // Extract and store replicas ServerStates 2489 final Map<Integer, ServerState> newReplicaStates = new HashMap<>(); 2490 for (int srvId : toIterable(monitorMsg.ldapIterator())) 2491 { 2492 newReplicaStates.put(srvId, monitorMsg.getLDAPServerState(srvId)); 2493 } 2494 replicaStates = newReplicaStates; 2495 2496 // Notify the sender that the response was received. 2497 synchronized (monitorResponse) 2498 { 2499 monitorResponse.set(true); 2500 monitorResponse.notify(); 2501 } 2502 2503 // Update the replication servers ServerStates with new received info 2504 Map<Integer, ReplicationServerInfo> rsInfos = topology.get().rsInfos; 2505 for (int srvId : toIterable(monitorMsg.rsIterator())) 2506 { 2507 final ReplicationServerInfo rsInfo = rsInfos.get(srvId); 2508 if (rsInfo != null) 2509 { 2510 rsInfo.update(monitorMsg.getRSServerState(srvId)); 2511 } 2512 } 2513 2514 /* 2515 Now if it is allowed, compute the best replication server to see if 2516 it is still the one we are currently connected to. If not, 2517 disconnect properly and let the connection algorithm re-connect to 2518 best replication server 2519 */ 2520 if (reconnectToTheBestRS) 2521 { 2522 mustRunBestServerCheckingAlgorithm++; 2523 if (mustRunBestServerCheckingAlgorithm == 2) 2524 { 2525 // Stable topology (no topo msg since few seconds): proceed with 2526 // best server checking. 2527 final RSEvaluations evals = computeBestReplicationServer( 2528 false, previousRsServerID, state, 2529 rsInfos, serverId, getGroupId(), getGenerationID()); 2530 final ReplicationServerInfo bestServerInfo = evals.getBestRS(); 2531 if (previousRsServerID != -1 2532 && (bestServerInfo == null 2533 || bestServerInfo.getServerId() != previousRsServerID)) 2534 { 2535 // The best replication server is no more the one we are 2536 // currently using. 
Disconnect properly then reconnect. 2537 LocalizableMessage message; 2538 if (bestServerInfo == null) 2539 { 2540 message = NOTE_LOAD_BALANCE_REPLICATION_SERVER.get( 2541 serverId, previousRsServerID, rs.replicationServer, baseDN); 2542 } 2543 else 2544 { 2545 final int bestRsServerId = bestServerInfo.getServerId(); 2546 message = NOTE_NEW_BEST_REPLICATION_SERVER.get( 2547 serverId, previousRsServerID, rs.replicationServer, bestRsServerId, baseDN, 2548 evals.getEvaluation(previousRsServerID), 2549 evals.getEvaluation(bestRsServerId)); 2550 } 2551 logger.info(message); 2552 if (logger.isTraceEnabled()) 2553 { 2554 debugInfo("best replication servers evaluation results: " + evals); 2555 } 2556 reStart(true); 2557 } 2558 2559 // Reset wait time before next computation of best server 2560 mustRunBestServerCheckingAlgorithm = 0; 2561 } 2562 } 2563 } 2564 else 2565 { 2566 return msg; 2567 } 2568 } 2569 catch (SocketTimeoutException e) 2570 { 2571 throw e; 2572 } 2573 catch (Exception e) 2574 { 2575 logger.traceException(e); 2576 2577 if (!shutdown) 2578 { 2579 if (rs.session == null || !rs.session.closeInitiated()) 2580 { 2581 // We did not initiate the close on our side, log an error message. 2582 logger.error(WARN_REPLICATION_SERVER_BADLY_DISCONNECTED, 2583 serverId, baseDN, previousRsServerID, rs.replicationServer); 2584 } 2585 2586 if (!reconnectOnFailure) 2587 { 2588 break; // does not seem necessary to explicitly disconnect .. 2589 } 2590 2591 reStart(rs.session, true); 2592 } 2593 } 2594 } // while !shutdown 2595 return null; 2596 } 2597 2598 /** 2599 * Gets the States of all the Replicas currently in the Topology. When this 2600 * method is called, a Monitoring message will be sent to the Replication 2601 * Server to which this domain is currently connected so that it computes a 2602 * table containing information about all Directory Servers in the topology. 
2603 * This Computation involves communications will all the servers currently 2604 * connected and 2605 * 2606 * @return The States of all Replicas in the topology (except us) 2607 */ 2608 public Map<Integer, ServerState> getReplicaStates() 2609 { 2610 monitorResponse.set(false); 2611 2612 // publish Monitor Request LocalizableMessage to the Replication Server 2613 publish(new MonitorRequestMsg(getServerId(), getRsServerId())); 2614 2615 // wait for Response up to 10 seconds. 2616 try 2617 { 2618 synchronized (monitorResponse) 2619 { 2620 if (!monitorResponse.get()) 2621 { 2622 monitorResponse.wait(10000); 2623 } 2624 } 2625 } catch (InterruptedException e) 2626 { 2627 Thread.currentThread().interrupt(); 2628 } 2629 return replicaStates; 2630 } 2631 2632 /** 2633 * This method allows to do the necessary computing for the window 2634 * management after treatment by the worker threads. 2635 * 2636 * This should be called once the replay thread have done their job 2637 * and the window can be open again. 2638 */ 2639 public synchronized void updateWindowAfterReplay() 2640 { 2641 try 2642 { 2643 updateDoneCount++; 2644 final Session session = connectedRS.get().session; 2645 if (updateDoneCount >= halfRcvWindow && session != null) 2646 { 2647 session.publish(new WindowMsg(updateDoneCount)); 2648 rcvWindow += updateDoneCount; 2649 updateDoneCount = 0; 2650 } 2651 } catch (IOException e) 2652 { 2653 // Any error on the socket will be handled by the thread calling receive() 2654 // just ignore. 2655 } 2656 } 2657 2658 /** Stop the server. 
*/ 2659 public void stop() 2660 { 2661 if (logger.isTraceEnabled() && !shutdown) 2662 { 2663 debugInfo("is stopping and will close the connection to RS(" + getRsServerId() + ")"); 2664 } 2665 2666 synchronized (startStopLock) 2667 { 2668 if (shutdown) 2669 { 2670 return; 2671 } 2672 domain.publishReplicaOfflineMsg(); 2673 shutdown = true; 2674 setConnectedRS(ConnectedRS.stopped()); 2675 stopRSHeartBeatMonitoring(); 2676 stopChangeTimeHeartBeatPublishing(); 2677 deregisterReplicationMonitor(); 2678 } 2679 } 2680 2681 /** 2682 * Set a timeout value. 2683 * With this option set to a non-zero value, calls to the receive() method 2684 * block for only this amount of time after which a 2685 * java.net.SocketTimeoutException is raised. 2686 * The Broker is valid and usable even after such an Exception is raised. 2687 * 2688 * @param timeout the specified timeout, in milliseconds. 2689 * @throws SocketException if there is an error in the underlying protocol, 2690 * such as a TCP error. 2691 */ 2692 public void setSoTimeout(int timeout) throws SocketException 2693 { 2694 this.timeout = timeout; 2695 final Session session = connectedRS.get().session; 2696 if (session != null) 2697 { 2698 session.setSoTimeout(timeout); 2699 } 2700 } 2701 2702 /** 2703 * Get the name of the replicationServer to which this broker is currently 2704 * connected. 2705 * 2706 * @return the name of the replicationServer to which this domain 2707 * is currently connected. 2708 */ 2709 public String getReplicationServer() 2710 { 2711 return connectedRS.get().replicationServer; 2712 } 2713 2714 /** 2715 * Get the maximum receive window size. 2716 * 2717 * @return The maximum receive window size. 2718 */ 2719 public int getMaxRcvWindow() 2720 { 2721 return config.getWindowSize(); 2722 } 2723 2724 /** 2725 * Get the current receive window size. 2726 * 2727 * @return The current receive window size. 
2728 */ 2729 public int getCurrentRcvWindow() 2730 { 2731 return rcvWindow; 2732 } 2733 2734 /** 2735 * Get the maximum send window size. 2736 * 2737 * @return The maximum send window size. 2738 */ 2739 public int getMaxSendWindow() 2740 { 2741 return maxSendWindow; 2742 } 2743 2744 /** 2745 * Get the current send window size. 2746 * 2747 * @return The current send window size. 2748 */ 2749 public int getCurrentSendWindow() 2750 { 2751 if (isConnected()) 2752 { 2753 return sendWindow.availablePermits(); 2754 } 2755 return 0; 2756 } 2757 2758 /** 2759 * Get the number of times the connection was lost. 2760 * @return The number of times the connection was lost. 2761 */ 2762 public int getNumLostConnections() 2763 { 2764 return numLostConnections; 2765 } 2766 2767 /** 2768 * Change some configuration parameters. 2769 * 2770 * @param newConfig The new config to use. 2771 * @return A boolean indicating if the changes 2772 * requires to restart the service. 2773 */ 2774 boolean changeConfig(ReplicationDomainCfg newConfig) 2775 { 2776 // These parameters needs to be renegotiated with the ReplicationServer 2777 // so if they have changed, that requires restarting the session with 2778 // the ReplicationServer. 2779 // A new session is necessary only when information regarding 2780 // the connection is modified 2781 boolean needToRestartSession = 2782 !newConfig.getReplicationServer().equals(config.getReplicationServer()) 2783 || newConfig.getWindowSize() != config.getWindowSize() 2784 || newConfig.getHeartbeatInterval() != config.getHeartbeatInterval() 2785 || newConfig.getGroupId() != config.getGroupId(); 2786 2787 this.config = newConfig; 2788 this.rcvWindow = newConfig.getWindowSize(); 2789 this.halfRcvWindow = this.rcvWindow / 2; 2790 2791 return needToRestartSession; 2792 } 2793 2794 /** 2795 * Get the version of the replication protocol. 2796 * @return The version of the replication protocol. 
2797 */ 2798 public short getProtocolVersion() 2799 { 2800 final Session session = connectedRS.get().session; 2801 if (session != null) 2802 { 2803 return session.getProtocolVersion(); 2804 } 2805 return ProtocolVersion.getCurrentVersion(); 2806 } 2807 2808 /** 2809 * Check if the broker is connected to a ReplicationServer and therefore 2810 * ready to received and send Replication Messages. 2811 * 2812 * @return true if the server is connected, false if not. 2813 */ 2814 public boolean isConnected() 2815 { 2816 return connectedRS.get().isConnected(); 2817 } 2818 2819 /** 2820 * Determine whether the connection to the replication server is encrypted. 2821 * @return true if the connection is encrypted, false otherwise. 2822 */ 2823 public boolean isSessionEncrypted() 2824 { 2825 final Session session = connectedRS.get().session; 2826 return session != null ? session.isEncrypted() : false; 2827 } 2828 2829 /** 2830 * Signals the RS we just entered a new status. 2831 * @param newStatus The status the local DS just entered 2832 */ 2833 public void signalStatusChange(ServerStatus newStatus) 2834 { 2835 try 2836 { 2837 connectedRS.get().session.publish( 2838 new ChangeStatusMsg(ServerStatus.INVALID_STATUS, newStatus)); 2839 } catch (IOException ex) 2840 { 2841 logger.error(ERR_EXCEPTION_SENDING_CS, getBaseDN(), getServerId(), 2842 ex.getLocalizedMessage() + " " + stackTraceToSingleLineString(ex)); 2843 } 2844 } 2845 2846 /** 2847 * Gets the info for DSs in the topology (except us). 2848 * @return The info for DSs in the topology (except us) 2849 */ 2850 public Map<Integer, DSInfo> getReplicaInfos() 2851 { 2852 return topology.get().replicaInfos; 2853 } 2854 2855 /** 2856 * Gets the info for RSs in the topology (except the one we are connected 2857 * to). 
2858 * @return The info for RSs in the topology (except the one we are connected 2859 * to) 2860 */ 2861 public List<RSInfo> getRsInfos() 2862 { 2863 return toRSInfos(topology.get().rsInfos); 2864 } 2865 2866 private List<RSInfo> toRSInfos(Map<Integer, ReplicationServerInfo> rsInfos) 2867 { 2868 final List<RSInfo> result = new ArrayList<>(); 2869 for (ReplicationServerInfo rsInfo : rsInfos.values()) 2870 { 2871 result.add(rsInfo.toRSInfo()); 2872 } 2873 return result; 2874 } 2875 2876 /** 2877 * Processes an incoming TopologyMsg. 2878 * Updates the structures for the local view of the topology. 2879 * 2880 * @param topoMsg 2881 * The topology information received from RS. 2882 * @param rsServerId 2883 * the serverId to use for the connectedDS 2884 */ 2885 private void receiveTopo(TopologyMsg topoMsg, int rsServerId) 2886 { 2887 final Topology newTopo = computeNewTopology(topoMsg, rsServerId); 2888 for (DSInfo dsInfo : newTopo.replicaInfos.values()) 2889 { 2890 domain.setEclIncludes(dsInfo.getDsId(), dsInfo.getEclIncludes(), dsInfo 2891 .getEclIncludesForDeletes()); 2892 } 2893 } 2894 2895 private Topology computeNewTopology(TopologyMsg topoMsg, int rsServerId) 2896 { 2897 Topology oldTopo; 2898 Topology newTopo; 2899 do 2900 { 2901 oldTopo = topology.get(); 2902 newTopo = new Topology(topoMsg, getServerId(), rsServerId, 2903 getReplicationServerUrls(), oldTopo.rsInfos); 2904 } 2905 while (!topology.compareAndSet(oldTopo, newTopo)); 2906 2907 if (logger.isTraceEnabled()) 2908 { 2909 final StringBuilder sb = topologyChange(rsServerId, oldTopo, newTopo); 2910 sb.append(" received TopologyMsg=").append(topoMsg); 2911 debugInfo(sb); 2912 } 2913 return newTopo; 2914 } 2915 2916 /** 2917 * Contains the last known state of the replication topology. 2918 */ 2919 static final class Topology 2920 { 2921 2922 /** 2923 * The RS's serverId that this DS was connected to when this topology state 2924 * was computed. 
*/
    private final int rsServerId;
    /**
     * Info for other DSs.
     * <p>
     * Warning: does not contain info for us (for our server id)
     */
    final Map<Integer, DSInfo> replicaInfos;
    /**
     * The map of replication server info initialized at connection time and
     * regularly updated. This is used to decide to which best suitable
     * replication server one wants to connect. Key: replication server id
     * Value: replication server info for the matching replication server id
     */
    final Map<Integer, ReplicationServerInfo> rsInfos;

    /** Creates an empty topology: no connected RS (-1), no DS and no RS. */
    private Topology()
    {
      this.rsServerId = -1;
      this.replicaInfos = Collections.emptyMap();
      this.rsInfos = Collections.emptyMap();
    }

    /**
     * Constructor to use when only the RSInfos need to be recomputed.
     *
     * @param dsInfosToKeep
     *          the DSInfos that will be stored as is; {@code null} is
     *          replaced by an empty map
     * @param newRSInfos
     *          the new RSInfos from which to compute the new topology
     * @param dsServerId
     *          the DS serverId
     * @param rsServerId
     *          the current connected RS serverId
     * @param configuredReplicationServerUrls
     *          the configured replication server URLs
     * @param previousRsInfos
     *          the RSInfos computed in the previous Topology object
     */
    Topology(Map<Integer, DSInfo> dsInfosToKeep, List<RSInfo> newRSInfos,
        int dsServerId, int rsServerId,
        Set<String> configuredReplicationServerUrls,
        Map<Integer, ReplicationServerInfo> previousRsInfos)
    {
      this.rsServerId = rsServerId;
      if (dsInfosToKeep != null)
      {
        this.replicaInfos = dsInfosToKeep;
      }
      else
      {
        this.replicaInfos = Collections.<Integer, DSInfo>emptyMap();
      }
      // Must come last: it reads the two fields assigned above
      this.rsInfos = computeRSInfos(dsServerId, newRSInfos,
          previousRsInfos, configuredReplicationServerUrls);
    }

    /**
     * Constructor to use when a new TopologyMsg has been received.
     *
     * @param topoMsg
     *          the topology message containing the new DSInfos and RSInfos from
     *          which to compute the new topology
     * @param dsServerId
     *          the DS serverId
     * @param rsServerId
     *          the current connected RS serverId
     * @param configuredReplicationServerUrls
     *          the configured replication server URLs
     * @param previousRsInfos
     *          the RSInfos computed in the previous Topology object
     */
    Topology(TopologyMsg topoMsg, int dsServerId,
        int rsServerId, Set<String> configuredReplicationServerUrls,
        Map<Integer, ReplicationServerInfo> previousRsInfos)
    {
      this.rsServerId = rsServerId;
      this.replicaInfos = removeThisDs(topoMsg.getReplicaInfos(), dsServerId);
      // Must come last: it reads the two fields assigned above
      this.rsInfos = computeRSInfos(dsServerId, topoMsg.getRsInfos(),
          previousRsInfos, configuredReplicationServerUrls);
    }

    /**
     * Returns an unmodifiable copy of the provided DSInfos from which the
     * entry of the provided DS has been removed.
     */
    private Map<Integer, DSInfo> removeThisDs(Map<Integer, DSInfo> dsInfos,
        int dsServerId)
    {
      final Map<Integer, DSInfo> others = new HashMap<>(dsInfos);
      others.remove(dsServerId);
      return Collections.unmodifiableMap(others);
    }

    /**
     * Computes the RS infos of this topology: the previous infos are updated
     * with the received ones, infos are created for the RSs seen for the
     * first time, and the RSs which are not part of the received infos are
     * dropped.
     *
     * @return an unmodifiable map of the RS infos, keyed by RS server id
     */
    private Map<Integer, ReplicationServerInfo> computeRSInfos(
        int dsServerId, List<RSInfo> newRsInfos,
        Map<Integer, ReplicationServerInfo> previousRsInfos,
        Set<String> configuredReplicationServerUrls)
    {
      final Map<Integer, ReplicationServerInfo> merged =
          new HashMap<>(previousRsInfos);

      // Ids of the RSs present in the received topology info
      final Set<Integer> receivedRsIds = new HashSet<>();
      for (RSInfo received : newRsInfos)
      {
        final int rsId = received.getId();
        receivedRsIds.add(rsId);

        final Set<Integer> connectedDSs =
            computeDSsConnectedTo(rsId, dsServerId);
        final ReplicationServerInfo known = merged.get(rsId);
        if (known != null)
        {
          // Already known replication server: refresh its info
          known.update(received, connectedDSs);
        }
        else
        {
          // Replication server seen for the first time: create its info
          final ReplicationServerInfo created =
              new ReplicationServerInfo(received, connectedDSs);
          setLocallyConfiguredFlag(created, configuredReplicationServerUrls);
          merged.put(rsId, created);
        }
      }

      // Forget the replication servers that disappeared from the topology
      merged.keySet().retainAll(receivedRsIds);

      return Collections.unmodifiableMap(merged);
    }

    /** Computes the list of DSs connected to a particular RS. */
    private Set<Integer> computeDSsConnectedTo(int rsId, int dsServerId)
    {
      final Set<Integer> dsIds = new HashSet<>();
      if (rsServerId == rsId)
      {
        /*
         * This is the RS we are connected to. The replication server does
         * not send the DSInfo of the local DS in the topology message, so
         * the local DS must be counted explicitly.
         */
        dsIds.add(dsServerId);
      }

      for (DSInfo replica : replicaInfos.values())
      {
        if (replica.getRsId() == rsId)
        {
          dsIds.add(replica.getDsId());
        }
      }

      return dsIds;
    }

    /**
     * Sets the locally configured flag for the passed ReplicationServerInfo
     * object, analyzing the local configuration. When a configured URL
     * matches, it also becomes the server URL of the passed object.
     *
     * @param rsInfo
     *          the Replication server to check and update
     * @param configuredReplicationServerUrls
     *          the replication server URLs present in the local configuration
     */
    private void setLocallyConfiguredFlag(ReplicationServerInfo rsInfo,
        Set<String> configuredReplicationServerUrls)
    {
      // A null URL means the info comes from a server with no URL in
      // TopologyMsg (i.e: with replication protocol version < 4): we do not
      // know how to connect to it, so it cannot match any configured URL.
      final String rsUrl = rsInfo.getServerURL();
      if (rsUrl != null)
      {
        for (String configuredUrl : configuredReplicationServerUrls)
        {
          if (isSameReplicationServerUrl(configuredUrl, rsUrl))
          {
            // This RS is locally configured, mark this
            rsInfo.setLocallyConfigured(true);
            rsInfo.setServerURL(configuredUrl);
            return;
          }
        }
      }
      rsInfo.setLocallyConfigured(false);
    }

    /**
     * Two topologies are equal when they have the same connected RS serverId,
     * equal DS infos, equal RS infos and, in addition, matching URLs as
     * checked by the two {@code urlsEqual} helpers.
     */
    @Override
    public boolean equals(Object obj)
    {
      if (this == obj)
      {
        return true;
      }
      if (obj == null || getClass() != obj.getClass())
      {
        return false;
      }

      final Topology that = (Topology) obj;
      if (rsServerId != that.rsServerId)
      {
        return false;
      }
      if (!Objects.equals(replicaInfos, that.replicaInfos))
      {
        return false;
      }
      if (!Objects.equals(rsInfos, that.rsInfos))
      {
        return false;
      }
      return urlsEqual1(replicaInfos, that.replicaInfos)
          && urlsEqual2(rsInfos, that.rsInfos);
    }

    /**
     * Checks that each DS of the first map has the same URL as the DS
     * registered under the same key in the second map.
     */
    private boolean urlsEqual1(Map<Integer, DSInfo> replicaInfos1,
        Map<Integer, DSInfo> replicaInfos2)
    {
      for (Entry<Integer, DSInfo> entry : replicaInfos1.entrySet())
      {
        final DSInfo counterpart = replicaInfos2.get(entry.getKey());
        final boolean sameUrl = Objects.equals(entry.getValue().getDsUrl(),
            counterpart.getDsUrl());
        if (!sameUrl)
        {
          return false;
        }
      }
      return true;
    }
3141 private boolean urlsEqual2(Map<Integer, ReplicationServerInfo> rsInfos1, 3142 Map<Integer, ReplicationServerInfo> rsInfos2) 3143 { 3144 for (Entry<Integer, ReplicationServerInfo> entry : rsInfos1.entrySet()) 3145 { 3146 ReplicationServerInfo rsInfo = rsInfos2.get(entry.getKey()); 3147 if (!Objects.equals(entry.getValue().getServerURL(), rsInfo.getServerURL())) 3148 { 3149 return false; 3150 } 3151 } 3152 return true; 3153 } 3154 3155 /** {@inheritDoc} */ 3156 @Override 3157 public int hashCode() 3158 { 3159 final int prime = 31; 3160 int result = 1; 3161 result = prime * result + rsServerId; 3162 result = prime * result 3163 + (replicaInfos == null ? 0 : replicaInfos.hashCode()); 3164 result = prime * result + (rsInfos == null ? 0 : rsInfos.hashCode()); 3165 return result; 3166 } 3167 3168 /** {@inheritDoc} */ 3169 @Override 3170 public String toString() 3171 { 3172 return getClass().getSimpleName() 3173 + " rsServerId=" + rsServerId 3174 + ", replicaInfos=" + replicaInfos.values() 3175 + ", rsInfos=" + rsInfos.values(); 3176 } 3177 } 3178 3179 /** 3180 * Check if the broker could not find any Replication Server and therefore 3181 * connection attempt failed. 3182 * 3183 * @return true if the server could not connect to any Replication Server. 3184 */ 3185 boolean hasConnectionError() 3186 { 3187 return connectionError; 3188 } 3189 3190 /** 3191 * Starts publishing to the RS the current timestamp used in this server. 3192 */ 3193 private void startChangeTimeHeartBeatPublishing(ConnectedRS rs) 3194 { 3195 // Start a CSN heartbeat thread. 
3196 long changeTimeHeartbeatInterval = config.getChangetimeHeartbeatInterval(); 3197 if (changeTimeHeartbeatInterval > 0) 3198 { 3199 final String threadName = "Replica DS(" + getServerId() 3200 + ") change time heartbeat publisher for domain \"" + getBaseDN() 3201 + "\" to RS(" + rs.getServerId() + ") at " + rs.replicationServer; 3202 3203 ctHeartbeatPublisherThread = new CTHeartbeatPublisherThread( 3204 threadName, rs.session, changeTimeHeartbeatInterval, getServerId()); 3205 ctHeartbeatPublisherThread.start(); 3206 } 3207 else 3208 { 3209 if (logger.isTraceEnabled()) 3210 { 3211 debugInfo("is not configured to send CSN heartbeat interval"); 3212 } 3213 } 3214 } 3215 3216 /** 3217 * Stops publishing to the RS the current timestamp used in this server. 3218 */ 3219 private synchronized void stopChangeTimeHeartBeatPublishing() 3220 { 3221 if (ctHeartbeatPublisherThread != null) 3222 { 3223 ctHeartbeatPublisherThread.shutdown(); 3224 ctHeartbeatPublisherThread = null; 3225 } 3226 } 3227 3228 /** 3229 * Set the connectRequiresRecovery to the provided value. 3230 * This flag is used to indicate if a recovery of Update is necessary 3231 * after a reconnection to a RS. 3232 * It is the responsibility of the ReplicationDomain to set it during the 3233 * sessionInitiated phase. 3234 * 3235 * @param b the new value of the connectRequiresRecovery. 3236 */ 3237 public void setRecoveryRequired(boolean b) 3238 { 3239 connectRequiresRecovery = b; 3240 } 3241 3242 /** 3243 * Returns whether the broker is shutting down. 3244 * @return whether the broker is shutting down. 3245 */ 3246 boolean shuttingDown() 3247 { 3248 return shutdown; 3249 } 3250 3251 /** 3252 * Returns the local address of this replication domain, or the empty string 3253 * if it is not yet connected. 3254 * 3255 * @return The local address. 3256 */ 3257 String getLocalUrl() 3258 { 3259 final Session session = connectedRS.get().session; 3260 return session != null ? 
session.getLocalUrl() : ""; 3261 } 3262 3263 /** 3264 * Returns the replication monitor instance name associated with this broker. 3265 * 3266 * @return The replication monitor instance name. 3267 */ 3268 String getReplicationMonitorInstanceName() 3269 { 3270 // Only invoked by replication domain so always non-null. 3271 return monitor.getMonitorInstanceName(); 3272 } 3273 3274 private ConnectedRS setConnectedRS(final ConnectedRS newRS) 3275 { 3276 final ConnectedRS oldRS = connectedRS.getAndSet(newRS); 3277 if (!oldRS.equals(newRS) && oldRS.session != null) 3278 { 3279 // monitor name is changing, deregister before registering again 3280 deregisterReplicationMonitor(); 3281 oldRS.session.close(); 3282 registerReplicationMonitor(); 3283 } 3284 return newRS; 3285 } 3286 3287 /** 3288 * Must be invoked each time the session changes because, the monitor name is 3289 * dynamically created with the session name, while monitor registration is 3290 * static. 3291 * 3292 * @see #monitor 3293 */ 3294 private void registerReplicationMonitor() 3295 { 3296 // The monitor should not be registered if this is a unit test 3297 // because the replication domain is null. 3298 if (monitor != null) 3299 { 3300 DirectoryServer.registerMonitorProvider(monitor); 3301 } 3302 } 3303 3304 private void deregisterReplicationMonitor() 3305 { 3306 // The monitor should not be deregistered if this is a unit test 3307 // because the replication domain is null. 
3308 if (monitor != null) 3309 { 3310 DirectoryServer.deregisterMonitorProvider(monitor); 3311 } 3312 } 3313 3314 /** {@inheritDoc} */ 3315 @Override 3316 public String toString() 3317 { 3318 final StringBuilder sb = new StringBuilder(); 3319 sb.append(getClass().getSimpleName()) 3320 .append(" \"").append(getBaseDN()).append(" ") 3321 .append(getServerId()).append("\",") 3322 .append(" groupId=").append(getGroupId()) 3323 .append(", genId=").append(getGenerationID()) 3324 .append(", "); 3325 connectedRS.get().toString(sb); 3326 return sb.toString(); 3327 } 3328 3329 private void debugInfo(CharSequence message) 3330 { 3331 logger.trace(getClass().getSimpleName() + " for baseDN=" + getBaseDN() 3332 + " and serverId=" + getServerId() + ": " + message); 3333 } 3334}