001/* 002 * CDDL HEADER START 003 * 004 * The contents of this file are subject to the terms of the 005 * Common Development and Distribution License, Version 1.0 only 006 * (the "License"). You may not use this file except in compliance 007 * with the License. 008 * 009 * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt 010 * or http://forgerock.org/license/CDDLv1.0.html. 011 * See the License for the specific language governing permissions 012 * and limitations under the License. 013 * 014 * When distributing Covered Code, include this CDDL HEADER in each 015 * file and include the License file at legal-notices/CDDLv1_0.txt. 016 * If applicable, add the following below this CDDL HEADER, with the 017 * fields enclosed by brackets "[]" replaced with your own identifying 018 * information: 019 * Portions Copyright [yyyy] [name of copyright owner] 020 * 021 * CDDL HEADER END 022 * 023 * 024 * Copyright 2013-2015 ForgeRock AS 025 */ 026package org.opends.server.replication.server.changelog.file; 027 028import java.util.Map.Entry; 029import java.util.Set; 030import java.util.concurrent.ConcurrentSkipListSet; 031 032import org.forgerock.i18n.slf4j.LocalizedLogger; 033import org.opends.server.api.DirectoryThread; 034import org.opends.server.backends.ChangelogBackend; 035import org.opends.server.replication.common.CSN; 036import org.opends.server.replication.common.MultiDomainServerState; 037import org.opends.server.replication.common.ServerState; 038import org.opends.server.replication.protocol.ReplicaOfflineMsg; 039import org.opends.server.replication.protocol.UpdateMsg; 040import org.opends.server.replication.server.changelog.api.AbortedChangelogCursorException; 041import org.opends.server.replication.server.changelog.api.ChangeNumberIndexRecord; 042import org.opends.server.replication.server.changelog.api.ChangelogDB; 043import org.opends.server.replication.server.changelog.api.ChangelogException; 044import org.opends.server.replication.server.changelog.api.DBCursor.CursorOptions; 045import org.opends.server.replication.server.changelog.api.ReplicationDomainDB; 046import org.opends.server.replication.server.changelog.api.ChangelogStateProvider; 047import org.opends.server.types.DN; 048 049import static org.opends.messages.ReplicationMessages.*; 050import static org.opends.server.replication.server.changelog.api.DBCursor.KeyMatchingStrategy.*; 051import static org.opends.server.replication.server.changelog.api.DBCursor.PositionStrategy.*; 052import static org.opends.server.util.StaticUtils.*; 053 054/** 055 * Thread responsible for inserting replicated changes into the ChangeNumber 056 * Index DB (CNIndexDB for short). 057 * <p> 058 * Only changes older than the medium consistency point are inserted in the 059 * CNIndexDB. As a consequence this class is also responsible for maintaining 060 * the medium consistency point (indirectly through an 061 * {@link ECLMultiDomainDBCursor}). 062 */ 063public class ChangeNumberIndexer extends DirectoryThread 064{ 065 /** The tracer object for the debug logger. */ 066 private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass(); 067 068 /** 069 * If it contains nothing, then the run method executes normally. 070 * Otherwise, the {@link #run()} method must clear its state 071 * for the supplied domain baseDNs. If a supplied domain is 072 * {@link DN#NULL_DN}, then all domains will be cleared. 073 */ 074 private final ConcurrentSkipListSet<DN> domainsToClear = new ConcurrentSkipListSet<>(); 075 private final ChangelogDB changelogDB; 076 private final ChangelogStateProvider changelogStateProvider; 077 private final ECLEnabledDomainPredicate predicate; 078 079 /* 080 * The following MultiDomainServerState fields must be thread safe, because 081 * 1) initialization can happen while the replication server starts receiving 082 * updates 083 * 2) many updates can happen concurrently. 084 */ 085 /** 086 * Holds the last time each replica was seen alive, whether via updates or 087 * heartbeat notifications, or offline notifications. Data is held for each 088 * serverId cross domain. 089 * <p> 090 * Updates are persistent and stored in the replicaDBs, heartbeats are 091 * transient and are easily constructed on normal operations. 092 * <p> 093 * Note: This object is updated by both heartbeats and changes/updates. 094 */ 095 private final MultiDomainServerState lastAliveCSNs = new MultiDomainServerState(); 096 097 /** Note: This object is updated by replica offline messages. */ 098 private final MultiDomainServerState replicasOffline = new MultiDomainServerState(); 099 100 /** 101 * Cursor across all the replicaDBs for all the replication domains. It is 102 * positioned on the next change that needs to be inserted in the CNIndexDB. 103 * <p> 104 * Note: it is only accessed from the {@link #run()} method. 105 * 106 * @NonNull 107 */ 108 private ECLMultiDomainDBCursor nextChangeForInsertDBCursor; 109 private MultiDomainServerState cookie = new MultiDomainServerState(); 110 111 /** 112 * Builds a ChangeNumberIndexer object. 113 * @param changelogDB 114 * the changelogDB 115 * @param changelogStateProvider 116 * the replication environment information for access to changelog state 117 */ 118 public ChangeNumberIndexer(ChangelogDB changelogDB, ChangelogStateProvider changelogStateProvider) 119 { 120 this(changelogDB, changelogStateProvider, new ECLEnabledDomainPredicate()); 121 } 122 123 /** 124 * Builds a ChangeNumberIndexer object. 125 * @param changelogDB 126 * the changelogDB 127 * @param changelogStateProvider 128 * the changelog state used for initialization 129 * @param predicate 130 */ 131 ChangeNumberIndexer(ChangelogDB changelogDB, ChangelogStateProvider changelogStateProvider, 132 ECLEnabledDomainPredicate predicate) 133 { 134 super("Change number indexer"); 135 this.changelogDB = changelogDB; 136 this.changelogStateProvider = changelogStateProvider; 137 this.predicate = predicate; 138 } 139 140 /** 141 * Ensures the medium consistency point is updated by heartbeats. 142 * 143 * @param baseDN 144 * the baseDN of the domain for which the heartbeat is published 145 * @param heartbeatCSN 146 * the CSN coming from the heartbeat 147 */ 148 public void publishHeartbeat(DN baseDN, CSN heartbeatCSN) 149 { 150 if (!predicate.isECLEnabledDomain(baseDN)) 151 { 152 return; 153 } 154 155 final CSN oldestCSNBefore = getOldestLastAliveCSN(); 156 lastAliveCSNs.update(baseDN, heartbeatCSN); 157 tryNotify(oldestCSNBefore); 158 } 159 160 /** 161 * Indicates if the replica corresponding to provided domain DN and server id 162 * is offline. 163 * 164 * @param domainDN 165 * base DN of the replica 166 * @param serverId 167 * server id of the replica 168 * @return {@code true} if replica is offline, {@code false} otherwise 169 */ 170 public boolean isReplicaOffline(DN domainDN, int serverId) 171 { 172 return replicasOffline.getCSN(domainDN, serverId) != null; 173 } 174 175 /** 176 * Ensures the medium consistency point is updated by UpdateMsg. 177 * 178 * @param baseDN 179 * the baseDN of the domain for which the heartbeat is published 180 * @param updateMsg 181 * the updateMsg that will update the medium consistency point 182 * @throws ChangelogException 183 * If a database problem happened 184 */ 185 public void publishUpdateMsg(DN baseDN, UpdateMsg updateMsg) 186 throws ChangelogException 187 { 188 if (!predicate.isECLEnabledDomain(baseDN)) 189 { 190 return; 191 } 192 193 final CSN oldestCSNBefore = getOldestLastAliveCSN(); 194 lastAliveCSNs.update(baseDN, updateMsg.getCSN()); 195 tryNotify(oldestCSNBefore); 196 } 197 198 /** 199 * Signals a replica went offline. 200 * 201 * @param baseDN 202 * the replica's replication domain 203 * @param offlineCSN 204 * the serverId and time of the replica that went offline 205 */ 206 public void replicaOffline(DN baseDN, CSN offlineCSN) 207 { 208 if (!predicate.isECLEnabledDomain(baseDN)) 209 { 210 return; 211 } 212 213 replicasOffline.update(baseDN, offlineCSN); 214 final CSN oldestCSNBefore = getOldestLastAliveCSN(); 215 lastAliveCSNs.update(baseDN, offlineCSN); 216 tryNotify(oldestCSNBefore); 217 } 218 219 private CSN getOldestLastAliveCSN() 220 { 221 return lastAliveCSNs.getOldestCSNExcluding(replicasOffline).getSecond(); 222 } 223 224 /** 225 * Notifies the Change number indexer thread if it will be able to do some 226 * work. 227 */ 228 private void tryNotify(final CSN oldestCSNBefore) 229 { 230 if (mightMoveForwardMediumConsistencyPoint(oldestCSNBefore)) 231 { 232 synchronized (this) 233 { 234 notify(); 235 } 236 } 237 } 238 239 /** 240 * Used for waking up the {@link ChangeNumberIndexer} thread because it might 241 * have some work to do. 242 */ 243 private boolean mightMoveForwardMediumConsistencyPoint(CSN oldestCSNBefore) 244 { 245 final CSN oldestCSNAfter = getOldestLastAliveCSN(); 246 // ensure that all initial replicas alive information have been updated 247 // with CSNs that are acceptable for moving the medium consistency forward 248 return allInitialReplicasAreOfflineOrAlive() 249 && oldestCSNBefore != null // then oldestCSNAfter cannot be null 250 // has the oldest CSN changed? 251 && oldestCSNBefore.isOlderThan(oldestCSNAfter); 252 } 253 254 /** 255 * Used by the {@link ChangeNumberIndexer} thread to determine whether the CSN 256 * must be persisted to the change number index DB. 257 */ 258 private boolean canMoveForwardMediumConsistencyPoint(CSN nextCSNToPersist) 259 { 260 // ensure that all initial replicas alive information have been updated 261 // with CSNs that are acceptable for moving the medium consistency forward 262 return allInitialReplicasAreOfflineOrAlive() 263 // can we persist the next CSN? 264 && nextCSNToPersist.isOlderThanOrEqualTo(getOldestLastAliveCSN()); 265 } 266 267 /** 268 * Returns true only if the initial replicas known from the changelog state DB 269 * are either: 270 * <ul> 271 * <li>offline, so do not wait for them in order to compute medium consistency 272 * </li> 273 * <li>alive, because we received heartbeats or changes (so their last alive 274 * CSN has been updated to something past the oldest possible CSN), we have 275 * enough info to compute medium consistency</li> 276 * </ul> 277 * In both cases, we have enough information to compute medium consistency 278 * without waiting any further. 279 */ 280 private boolean allInitialReplicasAreOfflineOrAlive() 281 { 282 for (DN baseDN : lastAliveCSNs) 283 { 284 for (CSN csn : lastAliveCSNs.getServerState(baseDN)) 285 { 286 if (csn.getTime() == 0 287 && replicasOffline.getCSN(baseDN, csn.getServerId()) == null) 288 { 289 // this is the oldest possible CSN, but the replica is not offline 290 // we must wait for more up to date information from this replica 291 return false; 292 } 293 } 294 } 295 return true; 296 } 297 298 /** 299 * Restores in memory data needed to build the CNIndexDB. In particular, 300 * initializes the changes cursor to the medium consistency point. 301 */ 302 private void initialize() throws ChangelogException 303 { 304 final ReplicationDomainDB domainDB = changelogDB.getReplicationDomainDB(); 305 306 initializeLastAliveCSNs(domainDB); 307 initializeNextChangeCursor(domainDB); 308 initializeOfflineReplicas(); 309 } 310 311 private void initializeNextChangeCursor(final ReplicationDomainDB domainDB) throws ChangelogException 312 { 313 // Initialize the multi domain cursor only from the change number index record. 314 // The cookie is always empty at this stage. 315 final ChangeNumberIndexRecord newestRecord = changelogDB.getChangeNumberIndexDB().getNewestRecord(); 316 final CSN newestCsn = newestRecord != null ? newestRecord.getCSN() : null; 317 final CursorOptions options = new CursorOptions(LESS_THAN_OR_EQUAL_TO_KEY, ON_MATCHING_KEY, newestCsn); 318 final MultiDomainServerState unused = new MultiDomainServerState(); 319 MultiDomainDBCursor cursorInitializedToMediumConsistencyPoint = domainDB.getCursorFrom(unused, options); 320 321 nextChangeForInsertDBCursor = new ECLMultiDomainDBCursor(predicate, cursorInitializedToMediumConsistencyPoint); 322 ChangelogBackend.updateCookieToMediumConsistencyPoint(cookie, nextChangeForInsertDBCursor, newestRecord); 323 } 324 325 private void initializeLastAliveCSNs(final ReplicationDomainDB domainDB) 326 { 327 for (Entry<DN, Set<Integer>> entry : changelogStateProvider.getChangelogState().getDomainToServerIds().entrySet()) 328 { 329 final DN baseDN = entry.getKey(); 330 if (predicate.isECLEnabledDomain(baseDN)) 331 { 332 for (Integer serverId : entry.getValue()) 333 { 334 /* 335 * initialize with the oldest possible CSN in order for medium 336 * consistency to wait for all replicas to be alive before moving forward 337 */ 338 lastAliveCSNs.update(baseDN, oldestPossibleCSN(serverId)); 339 } 340 341 final ServerState latestKnownState = domainDB.getDomainNewestCSNs(baseDN); 342 lastAliveCSNs.update(baseDN, latestKnownState); 343 } 344 } 345 } 346 347 private void initializeOfflineReplicas() 348 { 349 final MultiDomainServerState offlineReplicas = changelogStateProvider.getChangelogState().getOfflineReplicas(); 350 for (DN baseDN : offlineReplicas) 351 { 352 for (CSN offlineCSN : offlineReplicas.getServerState(baseDN)) 353 { 354 if (predicate.isECLEnabledDomain(baseDN)) 355 { 356 replicasOffline.update(baseDN, offlineCSN); 357 // a replica offline message could also be the very last time 358 // we heard from this replica :) 359 lastAliveCSNs.update(baseDN, offlineCSN); 360 } 361 } 362 } 363 } 364 365 private CSN oldestPossibleCSN(int serverId) 366 { 367 return new CSN(0, 0, serverId); 368 } 369 370 /** {@inheritDoc} */ 371 @Override 372 public void initiateShutdown() 373 { 374 super.initiateShutdown(); 375 synchronized (this) 376 { 377 notify(); 378 } 379 } 380 381 /** {@inheritDoc} */ 382 @Override 383 public void run() 384 { 385 try 386 { 387 /* 388 * initialize here to allow fast application start up and avoid errors due 389 * cursors being created in a different thread to the one where they are used. 390 */ 391 initialize(); 392 393 while (!isShutdownInitiated()) 394 { 395 try 396 { 397 while (!domainsToClear.isEmpty()) 398 { 399 final DN baseDNToClear = domainsToClear.first(); 400 nextChangeForInsertDBCursor.removeDomain(baseDNToClear); 401 // Only release the waiting thread 402 // once this domain's state has been cleared. 403 domainsToClear.remove(baseDNToClear); 404 } 405 if (nextChangeForInsertDBCursor.shouldReInitialize()) 406 { 407 nextChangeForInsertDBCursor.close(); 408 initialize(); 409 } 410 // Do not call DBCursor.next() here 411 // because we might not have consumed the last record, 412 // for example if we could not move the MCP forward 413 final UpdateMsg msg = nextChangeForInsertDBCursor.getRecord(); 414 if (msg == null) 415 { 416 synchronized (this) 417 { 418 if (isShutdownInitiated()) 419 { 420 continue; 421 } 422 wait(); 423 } 424 // check whether new changes have been added to the ReplicaDBs 425 moveToNextChange(); 426 continue; 427 } 428 else if (msg instanceof ReplicaOfflineMsg) 429 { 430 moveToNextChange(); 431 continue; 432 } 433 434 final CSN csn = msg.getCSN(); 435 final DN baseDN = nextChangeForInsertDBCursor.getData(); 436 // FIXME problem: what if the serverId is not part of the ServerState? 437 // right now, change number will be blocked 438 if (!canMoveForwardMediumConsistencyPoint(csn)) 439 { 440 // the oldest record to insert is newer than the medium consistency 441 // point. Let's wait for a change that can be published. 442 synchronized (this) 443 { 444 // double check to protect against a missed call to notify() 445 if (!canMoveForwardMediumConsistencyPoint(csn)) 446 { 447 if (isShutdownInitiated()) 448 { 449 return; 450 } 451 wait(); 452 // loop to check if changes older than the medium consistency 453 // point have been added to the ReplicaDBs 454 continue; 455 } 456 } 457 } 458 459 // OK, the oldest change is older than the medium consistency point 460 // let's publish it to the CNIndexDB. 461 final long changeNumber = changelogDB.getChangeNumberIndexDB() 462 .addRecord(new ChangeNumberIndexRecord(baseDN, csn)); 463 if (!cookie.update(baseDN, csn)) 464 { 465 throw new IllegalStateException("It was expected that change (baseDN=" + baseDN + ", csn=" + csn 466 + ") would have updated the cookie=" + cookie + ", but it did not"); 467 } 468 notifyEntryAddedToChangelog(baseDN, changeNumber, cookie, msg); 469 moveForwardMediumConsistencyPoint(csn, baseDN); 470 } 471 catch (InterruptedException ignored) 472 { 473 // was shutdown called? loop to figure it out. 474 Thread.currentThread().interrupt(); 475 } 476 } 477 } 478 catch (RuntimeException e) 479 { 480 logUnexpectedException(e); 481 // Rely on the DirectoryThread uncaught exceptions handler for logging error + alert. 482 throw e; 483 } 484 catch (Exception e) 485 { 486 logUnexpectedException(e); 487 // Rely on the DirectoryThread uncaught exceptions handler for logging error + alert. 488 throw new RuntimeException(e); 489 } 490 finally 491 { 492 nextChangeForInsertDBCursor.close(); 493 nextChangeForInsertDBCursor = null; 494 } 495 } 496 497 private void moveToNextChange() throws ChangelogException 498 { 499 try 500 { 501 nextChangeForInsertDBCursor.next(); 502 } 503 catch (AbortedChangelogCursorException e) { 504 if (domainsToClear.isEmpty()) 505 { 506 // There is no domain to clear, thus it is 507 // not expected that a cursor is aborted 508 throw e; 509 } 510 // else assumes the aborted cursor is part of a domain 511 // that will be removed on the next iteration 512 logger.trace("Cursor was aborted: %s, but continuing because domainsToClear has size %s", 513 e, domainsToClear.size()); 514 } 515 } 516 517 /** 518 * Notifies the {@link ChangelogBackend} that a new entry has been added. 519 * 520 * @param baseDN 521 * the baseDN of the newly added entry. 522 * @param changeNumber 523 * the change number of the newly added entry. It will be greater 524 * than zero for entries added to the change number index and less 525 * than or equal to zero for entries added to any replica DB 526 * @param cookie 527 * the cookie of the newly added entry. This is only meaningful for 528 * entries added to the change number index 529 * @param msg 530 * the update message of the newly added entry 531 * @throws ChangelogException 532 * If a problem occurs while notifying of the newly added entry. 533 */ 534 protected void notifyEntryAddedToChangelog(DN baseDN, long changeNumber, 535 MultiDomainServerState cookie, UpdateMsg msg) throws ChangelogException 536 { 537 ChangelogBackend.getInstance().notifyChangeNumberEntryAdded(baseDN, changeNumber, cookie.toString(), msg); 538 } 539 540 /** 541 * Nothing can be done about it. 542 * <p> 543 * Rely on the DirectoryThread uncaught exceptions handler for logging error + 544 * alert. 545 * <p> 546 * Message logged here gives corrective information to the administrator. 547 */ 548 private void logUnexpectedException(Exception e) 549 { 550 logger.trace(ERR_CHANGE_NUMBER_INDEXER_UNEXPECTED_EXCEPTION, 551 getClass().getSimpleName(), stackTraceToSingleLineString(e)); 552 } 553 554 private void moveForwardMediumConsistencyPoint(final CSN mcCSN, final DN mcBaseDN) throws ChangelogException 555 { 556 final int mcServerId = mcCSN.getServerId(); 557 final CSN offlineCSN = replicasOffline.getCSN(mcBaseDN, mcServerId); 558 final CSN lastAliveCSN = lastAliveCSNs.getCSN(mcBaseDN, mcServerId); 559 if (offlineCSN != null) 560 { 561 if (lastAliveCSN != null && offlineCSN.isOlderThan(lastAliveCSN)) 562 { 563 // replica is back online, we can forget the last time it was offline 564 replicasOffline.removeCSN(mcBaseDN, offlineCSN); 565 } 566 else if (offlineCSN.isOlderThan(mcCSN)) 567 { 568 /* 569 * replica is not back online, Medium consistency point has gone past 570 * its last offline time, and there are no more changes after the 571 * offline CSN in the cursor: remove everything known about it 572 * (offlineCSN from lastAliveCSN and remove all knowledge of this replica 573 * from the medium consistency RUV). 574 */ 575 lastAliveCSNs.removeCSN(mcBaseDN, offlineCSN); 576 } 577 } 578 579 // advance the cursor we just read from, 580 // success/failure will be checked later 581 nextChangeForInsertDBCursor.next(); 582 } 583 584 /** 585 * Asks the current thread to clear its state for the specified domain. 586 * <p> 587 * Note: This method blocks the current thread until state is cleared. 588 * 589 * @param baseDN the baseDN to be cleared from this thread's state. 590 * {@code null} and {@link DN#NULL_DN} mean "clear all domains". 591 */ 592 public void clear(DN baseDN) 593 { 594 // Use DN.NULL_DN to say "clear all domains" 595 final DN baseDNToClear = baseDN != null ? baseDN : DN.NULL_DN; 596 domainsToClear.add(baseDNToClear); 597 while (domainsToClear.contains(baseDNToClear) 598 && !State.TERMINATED.equals(getState())) 599 { 600 // wait until clear() has been done by thread, always waking it up 601 synchronized (this) 602 { 603 notify(); 604 } 605 // ensures thread wait that this thread's state is cleaned up 606 Thread.yield(); 607 } 608 } 609 610}