001/*
002 * CDDL HEADER START
003 *
004 * The contents of this file are subject to the terms of the
005 * Common Development and Distribution License, Version 1.0 only
006 * (the "License").  You may not use this file except in compliance
007 * with the License.
008 *
009 * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt
010 * or http://forgerock.org/license/CDDLv1.0.html.
011 * See the License for the specific language governing permissions
012 * and limitations under the License.
013 *
014 * When distributing Covered Code, include this CDDL HEADER in each
015 * file and include the License file at legal-notices/CDDLv1_0.txt.
016 * If applicable, add the following below this CDDL HEADER, with the
017 * fields enclosed by brackets "[]" replaced with your own identifying
018 * information:
019 *      Portions Copyright [yyyy] [name of copyright owner]
020 *
021 * CDDL HEADER END
022 *
023 *
024 *      Copyright 2013-2015 ForgeRock AS
025 */
026package org.opends.server.replication.server.changelog.file;
027
028import java.util.Map.Entry;
029import java.util.Set;
030import java.util.concurrent.ConcurrentSkipListSet;
031
032import org.forgerock.i18n.slf4j.LocalizedLogger;
033import org.opends.server.api.DirectoryThread;
034import org.opends.server.backends.ChangelogBackend;
035import org.opends.server.replication.common.CSN;
036import org.opends.server.replication.common.MultiDomainServerState;
037import org.opends.server.replication.common.ServerState;
038import org.opends.server.replication.protocol.ReplicaOfflineMsg;
039import org.opends.server.replication.protocol.UpdateMsg;
040import org.opends.server.replication.server.changelog.api.AbortedChangelogCursorException;
041import org.opends.server.replication.server.changelog.api.ChangeNumberIndexRecord;
042import org.opends.server.replication.server.changelog.api.ChangelogDB;
043import org.opends.server.replication.server.changelog.api.ChangelogException;
044import org.opends.server.replication.server.changelog.api.DBCursor.CursorOptions;
045import org.opends.server.replication.server.changelog.api.ReplicationDomainDB;
046import org.opends.server.replication.server.changelog.api.ChangelogStateProvider;
047import org.opends.server.types.DN;
048
049import static org.opends.messages.ReplicationMessages.*;
050import static org.opends.server.replication.server.changelog.api.DBCursor.KeyMatchingStrategy.*;
051import static org.opends.server.replication.server.changelog.api.DBCursor.PositionStrategy.*;
052import static org.opends.server.util.StaticUtils.*;
053
054/**
055 * Thread responsible for inserting replicated changes into the ChangeNumber
056 * Index DB (CNIndexDB for short).
057 * <p>
058 * Only changes older than the medium consistency point are inserted in the
059 * CNIndexDB. As a consequence this class is also responsible for maintaining
060 * the medium consistency point (indirectly through an
061 * {@link ECLMultiDomainDBCursor}).
062 */
063public class ChangeNumberIndexer extends DirectoryThread
064{
065  /** The tracer object for the debug logger. */
066  private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass();
067
068  /**
069   * If it contains nothing, then the run method executes normally.
070   * Otherwise, the {@link #run()} method must clear its state
071   * for the supplied domain baseDNs. If a supplied domain is
072   * {@link DN#NULL_DN}, then all domains will be cleared.
073   */
074  private final ConcurrentSkipListSet<DN> domainsToClear = new ConcurrentSkipListSet<>();
075  private final ChangelogDB changelogDB;
076  private final ChangelogStateProvider changelogStateProvider;
077  private final ECLEnabledDomainPredicate predicate;
078
079  /*
080   * The following MultiDomainServerState fields must be thread safe, because
081   * 1) initialization can happen while the replication server starts receiving
082   * updates
083   * 2) many updates can happen concurrently.
084   */
085  /**
086   * Holds the last time each replica was seen alive, whether via updates or
087   * heartbeat notifications, or offline notifications. Data is held for each
088   * serverId cross domain.
089   * <p>
090   * Updates are persistent and stored in the replicaDBs, heartbeats are
091   * transient and are easily constructed on normal operations.
092   * <p>
093   * Note: This object is updated by both heartbeats and changes/updates.
094   */
095  private final MultiDomainServerState lastAliveCSNs = new MultiDomainServerState();
096
097  /** Note: This object is updated by replica offline messages. */
098  private final MultiDomainServerState replicasOffline = new MultiDomainServerState();
099
100  /**
101   * Cursor across all the replicaDBs for all the replication domains. It is
102   * positioned on the next change that needs to be inserted in the CNIndexDB.
103   * <p>
104   * Note: it is only accessed from the {@link #run()} method.
105   *
106   * @NonNull
107   */
108  private ECLMultiDomainDBCursor nextChangeForInsertDBCursor;
109  private MultiDomainServerState cookie = new MultiDomainServerState();
110
111  /**
112   * Builds a ChangeNumberIndexer object.
113   *  @param changelogDB
114   *          the changelogDB
115   * @param changelogStateProvider
116   *          the replication environment information for access to changelog state
117   */
118  public ChangeNumberIndexer(ChangelogDB changelogDB, ChangelogStateProvider changelogStateProvider)
119  {
120    this(changelogDB, changelogStateProvider, new ECLEnabledDomainPredicate());
121  }
122
123  /**
124   * Builds a ChangeNumberIndexer object.
125   * @param changelogDB
126   *          the changelogDB
127   * @param changelogStateProvider
128   *          the changelog state used for initialization
129   * @param predicate
130   */
131  ChangeNumberIndexer(ChangelogDB changelogDB, ChangelogStateProvider changelogStateProvider,
132      ECLEnabledDomainPredicate predicate)
133  {
134    super("Change number indexer");
135    this.changelogDB = changelogDB;
136    this.changelogStateProvider = changelogStateProvider;
137    this.predicate = predicate;
138  }
139
140  /**
141   * Ensures the medium consistency point is updated by heartbeats.
142   *
143   * @param baseDN
144   *          the baseDN of the domain for which the heartbeat is published
145   * @param heartbeatCSN
146   *          the CSN coming from the heartbeat
147   */
148  public void publishHeartbeat(DN baseDN, CSN heartbeatCSN)
149  {
150    if (!predicate.isECLEnabledDomain(baseDN))
151    {
152      return;
153    }
154
155    final CSN oldestCSNBefore = getOldestLastAliveCSN();
156    lastAliveCSNs.update(baseDN, heartbeatCSN);
157    tryNotify(oldestCSNBefore);
158  }
159
160  /**
161   * Indicates if the replica corresponding to provided domain DN and server id
162   * is offline.
163   *
164   * @param domainDN
165   *          base DN of the replica
166   * @param serverId
167   *          server id of the replica
168   * @return {@code true} if replica is offline, {@code false} otherwise
169   */
170  public boolean isReplicaOffline(DN domainDN, int serverId)
171  {
172    return replicasOffline.getCSN(domainDN, serverId) != null;
173  }
174
175  /**
176   * Ensures the medium consistency point is updated by UpdateMsg.
177   *
178   * @param baseDN
179   *          the baseDN of the domain for which the heartbeat is published
180   * @param updateMsg
181   *          the updateMsg that will update the medium consistency point
182   * @throws ChangelogException
183   *           If a database problem happened
184   */
185  public void publishUpdateMsg(DN baseDN, UpdateMsg updateMsg)
186      throws ChangelogException
187  {
188    if (!predicate.isECLEnabledDomain(baseDN))
189    {
190      return;
191    }
192
193    final CSN oldestCSNBefore = getOldestLastAliveCSN();
194    lastAliveCSNs.update(baseDN, updateMsg.getCSN());
195    tryNotify(oldestCSNBefore);
196  }
197
198  /**
199   * Signals a replica went offline.
200   *
201   * @param baseDN
202   *          the replica's replication domain
203   * @param offlineCSN
204   *          the serverId and time of the replica that went offline
205   */
206  public void replicaOffline(DN baseDN, CSN offlineCSN)
207  {
208    if (!predicate.isECLEnabledDomain(baseDN))
209    {
210      return;
211    }
212
213    replicasOffline.update(baseDN, offlineCSN);
214    final CSN oldestCSNBefore = getOldestLastAliveCSN();
215    lastAliveCSNs.update(baseDN, offlineCSN);
216    tryNotify(oldestCSNBefore);
217  }
218
219  private CSN getOldestLastAliveCSN()
220  {
221    return lastAliveCSNs.getOldestCSNExcluding(replicasOffline).getSecond();
222  }
223
224  /**
225   * Notifies the Change number indexer thread if it will be able to do some
226   * work.
227   */
228  private void tryNotify(final CSN oldestCSNBefore)
229  {
230    if (mightMoveForwardMediumConsistencyPoint(oldestCSNBefore))
231    {
232      synchronized (this)
233      {
234        notify();
235      }
236    }
237  }
238
239  /**
240   * Used for waking up the {@link ChangeNumberIndexer} thread because it might
241   * have some work to do.
242   */
243  private boolean mightMoveForwardMediumConsistencyPoint(CSN oldestCSNBefore)
244  {
245    final CSN oldestCSNAfter = getOldestLastAliveCSN();
246    // ensure that all initial replicas alive information have been updated
247    // with CSNs that are acceptable for moving the medium consistency forward
248    return allInitialReplicasAreOfflineOrAlive()
249        && oldestCSNBefore != null // then oldestCSNAfter cannot be null
250        // has the oldest CSN changed?
251        && oldestCSNBefore.isOlderThan(oldestCSNAfter);
252  }
253
254  /**
255   * Used by the {@link ChangeNumberIndexer} thread to determine whether the CSN
256   * must be persisted to the change number index DB.
257   */
258  private boolean canMoveForwardMediumConsistencyPoint(CSN nextCSNToPersist)
259  {
260    // ensure that all initial replicas alive information have been updated
261    // with CSNs that are acceptable for moving the medium consistency forward
262    return allInitialReplicasAreOfflineOrAlive()
263        // can we persist the next CSN?
264        && nextCSNToPersist.isOlderThanOrEqualTo(getOldestLastAliveCSN());
265  }
266
267  /**
268   * Returns true only if the initial replicas known from the changelog state DB
269   * are either:
270   * <ul>
271   * <li>offline, so do not wait for them in order to compute medium consistency
272   * </li>
273   * <li>alive, because we received heartbeats or changes (so their last alive
274   * CSN has been updated to something past the oldest possible CSN), we have
275   * enough info to compute medium consistency</li>
276   * </ul>
277   * In both cases, we have enough information to compute medium consistency
278   * without waiting any further.
279   */
280  private boolean allInitialReplicasAreOfflineOrAlive()
281  {
282    for (DN baseDN : lastAliveCSNs)
283    {
284      for (CSN csn : lastAliveCSNs.getServerState(baseDN))
285      {
286        if (csn.getTime() == 0
287            && replicasOffline.getCSN(baseDN, csn.getServerId()) == null)
288        {
289          // this is the oldest possible CSN, but the replica is not offline
290          // we must wait for more up to date information from this replica
291          return false;
292        }
293      }
294    }
295    return true;
296  }
297
298  /**
299   * Restores in memory data needed to build the CNIndexDB. In particular,
300   * initializes the changes cursor to the medium consistency point.
301   */
302  private void initialize() throws ChangelogException
303  {
304    final ReplicationDomainDB domainDB = changelogDB.getReplicationDomainDB();
305
306    initializeLastAliveCSNs(domainDB);
307    initializeNextChangeCursor(domainDB);
308    initializeOfflineReplicas();
309  }
310
311  private void initializeNextChangeCursor(final ReplicationDomainDB domainDB) throws ChangelogException
312  {
313    // Initialize the multi domain cursor only from the change number index record.
314    // The cookie is always empty at this stage.
315    final ChangeNumberIndexRecord newestRecord = changelogDB.getChangeNumberIndexDB().getNewestRecord();
316    final CSN newestCsn = newestRecord != null ? newestRecord.getCSN() : null;
317    final CursorOptions options = new CursorOptions(LESS_THAN_OR_EQUAL_TO_KEY, ON_MATCHING_KEY, newestCsn);
318    final MultiDomainServerState unused = new MultiDomainServerState();
319    MultiDomainDBCursor cursorInitializedToMediumConsistencyPoint = domainDB.getCursorFrom(unused, options);
320
321    nextChangeForInsertDBCursor = new ECLMultiDomainDBCursor(predicate, cursorInitializedToMediumConsistencyPoint);
322    ChangelogBackend.updateCookieToMediumConsistencyPoint(cookie, nextChangeForInsertDBCursor, newestRecord);
323  }
324
325  private void initializeLastAliveCSNs(final ReplicationDomainDB domainDB)
326  {
327    for (Entry<DN, Set<Integer>> entry : changelogStateProvider.getChangelogState().getDomainToServerIds().entrySet())
328    {
329      final DN baseDN = entry.getKey();
330      if (predicate.isECLEnabledDomain(baseDN))
331      {
332        for (Integer serverId : entry.getValue())
333        {
334          /*
335           * initialize with the oldest possible CSN in order for medium
336           * consistency to wait for all replicas to be alive before moving forward
337           */
338          lastAliveCSNs.update(baseDN, oldestPossibleCSN(serverId));
339        }
340
341        final ServerState latestKnownState = domainDB.getDomainNewestCSNs(baseDN);
342        lastAliveCSNs.update(baseDN, latestKnownState);
343      }
344    }
345  }
346
347  private void initializeOfflineReplicas()
348  {
349    final MultiDomainServerState offlineReplicas = changelogStateProvider.getChangelogState().getOfflineReplicas();
350    for (DN baseDN : offlineReplicas)
351    {
352      for (CSN offlineCSN : offlineReplicas.getServerState(baseDN))
353      {
354        if (predicate.isECLEnabledDomain(baseDN))
355        {
356          replicasOffline.update(baseDN, offlineCSN);
357          // a replica offline message could also be the very last time
358          // we heard from this replica :)
359          lastAliveCSNs.update(baseDN, offlineCSN);
360        }
361      }
362    }
363  }
364
365  private CSN oldestPossibleCSN(int serverId)
366  {
367    return new CSN(0, 0, serverId);
368  }
369
370  /** {@inheritDoc} */
371  @Override
372  public void initiateShutdown()
373  {
374    super.initiateShutdown();
375    synchronized (this)
376    {
377      notify();
378    }
379  }
380
381  /** {@inheritDoc} */
382  @Override
383  public void run()
384  {
385    try
386    {
387      /*
388       * initialize here to allow fast application start up and avoid errors due
389       * cursors being created in a different thread to the one where they are used.
390       */
391      initialize();
392
393      while (!isShutdownInitiated())
394      {
395        try
396        {
397          while (!domainsToClear.isEmpty())
398          {
399            final DN baseDNToClear = domainsToClear.first();
400            nextChangeForInsertDBCursor.removeDomain(baseDNToClear);
401            // Only release the waiting thread
402            // once this domain's state has been cleared.
403            domainsToClear.remove(baseDNToClear);
404          }
405          if (nextChangeForInsertDBCursor.shouldReInitialize())
406          {
407            nextChangeForInsertDBCursor.close();
408            initialize();
409          }
410          // Do not call DBCursor.next() here
411          // because we might not have consumed the last record,
412          // for example if we could not move the MCP forward
413          final UpdateMsg msg = nextChangeForInsertDBCursor.getRecord();
414          if (msg == null)
415          {
416            synchronized (this)
417            {
418              if (isShutdownInitiated())
419              {
420                continue;
421              }
422              wait();
423            }
424            // check whether new changes have been added to the ReplicaDBs
425            moveToNextChange();
426            continue;
427          }
428          else if (msg instanceof ReplicaOfflineMsg)
429          {
430            moveToNextChange();
431            continue;
432          }
433
434          final CSN csn = msg.getCSN();
435          final DN baseDN = nextChangeForInsertDBCursor.getData();
436          // FIXME problem: what if the serverId is not part of the ServerState?
437          // right now, change number will be blocked
438          if (!canMoveForwardMediumConsistencyPoint(csn))
439          {
440            // the oldest record to insert is newer than the medium consistency
441            // point. Let's wait for a change that can be published.
442            synchronized (this)
443            {
444              // double check to protect against a missed call to notify()
445              if (!canMoveForwardMediumConsistencyPoint(csn))
446              {
447                if (isShutdownInitiated())
448                {
449                  return;
450                }
451                wait();
452                // loop to check if changes older than the medium consistency
453                // point have been added to the ReplicaDBs
454                continue;
455              }
456            }
457          }
458
459          // OK, the oldest change is older than the medium consistency point
460          // let's publish it to the CNIndexDB.
461          final long changeNumber = changelogDB.getChangeNumberIndexDB()
462              .addRecord(new ChangeNumberIndexRecord(baseDN, csn));
463          if (!cookie.update(baseDN, csn))
464          {
465            throw new IllegalStateException("It was expected that change (baseDN=" + baseDN + ", csn=" + csn
466                + ") would have updated the cookie=" + cookie + ", but it did not");
467          }
468          notifyEntryAddedToChangelog(baseDN, changeNumber, cookie, msg);
469          moveForwardMediumConsistencyPoint(csn, baseDN);
470        }
471        catch (InterruptedException ignored)
472        {
473          // was shutdown called? loop to figure it out.
474          Thread.currentThread().interrupt();
475        }
476      }
477    }
478    catch (RuntimeException e)
479    {
480      logUnexpectedException(e);
481      // Rely on the DirectoryThread uncaught exceptions handler for logging error + alert.
482      throw e;
483    }
484    catch (Exception e)
485    {
486      logUnexpectedException(e);
487      // Rely on the DirectoryThread uncaught exceptions handler for logging error + alert.
488      throw new RuntimeException(e);
489    }
490    finally
491    {
492      nextChangeForInsertDBCursor.close();
493      nextChangeForInsertDBCursor = null;
494    }
495  }
496
497  private void moveToNextChange() throws ChangelogException
498  {
499    try
500    {
501      nextChangeForInsertDBCursor.next();
502    }
503    catch (AbortedChangelogCursorException e) {
504      if (domainsToClear.isEmpty())
505      {
506        // There is no domain to clear, thus it is
507        // not expected that a cursor is aborted
508        throw e;
509      }
510      // else assumes the aborted cursor is part of a domain
511      // that will be removed on the next iteration
512      logger.trace("Cursor was aborted: %s, but continuing because domainsToClear has size %s",
513          e, domainsToClear.size());
514    }
515  }
516
517  /**
518   * Notifies the {@link ChangelogBackend} that a new entry has been added.
519   *
520   * @param baseDN
521   *          the baseDN of the newly added entry.
522   * @param changeNumber
523   *          the change number of the newly added entry. It will be greater
524   *          than zero for entries added to the change number index and less
525   *          than or equal to zero for entries added to any replica DB
526   * @param cookie
527   *          the cookie of the newly added entry. This is only meaningful for
528   *          entries added to the change number index
529   * @param msg
530   *          the update message of the newly added entry
531   * @throws ChangelogException
532   *           If a problem occurs while notifying of the newly added entry.
533   */
534  protected void notifyEntryAddedToChangelog(DN baseDN, long changeNumber,
535      MultiDomainServerState cookie, UpdateMsg msg) throws ChangelogException
536  {
537    ChangelogBackend.getInstance().notifyChangeNumberEntryAdded(baseDN, changeNumber, cookie.toString(), msg);
538  }
539
540  /**
541   * Nothing can be done about it.
542   * <p>
543   * Rely on the DirectoryThread uncaught exceptions handler for logging error +
544   * alert.
545   * <p>
546   * Message logged here gives corrective information to the administrator.
547   */
548  private void logUnexpectedException(Exception e)
549  {
550    logger.trace(ERR_CHANGE_NUMBER_INDEXER_UNEXPECTED_EXCEPTION,
551        getClass().getSimpleName(), stackTraceToSingleLineString(e));
552  }
553
554  private void moveForwardMediumConsistencyPoint(final CSN mcCSN, final DN mcBaseDN) throws ChangelogException
555  {
556    final int mcServerId = mcCSN.getServerId();
557    final CSN offlineCSN = replicasOffline.getCSN(mcBaseDN, mcServerId);
558    final CSN lastAliveCSN = lastAliveCSNs.getCSN(mcBaseDN, mcServerId);
559    if (offlineCSN != null)
560    {
561      if (lastAliveCSN != null && offlineCSN.isOlderThan(lastAliveCSN))
562      {
563        // replica is back online, we can forget the last time it was offline
564        replicasOffline.removeCSN(mcBaseDN, offlineCSN);
565      }
566      else if (offlineCSN.isOlderThan(mcCSN))
567      {
568        /*
569         * replica is not back online, Medium consistency point has gone past
570         * its last offline time, and there are no more changes after the
571         * offline CSN in the cursor: remove everything known about it
572         * (offlineCSN from lastAliveCSN and remove all knowledge of this replica
573         * from the medium consistency RUV).
574         */
575        lastAliveCSNs.removeCSN(mcBaseDN, offlineCSN);
576      }
577    }
578
579    // advance the cursor we just read from,
580    // success/failure will be checked later
581    nextChangeForInsertDBCursor.next();
582  }
583
584  /**
585   * Asks the current thread to clear its state for the specified domain.
586   * <p>
587   * Note: This method blocks the current thread until state is cleared.
588   *
589   * @param baseDN the baseDN to be cleared from this thread's state.
590   *               {@code null} and {@link DN#NULL_DN} mean "clear all domains".
591   */
592  public void clear(DN baseDN)
593  {
594    // Use DN.NULL_DN to say "clear all domains"
595    final DN baseDNToClear = baseDN != null ? baseDN : DN.NULL_DN;
596    domainsToClear.add(baseDNToClear);
597    while (domainsToClear.contains(baseDNToClear)
598        && !State.TERMINATED.equals(getState()))
599    {
600      // wait until clear() has been done by thread, always waking it up
601      synchronized (this)
602      {
603        notify();
604      }
605      // ensures thread wait that this thread's state is cleaned up
606      Thread.yield();
607    }
608  }
609
610}