001/*
002 * The contents of this file are subject to the terms of the Common Development and
003 * Distribution License (the License). You may not use this file except in compliance with the
004 * License.
005 *
006 * You can obtain a copy of the License at legal/CDDLv1.0.txt. See the License for the
007 * specific language governing permission and limitations under the License.
008 *
009 * When distributing Covered Software, include this CDDL Header Notice in each file and include
010 * the License file at legal/CDDLv1.0.txt. If applicable, add the following below the CDDL
011 * Header, with the fields enclosed by brackets [] replaced by your own identifying
012 * information: "Portions Copyright [year] [name of copyright owner]".
013 *
014 * Copyright 2015-2016 ForgeRock AS.
015 */
016package org.opends.server.types;
017
018import java.util.Iterator;
019import java.util.LinkedList;
020import java.util.concurrent.TimeUnit;
021import java.util.concurrent.atomic.AtomicInteger;
022import java.util.concurrent.locks.Lock;
023import java.util.concurrent.locks.ReentrantReadWriteLock;
024
025import org.forgerock.opendj.ldap.DN;
026import org.forgerock.util.Reject;
027
028/**
029 * A lock manager coordinates directory update operations so that the DIT structure remains in a
030 * consistent state, as well as providing repeatable read isolation. When accessing entries
031 * components need to ensure that they have the appropriate lock:
032 * <ul>
033 * <li>repeatable reads: repeatable read isolation is rarely needed in practice, since all backend
034 * reads are guaranteed to be performed with read-committed isolation, which is normally sufficient.
035 * Specifically, read-only operations such as compare and search do not require any additional
036 * locking. If repeatable read isolation is required then lock the entry using
037 * {@link #tryReadLockEntry(DN)}
038 * <li>modifying an entry: acquire an entry write-lock for the target entry using
039 * {@link #tryWriteLockEntry(DN)}. Updates are typically performed using a read-modify-write cycle,
040 * so the write lock should be acquired before performing the initial read in order to ensure
041 * consistency
042 * <li>adding an entry: client code must acquire an entry write-lock for the target entry using
043 * {@link #tryWriteLockEntry(DN)}. The parent entry will automatically be protected from deletion by
044 * an implicit subtree read lock on the parent
045 * <li>deleting an entry: client code must acquire a subtree write lock for the target entry using
046 * {@link #tryWriteLockSubtree(DN)}
047 * <li>renaming an entry: client code must acquire a subtree write lock for the old entry, and a
048 * subtree write lock for the new entry using {@link #tryWriteLockSubtree(DN)}. Care should be taken
049 * to avoid deadlocks, e.g. by locking the DN which sorts first.
050 * </ul>
051 * In addition, backend implementations may choose to use their own lock manager for enforcing
052 * atomicity and isolation. This is typically the case for backends which cannot take advantage of
053 * atomicity guarantees provided by an underlying DB (the task backend is one such example).
054 * <p>
055 * <b>Implementation Notes</b>
056 * <p>
057 * The lock table is conceptually a cache of locks keyed on DN, i.e. a {@code Map<DN, DNLock>}.
058 * Locks must be kept in the cache while they are locked, but may be removed once they are no longer
059 * locked by any threads. Locks are represented using a pair of read-write locks: the first lock is
060 * the "subtree" lock and the second is the "entry" lock.
061 * <p>
062 * In order to lock an entry for read or write a <b>subtree</b> read lock is first acquired on each
063 * of the parent entries from the root DN down to the immediate parent of the entry to be locked.
064 * Then the appropriate read or write <b>entry</b> lock is acquired for the target entry. Subtree
065 * write locking is performed by acquiring a <b>subtree</b> read lock on each of the parent entries
066 * from the root DN down to the immediate parent of the subtree to be locked. Then a <b>subtree</b>
067 * write lock is acquired for the target subtree.
068 * <p>
069 * The lock table itself is not represented using a {@code ConcurrentHashMap} because the JDK6/7
070 * APIs do not provide the ability to atomically add-and-lock or unlock-and-remove locks (this
071 * capability is provided in JDK8). Instead, we provide our own implementation comprising of a fixed
072 * number of buckets, a bucket being a {@code LinkedList} of {@code DNLock}s. In addition, it is
073 * important to be able to efficiently iterate up and down a chain of hierarchically related locks,
074 * so each lock maintains a reference to its parent lock. Modern directories tend to have a flat
075 * structure so it is also important to avoid contention on "hot" parent DNs. Typically, a lock
076 * attempt against a DN will involve a cache miss for the target DN and a cache hit for the parent,
077 * but the parent will be the same parent for all lock requests, resulting in a lot of contention on
078 * the same lock bucket. To avoid this the lock manager maintains a small-thread local cache of
079 * locks, so that parent locks can be acquired using a lock-free algorithm.
080 * <p>
081 * Since the thread local cache may reference locks which are not actively locked by anyone, a
082 * reference counting mechanism is used in order to prevent cached locks from being removed from the
083 * underlying lock table. The reference counting mechanism is also used for references between a
084 * lock and its parent lock. To summarize, locking a DN involves the following steps:
085 * <ul>
086 * <li>get the lock from the thread local cache. If the lock was not in the thread local cache then
087 * try fetching it from the lock table:
088 * <ul>
089 * <li><i>found</i> - store it in the thread local cache and bump the reference count
090 * <li><i>not found</i> - create a new lock. First fetch the parent lock using the same process,
091 * i.e. looking in the thread local cache, etc. Then create a new lock referencing the parent lock
092 * (bumps the reference count for the parent lock), and store it in the lock table and the thread
093 * local cache with a reference count of 1.
094 * </ul>
095 * <li>return the lock to the application and increment its reference count since the application
096 * now also has a reference to the lock.
097 * </ul>
098 * Locks are dereferenced when they are unlocked, when they are evicted from a thread local cache,
099 * and when a child lock's reference count reaches zero. A lock is completely removed from the lock
100 * table once its reference count reaches zero.
101 */
102@org.opends.server.types.PublicAPI(stability = org.opends.server.types.StabilityLevel.UNCOMMITTED,
103    mayInstantiate = false, mayExtend = false, mayInvoke = true)
104public final class LockManager
105{
106  /**
107   * A lock on an entry or subtree. A lock can only be unlocked once.
108   */
109  public final class DNLock
110  {
111    private final DNLockHolder lock;
112    private final Lock subtreeLock;
113    private final Lock entryLock;
114    private boolean isLocked = true;
115
116    private DNLock(final DNLockHolder lock, final Lock subtreeLock, final Lock entryLock)
117    {
118      this.lock = lock;
119      this.subtreeLock = subtreeLock;
120      this.entryLock = entryLock;
121    }
122
123    @Override
124    public String toString()
125    {
126      return lock.toString();
127    }
128
129    /**
130     * Unlocks this lock and releases any blocked threads.
131     *
132     * @throws IllegalStateException
133     *           If this lock has already been unlocked.
134     */
135    public void unlock()
136    {
137      if (!isLocked)
138      {
139        throw new IllegalStateException("Already unlocked");
140      }
141      lock.releaseParentSubtreeReadLock();
142      subtreeLock.unlock();
143      entryLock.unlock();
144      dereference(lock);
145      isLocked = false;
146    }
147
148    // For unit testing.
149    int refCount()
150    {
151      return lock.refCount.get();
152    }
153  }
154
155  /**
156   * Lock implementation
157   */
158  private final class DNLockHolder
159  {
160    private final AtomicInteger refCount = new AtomicInteger();
161    private final DNLockHolder parent;
162    private final DN dn;
163    private final int dnHashCode;
164    private final ReentrantReadWriteLock subtreeLock = new ReentrantReadWriteLock();
165    private final ReentrantReadWriteLock entryLock = new ReentrantReadWriteLock();
166
167    DNLockHolder(final DNLockHolder parent, final DN dn, final int dnHashCode)
168    {
169      this.parent = parent;
170      this.dn = dn;
171      this.dnHashCode = dnHashCode;
172    }
173
174    @Override
175    public String toString()
176    {
177      return "\"" + dn + "\" : " + refCount;
178    }
179
180    /**
181     * Unlocks the subtree read lock from the parent of this lock up to the root.
182     */
183    void releaseParentSubtreeReadLock()
184    {
185      for (DNLockHolder lock = parent; lock != null; lock = lock.parent)
186      {
187        lock.subtreeLock.readLock().unlock();
188      }
189    }
190
191    DNLock tryReadLockEntry()
192    {
193      return tryLock(subtreeLock.readLock(), entryLock.readLock());
194    }
195
196    DNLock tryWriteLockEntry()
197    {
198      return tryLock(subtreeLock.readLock(), entryLock.writeLock());
199    }
200
201    DNLock tryWriteLockSubtree()
202    {
203      return tryLock(subtreeLock.writeLock(), entryLock.writeLock());
204    }
205
206    /**
207     * Locks the subtree read lock from the root down to the parent of this lock.
208     */
209    private boolean tryAcquireParentSubtreeReadLock()
210    {
211      // First lock the parents of the parent.
212      if (parent == null)
213      {
214        return true;
215      }
216
217      if (!parent.tryAcquireParentSubtreeReadLock())
218      {
219        return false;
220      }
221
222      // Then lock the parent of this lock
223      if (tryLockWithTimeout(parent.subtreeLock.readLock()))
224      {
225        return true;
226      }
227
228      // Failed to grab the parent lock within the timeout, so roll-back the other locks.
229      releaseParentSubtreeReadLock();
230      return false;
231    }
232
233    private DNLock tryLock(final Lock subtreeLock, final Lock entryLock)
234    {
235      if (tryAcquireParentSubtreeReadLock())
236      {
237        if (tryLockWithTimeout(subtreeLock))
238        {
239          if (tryLockWithTimeout(entryLock))
240          {
241            return new DNLock(this, subtreeLock, entryLock);
242          }
243          subtreeLock.unlock();
244        }
245        releaseParentSubtreeReadLock();
246      }
247      // Failed to acquire all the necessary locks within the time out.
248      dereference(this);
249      return null;
250    }
251
252    private boolean tryLockWithTimeout(final Lock lock)
253    {
254      try
255      {
256        return lock.tryLock(lockTimeout, lockTimeoutUnits);
257      }
258      catch (final InterruptedException e)
259      {
260        // Unable to handle interrupts here.
261        Thread.currentThread().interrupt();
262        return false;
263      }
264    }
265  }
266
267  private static final long DEFAULT_LOCK_TIMEOUT = 9;
268  private static final TimeUnit DEFAULT_LOCK_TIMEOUT_UNITS = TimeUnit.SECONDS;
269  private static final int MINIMUM_NUMBER_OF_BUCKETS = 64;
270  private static final int THREAD_LOCAL_CACHE_SIZE = 8;
271
272  private final int numberOfBuckets;
273  private final LinkedList<DNLockHolder>[] lockTable;
274  private final long lockTimeout;
275  private final TimeUnit lockTimeoutUnits;
276
277  // Avoid sub-classing in order to workaround class leaks in app servers.
278  private final ThreadLocal<LinkedList<DNLockHolder>> threadLocalCache = new ThreadLocal<>();
279
280  /**
281   * Creates a new lock manager with a lock timeout of 9 seconds and an automatically chosen number
282   * of lock table buckets based on the number of processors.
283   */
284  public LockManager()
285  {
286    this(DEFAULT_LOCK_TIMEOUT, DEFAULT_LOCK_TIMEOUT_UNITS);
287  }
288
289  /**
290   * Creates a new lock manager with the specified lock timeout and an automatically chosen number
291   * of lock table buckets based on the number of processors.
292   *
293   * @param lockTimeout
294   *          The lock timeout.
295   * @param lockTimeoutUnit
296   *          The lock timeout units.
297   */
298  public LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit)
299  {
300    this(lockTimeout, lockTimeoutUnit, Runtime.getRuntime().availableProcessors() * 8);
301  }
302
303  /**
304   * Creates a new lock manager with the provided configuration.
305   *
306   * @param lockTimeout
307   *          The lock timeout.
308   * @param lockTimeoutUnit
309   *          The lock timeout units.
310   * @param numberOfBuckets
311   *          The number of buckets to use in the lock table. The minimum number of buckets is 64.
312   */
313  @SuppressWarnings("unchecked")
314  public LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit, final int numberOfBuckets)
315  {
316    Reject.ifFalse(lockTimeout >= 0, "lockTimeout must be a non-negative integer");
317    Reject.ifNull(lockTimeoutUnit, "lockTimeoutUnit must be non-null");
318    Reject.ifFalse(numberOfBuckets > 0, "numberOfBuckets must be a positive integer");
319
320    this.lockTimeout = lockTimeout;
321    this.lockTimeoutUnits = lockTimeoutUnit;
322    this.numberOfBuckets = getNumberOfBuckets(numberOfBuckets);
323    this.lockTable = new LinkedList[this.numberOfBuckets];
324    for (int i = 0; i < this.numberOfBuckets; i++)
325    {
326      this.lockTable[i] = new LinkedList<>();
327    }
328  }
329
330  @Override
331  public String toString()
332  {
333    final StringBuilder builder = new StringBuilder();
334    for (int i = 0; i < numberOfBuckets; i++)
335    {
336      final LinkedList<DNLockHolder> bucket = lockTable[i];
337      synchronized (bucket)
338      {
339        for (final DNLockHolder lock : bucket)
340        {
341          builder.append(lock);
342          builder.append('\n');
343        }
344      }
345    }
346    return builder.toString();
347  }
348
349  /**
350   * Acquires the read lock for the specified entry. This method will block if the entry is already
351   * write locked or if the entry, or any of its parents, have the subtree write lock taken.
352   *
353   * @param entry
354   *          The entry whose read lock is required.
355   * @return The lock, or {@code null} if the lock attempt timed out.
356   */
357  public DNLock tryReadLockEntry(final DN entry)
358  {
359    return acquireLockFromCache(entry).tryReadLockEntry();
360  }
361
362  /**
363   * Acquires the write lock for the specified entry. This method will block if the entry is already
364   * read or write locked or if the entry, or any of its parents, have the subtree write lock taken.
365   *
366   * @param entry
367   *          The entry whose write lock is required.
368   * @return The lock, or {@code null} if the lock attempt timed out.
369   */
370  public DNLock tryWriteLockEntry(final DN entry)
371  {
372    return acquireLockFromCache(entry).tryWriteLockEntry();
373  }
374
375  /**
376   * Acquires the write lock for the specified subtree. This method will block if any entry or
377   * subtree within the subtree is already read or write locked or if any of the parent entries of
378   * the subtree have the subtree write lock taken.
379   *
380   * @param subtree
381   *          The subtree whose write lock is required.
382   * @return The lock, or {@code null} if the lock attempt timed out.
383   */
384  public DNLock tryWriteLockSubtree(final DN subtree)
385  {
386    return acquireLockFromCache(subtree).tryWriteLockSubtree();
387  }
388
389  // For unit testing.
390  int getLockTableRefCountFor(final DN dn)
391  {
392    final int dnHashCode = dn.hashCode();
393    final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
394    synchronized (bucket)
395    {
396      for (final DNLockHolder lock : bucket)
397      {
398        if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
399        {
400          return lock.refCount.get();
401        }
402      }
403      return -1;
404    }
405  }
406
407  //For unit testing.
408  int getThreadLocalCacheRefCountFor(final DN dn)
409  {
410    final LinkedList<DNLockHolder> cache = threadLocalCache.get();
411    if (cache == null)
412    {
413      return -1;
414    }
415    final int dnHashCode = dn.hashCode();
416    for (final DNLockHolder lock : cache)
417    {
418      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
419      {
420        return lock.refCount.get();
421      }
422    }
423    return -1;
424  }
425
426  private DNLockHolder acquireLockFromCache(final DN dn)
427  {
428    LinkedList<DNLockHolder> cache = threadLocalCache.get();
429    if (cache == null)
430    {
431      cache = new LinkedList<>();
432      threadLocalCache.set(cache);
433    }
434    return acquireLockFromCache0(dn, cache);
435  }
436
437  private DNLockHolder acquireLockFromCache0(final DN dn, final LinkedList<DNLockHolder> cache)
438  {
439    final int dnHashCode = dn.hashCode();
440    DNLockHolder lock = removeLock(cache, dn, dnHashCode);
441    if (lock == null)
442    {
443      lock = acquireLockFromLockTable(dn, dnHashCode, cache);
444      if (cache.size() >= THREAD_LOCAL_CACHE_SIZE)
445      {
446        // Cache too big: evict oldest entry.
447        dereference(cache.removeLast());
448      }
449    }
450    cache.addFirst(lock); // optimize for LRU
451    lock.refCount.incrementAndGet();
452    return lock;
453  }
454
455  private DNLockHolder acquireLockFromLockTable(final DN dn, final int dnHashCode, final LinkedList<DNLockHolder> cache)
456  {
457    /*
458     * The lock doesn't exist yet so we'll have to create a new one referencing its parent lock. The
459     * parent lock may not yet exist in the lock table either so acquire it before locking the
460     * bucket in order to avoid deadlocks resulting from reentrant bucket locks. Note that we
461     * pre-emptively fetch the parent lock because experiments show that the requested child lock is
462     * almost never in the lock-table. Specifically, this method is only called if we are already on
463     * the slow path due to a cache miss in the thread-local cache.
464     */
465    final DN parentDN = dn.parent();
466    final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
467    boolean parentLockWasUsed = false;
468    try
469    {
470      final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
471      synchronized (bucket)
472      {
473        DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
474        if (lock == null)
475        {
476          lock = new DNLockHolder(parentLock, dn, dnHashCode);
477          parentLockWasUsed = true;
478        }
479        bucket.addFirst(lock); // optimize for LRU
480        lock.refCount.incrementAndGet();
481        return lock;
482      }
483    }
484    finally
485    {
486      if (!parentLockWasUsed && parentLock != null)
487      {
488        dereference(parentLock);
489      }
490    }
491  }
492
493  private void dereference(final DNLockHolder lock)
494  {
495    if (lock.refCount.decrementAndGet() <= 0)
496    {
497      final LinkedList<DNLockHolder> bucket = getBucket(lock.dnHashCode);
498      boolean lockWasRemoved = false;
499      synchronized (bucket)
500      {
501        // Double check: another thread could have acquired the lock since we decremented it to zero.
502        if (lock.refCount.get() <= 0)
503        {
504          removeLock(bucket, lock.dn, lock.dnHashCode);
505          lockWasRemoved = true;
506        }
507      }
508
509      /*
510       * Dereference the parent outside of the bucket lock to avoid potential deadlocks due to
511       * reentrant bucket locks.
512       */
513      if (lockWasRemoved && lock.parent != null)
514      {
515        dereference(lock.parent);
516      }
517    }
518  }
519
520  private LinkedList<DNLockHolder> getBucket(final int dnHashCode)
521  {
522    return lockTable[dnHashCode & numberOfBuckets - 1];
523  }
524
525  /*
526   * Ensure that the number of buckets is a power of 2 in order to make it easier to map hash codes
527   * to bucket indexes.
528   */
529  private int getNumberOfBuckets(final int buckets)
530  {
531    final int roundedNumberOfBuckets = Math.min(buckets, MINIMUM_NUMBER_OF_BUCKETS);
532    int powerOf2 = 1;
533    while (powerOf2 < roundedNumberOfBuckets)
534    {
535      powerOf2 <<= 1;
536    }
537    return powerOf2;
538  }
539
540  private DNLockHolder removeLock(final LinkedList<DNLockHolder> lockList, final DN dn, final int dnHashCode)
541  {
542    final Iterator<DNLockHolder> iterator = lockList.iterator();
543    while (iterator.hasNext())
544    {
545      final DNLockHolder lock = iterator.next();
546      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
547      {
548        // Found: remove the lock because it will be moved to the front of the list.
549        iterator.remove();
550        return lock;
551      }
552    }
553    return null;
554  }
555}