HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_ArraySet.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_ArraySet.h (UT Library, C++)
7  *
8  * COMMENTS: Flat-array-based hash set implementation.
9  */
10 
11 #pragma once
12 
13 #ifndef __UT_ArraySet__
14 #define __UT_ArraySet__
15 
16 #include "UT_ArrayHelp.h"
17 #include "UT_Assert.h"
18 #include <SYS/SYS_Inline.h>
19 #include <SYS/SYS_Math.h>
20 #include <SYS/SYS_StaticAssert.h>
21 #include <SYS/SYS_Types.h>
22 
23 #include <hboost/functional/hash.hpp>
24 
25 #include <algorithm>
26 #include <limits>
27 #include <stdint.h>
28 #include <utility>
29 
30 namespace UT {
31 
32 template<typename T>
33 struct DefaultClearer;
34 
35 template<typename S>
36 struct DefaultClearer<S*>
37 {
38  static void clear(S*&v) { v = nullptr; }
39  static bool isClear(S *v) { return v == nullptr; }
40  static void clearConstruct(S**p) { clear(*p); }
41 
42  static const bool clearNeedsDestruction = false;
43 };
44 
45 template<typename T>
47 {
48  static void clear(T &v)
49  {
50  if (std::numeric_limits<T>::has_quiet_NaN) {
51  // Use QNaN as unused floating-point marker.
52  // NOTE: All NaNs get treated as unused, even if they're not
53  // the representative QNaN.
54  v = std::numeric_limits<T>::quiet_NaN();
55  }
57  // Use min as unused signed integer marker.
59  }
60  else {
61  // Use max as unused unsigned integer marker.
63  }
64  }
65  static bool isClear(T v)
66  {
67  if (std::numeric_limits<T>::has_quiet_NaN) {
68  // NOTE: All NaNs get treated as unused, even if they're not
69  // the representative QNaN.
70  return SYSisNan(v);
71  }
73  return (v == std::numeric_limits<T>::min());
74  }
75  return (v == std::numeric_limits<T>::max());
76  }
77  static void clearConstruct(T *p) { clear(*p); }
78 
79  static const bool clearNeedsDestruction = false;
80 };
81 
82 template<> struct DefaultClearer< int8_t > : public NumericClearer< int8_t > {};
83 template<> struct DefaultClearer<uint8_t > : public NumericClearer<uint8_t > {};
84 template<> struct DefaultClearer< int16_t> : public NumericClearer< int16_t> {};
85 template<> struct DefaultClearer<uint16_t> : public NumericClearer<uint16_t> {};
86 template<> struct DefaultClearer< int32_t> : public NumericClearer< int32_t> {};
87 template<> struct DefaultClearer<uint32_t> : public NumericClearer<uint32_t> {};
88 template<> struct DefaultClearer< int64_t> : public NumericClearer< int64_t> {};
89 template<> struct DefaultClearer<uint64_t> : public NumericClearer<uint64_t> {};
90 template<> struct DefaultClearer<float> : public NumericClearer<float> {};
91 template<> struct DefaultClearer<double> : public NumericClearer<double> {};
92 template<> struct DefaultClearer<long double> : public NumericClearer<long double> {};
93 #ifdef MBSD
94 // On macOS, size_t is "unsigned long" which matches none of the above
95 template<> struct DefaultClearer<size_t> : public NumericClearer<size_t> {};
96 #endif
97 
98 /// NOTE: It may seem silly to have bool here, but things like
99 /// std::pair<T,bool> should work automatically, so they need some
100 /// default behaviour for bool.
101 template<>
102 struct DefaultClearer<bool>
103 {
104  static void clear(bool&v) { v = false; }
105  static bool isClear(bool v) { return !v; }
106  static void clearConstruct(bool *p) { clear(*p); }
107 
108  static const bool clearNeedsDestruction = false;
109 };
110 
111 template<typename S0,typename S1>
112 struct DefaultClearer<std::pair<S0,S1> >
113 {
114  static void clear(std::pair<S0,S1> &v)
115  {
116  DefaultClearer<S0>::clear(v.first);
117  DefaultClearer<S1>::clear(v.second);
118  }
119  static bool isClear(const std::pair<S0,S1> &v)
120  {
121  return DefaultClearer<S0>::isClear(v.first) && DefaultClearer<S1>::isClear(v.second);
122  }
123  static void clearConstruct(std::pair<S0,S1>*p) {
126  }
128 };
129 
130 /// This is close to a drop-in replacement for std::unordered_set,
131 /// except that it uses a single array of items and empty spaces
132 /// marked with a dedicated "clear" value.
133 /// It also has a fixed maximum load factor, and doesn't store a
134 /// hasher, comparator, or allocator object as member data,
135 /// avoiding unnecessary overhead, but these differences introduce
136 /// some interface incompatibilities.
137 ///
138 /// The incompatibility that will probably be encountered most often
139 /// is the lack of time guarantees. This structure is more efficient in
140 /// many common cases, due to better memory coherency and fewer
141 /// memory allocations, but cannot guarantee as good time scaling in
142 /// worst or even average case as std::unordered_set guarantees for most
143 /// functions.
144 ///
145 /// Because there is only space for one item in each bucket, if a collision
146 /// is hit on insertion, the structure will scan forward until a "clear"
147 /// bucket is found. However, items in a contiguous block will always
148 /// be sorted in the order of their ideal bucket numbers.
149 /// If the end of the array is reached without finding a clear bucket,
150 /// instead of the obvious approach of wrapping to the beginning of the
151 /// array, the block will be shifted backward, so some items in this
152 /// block at the end may be earlier than their ideal bucket number.
153 /// This is about as complicated as wrapping, or not sorting, but avoids
154 /// an awkward issue when erasing while iterating where an item might get
155 /// visited multiple times due to the wrapping, or items might be missed
156 /// due to the lack of order. Even still, unlike
157 /// std::unordered_set, erase may invalidate other iterators and references.
158 /// Also, insert may invalidate other iterators and references.
159 template<
160  typename Key,
161  bool MULTI=false,
162  std::size_t MAX_LOAD_FACTOR_256=128,
163  typename Clearer=DefaultClearer<Key>,
164  class Hash=hboost::hash<Key>,
165  class KeyEqual=std::equal_to<Key>
166 >
167 class ArraySet
168 {
169 public:
171  typedef Key key_type;
172  typedef Key value_type;
173  typedef std::size_t size_type;
174  typedef std::ptrdiff_t difference_type;
175  typedef Hash hasher;
176  typedef KeyEqual key_equal;
178  typedef const value_type& const_reference;
179  typedef value_type *pointer;
180  typedef const value_type *const_pointer;
181  typedef Clearer clearer_type;
182 
184  : myBuckets(nullptr)
185  , myNumBuckets(0)
186  {
187  UT_ASSERT_COMPILETIME(MAX_LOAD_FACTOR_256 <= 256);
188  }
189  explicit ArraySet(size_type init_bucket_count)
190  : myBuckets(nullptr)
191  , myNumBuckets(0)
192  {
193  if (init_bucket_count != 0)
194  setNumBuckets(init_bucket_count);
195  }
196 
197  /// Move constructor, destroying the source.
199  : myBuckets(that.myBuckets)
200  , myNumBuckets(that.myNumBuckets)
201  {
202  that.myBuckets = nullptr;
203  that.myNumBuckets = 0;
204  }
205 
206  /// Copy constructor.
207  /// Avoid accidental copying by making the constructor explicit.
208  explicit ArraySet(const set_type &that)
209  : myBuckets(nullptr)
210  , myNumBuckets(0)
211  {
212  *this = that;
213  }
214 
215  /// Inserts all of the items in the range [start_input,end_input).
216  template<typename INPUT_ITERATOR>
217  ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count = 0)
218  : myBuckets(nullptr)
219  , myNumBuckets(0)
220  {
221  // Preallocate enough buckets, instead of waiting for
222  // insert to potentially hit collisions.
223  size_type ninput_items = std::distance(start_input, end_input);
224  if (ninput_items == 0)
225  return;
226  setNumBuckets(SYSmax(init_bucket_count, minBuckets(ninput_items)));
227 
228  // Now, just insert them all, (no need to realloc)
229  for (; start_input != end_input; ++start_input)
230  {
231  pointer pdest;
232  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
233  if (success)
234  *pdest = *start_input;
235  else
236  --ninput_items;
237  }
238  *((size_type*)(myBuckets+myNumBuckets)) = ninput_items;
239  }
240  /// Constructs a set from an initializer list, with an optional
241  /// bucket count, e.g.:
242  /// UT::ArraySet<int> some_set({5,123,500}, 20);
243  ArraySet(std::initializer_list<value_type> init, size_type init_bucket_count = 0)
244  : myBuckets(nullptr)
245  , myNumBuckets(0)
246  {
248  if (init_bucket_count == 0 || init_bucket_count < init.size()) {
249  bucket_count = minBuckets(init.size());
250  }
251  else {
252  bucket_count = init_bucket_count;
253  }
254  setNumBuckets(bucket_count);
255  for (auto &&p = init.begin(); p != init.end(); ++p) {
256  insert(*p);
257  }
258  }
259 
261  {
262  if (this == &that)
263  return *this;
264 
265  clear();
267 
268  if (!myBuckets)
269  return *this;
270 
271  size_type size = that.size();
272  *((size_type*)(myBuckets+myNumBuckets)) = size;
273 
274  if (size)
275  {
276  // Now, just copy them all, (no need to realloc)
277  const_pointer srcp = that.myBuckets;
279  for (pointer destp = myBuckets; destp != pend; ++destp, ++srcp)
280  {
281  *destp = *srcp;
282  }
283  }
284  return *this;
285  }
286 
287  set_type &operator=(std::initializer_list<value_type> init)
288  {
289  clear();
290  insert(init.begin(), init.end());
291  return *this;
292  }
293 
295  {
296  destroy();
297  myBuckets = that.myBuckets;
298  myNumBuckets = that.myNumBuckets;
299  that.myBuckets = nullptr;
300  that.myNumBuckets = 0;
301  return *this;
302  }
303 
304  bool operator==(const set_type &that) const
305  {
306  if (size() != that.size())
307  return false;
308 
309  // Check if one has no allocation (both empty)
310  if (!myBuckets || !that.myBuckets)
311  return true;
312 
313  const const_pointer pend = myBuckets + myNumBuckets;
314  for (const_pointer p = myBuckets; p != pend; ++p)
315  {
316  if (Clearer::isClear(*p))
317  continue;
318  // NOTE: Don't just use that.contains(*p), in case it doesn't check the whole item contents!
319  const_iterator it = that.find(*p);
320  if (it.atEnd())
321  return false;
322  if (*p != *it)
323  return false;
324  }
325  return true;
326  }
327  bool operator!=(const set_type &that) const
328  {
329  return !(*this == that);
330  }
331 
332  /// Swaps another set with this one
333  void swap(set_type &that)
334  {
335  pointer temp_buckets = that.myBuckets;
336  size_type temp_numbuckets = that.myNumBuckets;
337  that.myBuckets = myBuckets;
338  that.myNumBuckets = myNumBuckets;
339  myBuckets = temp_buckets;
340  myNumBuckets = temp_numbuckets;
341  }
342 
344  {
345  destroy();
346  }
347 
348  /// Returns true iff there are no items in the set
349  bool empty() const
350  {
351  return !myBuckets || (*((const size_type*)(myBuckets+myNumBuckets)) == 0);
352  }
353 
354  /// Returns the number of items in the set
355  size_type size() const
356  {
357  return myBuckets ? *(const size_type*)(myBuckets+myNumBuckets) : 0;
358  }
359 
360  /// This only exists because the standard requires it.
361  /// The only hard limit is what memory will allow.
363  {
365  }
366 
367  /// Removes all items from the set, but does not free the memory.
368  /// Call destroy() to free all the memory.
369  void clear()
370  {
371  if (!myBuckets)
372  return;
373  pointer p = myBuckets;
374  pointer pend = p+myNumBuckets;
375  if (*((const size_type*)pend) == 0)
376  return;
377  while (p != pend)
378  {
379  Clearer::clear(*p);
380  ++p;
381  }
382  *((size_type*)pend) = 0;
383  }
384 
385  /// Removes all items from the set and frees all the memory.
386  void destroy()
387  {
388  if (!myBuckets)
389  return;
390  if (!std::is_trivially_destructible<value_type>::value && (Clearer::clearNeedsDestruction || !empty()))
391  {
392  pointer p = myBuckets;
393  const_pointer pend = p+myNumBuckets;
394  while (p != pend)
395  {
396  // I'm assuming that in some cases, it's faster to check isClear than to call the destructor,
397  // even if it's only because of a better chance of inlining.
398  if (Clearer::clearNeedsDestruction || !Clearer::isClear(*p))
399  p->~value_type();
400  ++p;
401  }
402  }
403  free(myBuckets);
404  myBuckets = nullptr;
405  myNumBuckets = 0;
406  }
407 
408  /// Returns the current number of buckets.
410  {
411  return myNumBuckets;
412  }
413 
414  /// This only exists because the standard requires it.
415  /// The only hard limit is what memory will allow.
417  {
419  }
420 
421  /// This only exists because the standard requires it.
422  /// This function doesn't really make sense for this implementation,
423  /// but it returns 1 if the bucket is occupied, and 0 if it is not.
425  {
426  if (i >= myNumBuckets) {
427  return 0;
428  }
429  if (Clearer::isClear(myBuckets[i])) {
430  return 0;
431  }
432  return 1;
433  }
434 
435  /// Returns the portion of buckets that need to be occupied before
436  /// reallocation occurs.
437  /// Unlike in the standard, the maximum load factor is a constant
438  /// in this implementation, so max_load_factor() is a static function.
439  static float max_load_factor()
440  {
441  return float(MAX_LOAD_FACTOR_256)/256;
442  }
443 
444  /// Returns the current portion of buckets that are occupied.
445  float load_factor() const
446  {
447  return float(size())/float(myNumBuckets);
448  }
449 
450  /// Sets the number of buckets to larger of new_num_buckets and the
451  /// required minimum number of buckets for size() items, unless
452  /// that number is the same as the current number of buckets.
453  void rehash(size_type new_num_buckets)
454  {
455  size_type min_buckets = minBuckets(size());
456  if (new_num_buckets < min_buckets) {
457  new_num_buckets = min_buckets;
458  }
459  if (myNumBuckets == new_num_buckets) {
460  return;
461  }
462  setNumBuckets(min_buckets);
463  }
464 
465  /// Sets the number of buckets to the required minimum number of buckets
466  /// for the larger of num_items and size().
468  {
469  size_type min_buckets = minBuckets(SYSmax(size(),num_items));
470  setNumBuckets(min_buckets);
471  }
472 
473  /// Sets the number of buckets to new_num_buckets, regardless of the
474  /// required minimum number of buckets for size() items, though
475  /// you probably shouldn't be calling it with num_new_buckets < size(),
476  /// except that if num_new_buclets==0, this calls destroy().
477  void setNumBuckets(size_type new_num_buckets)
478  {
479  if (new_num_buckets == myNumBuckets)
480  return;
481  if (new_num_buckets == 0)
482  {
483  // Assume that the caller wants to delete everything
484  destroy();
485  return;
486  }
487 
488  size_type old_size = size();
489  if (new_num_buckets < old_size)
490  {
491  UT_ASSERT_MSG(0,"What does the caller expect to happen when setting the number of buckets lower than the size, but not to zero?\nThis class doesn't support multiple items per bucket.");
492  new_num_buckets = old_size;
493  }
494 
495  // NOTE: I'm assuming that the caller, in explicitly calling
496  // setNumBuckets, doesn't want to be limited by MAX_LOAD_FACTOR_256,
497  // so don't check against it.
498 
499  pointer new_buckets = (pointer)malloc(new_num_buckets*sizeof(value_type) + sizeof(size_type));
500  pointer new_end = new_buckets + new_num_buckets;
501  for (pointer p = new_buckets; p < new_end; ++p)
502  {
503  Clearer::clearConstruct(p);
504  }
505 
506  if (old_size)
507  {
508  // Move items over and clear them from myBuckets
510  for (pointer srcp = myBuckets; srcp < pend; ++srcp)
511  {
512  if (!Clearer::isClear(*srcp))
513  {
514  value_type v;
515  Clearer::clear(v);
516  // Need to make sure that *p is set to clear,
517  // as opposed to whatever std::move leaves it as,
518  // so swap with clear.
519  std::swap(*srcp, v);
520  pointer destp;
521  insertHelper<false,false>(new_buckets, new_num_buckets, v, destp);
522  *destp = std::move(v);
523  }
524  }
525 
526  // Temporarily set the size to 0, so that destroy() won't re-destruct them.
527  *((size_type*)pend) = 0;
528  }
529  // Don't worry; this will just destruct the current buckets
530  destroy();
531 
532  myBuckets = new_buckets;
533  myNumBuckets = new_num_buckets;
534  *((size_type*)new_end) = old_size;
535  }
536 
537  /// This increases the number of buckets, if needed, for
538  /// the specified number of items
539  void bumpNumBuckets(size_type new_num_items)
540  {
541  size_type num_buckets = minBuckets(new_num_items);
542  if (num_buckets <= myNumBuckets)
543  return;
544  // Although UT_Array normally bumps up by about 12.5% each time,
545  // and so does UTbumpAllocToPrime, to reduce significant rehashing
546  // overhead, we bump to at least twice the previous size.
547  num_buckets = SYSmax(num_buckets, 2*myNumBuckets);
548  num_buckets = UTbumpAllocToPrime(num_buckets);
549  setNumBuckets(num_buckets);
550  }
551 
552  /// Set iterator class
553  template<bool constant_type>
555  {
556  public:
557  typedef std::forward_iterator_tag iterator_category;
558  typedef std::ptrdiff_t difference_type;
562 
563  iterator_t() = default;
564  iterator_t(pointer current, const_pointer end, bool scan=true)
565  : myCurrent(current)
566  , myEnd(end)
567  {
568  UT_ASSERT_MSG_P(scan || current == end || !Clearer::isClear(*current), "An iterator can't point to a clear item!");
569  if (scan)
570  {
571  while (myCurrent != myEnd && Clearer::isClear(*myCurrent))
572  {
573  ++myCurrent;
574  }
575  }
576  }
577  iterator_t(const iterator_t &) = default;
578  iterator_t(iterator_t &&) = default;
579  iterator_t &operator=(const iterator_t &) = default;
580  iterator_t &operator=(iterator_t &&) = default;
581  bool operator==(const iterator_t &that) const
582  {
583  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
584  return (myCurrent == that.myCurrent);
585  }
586  bool operator!=(const iterator_t &that) const
587  {
588  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
589  return (myCurrent != that.myCurrent);
590  }
591  bool atEnd() const
592  {
593  return myCurrent == myEnd;
594  }
596  {
597  UT_ASSERT_MSG_P(myCurrent < myEnd, "Incrementing end iterator!");
598  if (myCurrent == myEnd) {
599  return *this;
600  }
601  do {
602  ++myCurrent;
603  } while (myCurrent != myEnd && Clearer::isClear(*myCurrent));
604  return *this;
605  }
606 
607  // This is intentionally not implemented to force its non-use but we
608  // keep it here just in case there's weird C++ rules on iterator
609  // requirements
611 
613  {
614  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
615  return *myCurrent;
616  }
618  {
619  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
620  return myCurrent;
621  }
622  operator pointer() const
623  {
624  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
625  return myCurrent;
626  }
628  {
629  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
630  return myCurrent;
631  }
633  {
634  return myEnd;
635  }
636  private:
637  friend class iterator_t<!constant_type>;
638 
639  pointer myCurrent;
640  const_pointer myEnd;
641  };
642 
643  /// Iterator type for iterating over non-constant elements.
644  typedef iterator_t<false> iterator;
645 
646  /// Iterator type for iterating over constant elements.
647  /// It may be useful to convert an iterator to a const_iterator,
648  /// so making this a subclass lets us add a constructor for that.
649  class const_iterator : public iterator_t<true>
650  {
651  public:
653 
654  const_iterator() = default;
655  const_iterator(const iterator &non_const_it)
656  : iterator_t<true>(non_const_it.getCurrent(), non_const_it.getEnd(), false)
657  {}
658  };
659 
660  /// Returns a non-const iterator for the beginning of the set.
662  {
664  }
665  /// Returns a const iterator for the beginning of the set.
666  const_iterator cbegin() const
667  {
668  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
669  }
670  /// Returns a const iterator for the beginning of the set.
671  const_iterator begin() const
672  {
673  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
674  }
675  /// Returns a non-const end iterator for the set.
677  {
679  return iterator(pend,pend,false);
680  }
681  /// Returns a const end iterator for the set.
682  const_iterator cend() const
683  {
685  return const_iterator(pend,pend,false);
686  }
687  /// Returns a const end iterator for the set.
688  const_iterator end() const
689  {
691  return const_iterator(pend,pend,false);
692  }
693 
694  /// Returns an iterator to the first item matching key,
695  /// or an end iterator if no items match key.
696  /// @{
697  iterator find(const Key &key)
698  {
699  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
700 
701  const pointer pstart = myBuckets;
702  if (!pstart)
703  return iterator(nullptr,nullptr,false);
704 
705  const pointer pend = pstart + myNumBuckets;
706 
707  const pointer search_start = searchStart(key);
708  for (pointer p = search_start; p != pend; ++p)
709  {
710  if (key_equal()(key,*p))
711  return iterator(p,pend,false);
712  if (Clearer::isClear(*p))
713  return iterator(pend,pend,false);
714  }
715  // Hit the end, so search back from before the start of the block.
716  for (pointer p = search_start-1; p >= pstart; --p)
717  {
718  if (key_equal()(key,*p))
719  return iterator(p,pend,false);
720  if (Clearer::isClear(*p))
721  return iterator(pend,pend,false);
722  }
723  return iterator(pend,pend,false);
724  }
725  const_iterator find(const Key &key) const
726  {
727  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
728 
729  const const_pointer pstart = myBuckets;
730  if (!pstart)
731  return const_iterator(nullptr,nullptr,false);
732 
733  const const_pointer pend = pstart + myNumBuckets;
734 
735  const_pointer search_start = searchStart(key);
736  for (const_pointer p = search_start; p != pend; ++p)
737  {
738  if (key_equal()(key,*p))
739  return const_iterator(p,pend,false);
740  if (Clearer::isClear(*p))
741  return const_iterator(pend,pend,false);
742  }
743  // Hit the end, so search back from before the start of the block.
744  for (const_pointer p = search_start-1; p >= pstart; --p)
745  {
746  if (key_equal()(key,*p))
747  return const_iterator(p,pend,false);
748  if (Clearer::isClear(*p))
749  return const_iterator(pend,pend,false);
750  }
751  return const_iterator(pend,pend,false);
752  }
753  /// @}
754 
755  /// Returns the number of entries matching key.
756  /// If MULTI is false, this will only return either 0 or 1.
757  size_type count(const Key &key) const
758  {
759  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
760 
761  if (!myBuckets)
762  return 0;
763 
764  const const_pointer pstart = myBuckets;
765  const const_pointer pend = pstart + myNumBuckets;
766  const_pointer search_start = searchStart(key);
767  size_type count = 0;
768  for (const_pointer p = search_start; p != pend; ++p)
769  {
770  if (key_equal()(key,*p))
771  {
772  if (!MULTI)
773  return 1;
774  ++count;
775  }
776  else if (Clearer::isClear(*p))
777  return MULTI ? count : 0;
778  }
779  // Hit the end, so search back from before the start of the block.
780  for (const_pointer p = search_start-1; p >= pstart; --p)
781  {
782  if (key_equal()(key,*p))
783  {
784  if (!MULTI)
785  return 1;
786  ++count;
787  }
788  else if (Clearer::isClear(*p))
789  return MULTI ? count : 0;
790  }
791  return MULTI ? count : 0;
792  }
793 
794  /// Returns true iff the set contains the given key.
795  /// This should be faster than count() if MULTI is true.
796  bool contains(const Key &key) const
797  {
798  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
799 
800  if (!myBuckets)
801  return false;
802 
803  const const_pointer pstart = myBuckets;
804  const const_pointer pend = pstart + myNumBuckets;
805  const_pointer search_start = searchStart(key);
806  for (const_pointer p = search_start; p != pend; ++p)
807  {
808  if (key_equal()(key,*p))
809  return true;
810  if (Clearer::isClear(*p))
811  return false;
812  }
813  // Hit the end, so search back from before the start of the block.
814  for (const_pointer p = search_start-1; p >= pstart; --p)
815  {
816  if (key_equal()(key,*p))
817  return true;
818  if (Clearer::isClear(*p))
819  return false;
820  }
821  return false;
822  }
823 
824  template <typename S>
825  bool containsAll(const S &src) const
826  {
827  return std::all_of(std::begin(src), std::end(src),
828  [this](const auto v) { return contains(v); });
829  }
830 
831  /// Returns a pair of iterators representing the range of values matching
832  /// key, as [first,second).
833  /// @{
834  std::pair<const_iterator,const_iterator> equal_range(const Key &key) const
835  {
836  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
837 
838  const const_pointer pstart = myBuckets;
839  const const_pointer pend = pstart + myNumBuckets;
840  if (!pstart)
841  return std::make_pair(const_iterator(nullptr,nullptr,false), const_iterator(nullptr,nullptr,false));
842 
843  const const_pointer search_start = searchStart(key);
844  if (!MULTI)
845  {
846  for (const_pointer p = search_start; p != pend; ++p)
847  {
848  if (key_equal()(key,*p))
849  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
850  if (Clearer::isClear(*p))
851  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
852  }
853  // Hit the end, so search back from before the start of the block.
854  for (const_pointer p = search_start-1; p >= pstart; --p)
855  {
856  if (key_equal()(key,*p))
857  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
858  if (Clearer::isClear(*p))
859  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
860  }
861  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
862  }
863 
864  // MULTI is true, so we might have multiple matches
865  const_pointer first_found = nullptr;
866  const_pointer end_found = pend;
867  for (const_pointer p = search_start; p != pend; ++p)
868  {
869  if (key_equal()(key,*p))
870  {
871  if (!first_found)
872  first_found = p;
873  }
874  else if (first_found)
875  {
876  if (first_found != search_start)
877  return std::make_pair(const_iterator(first_found,pend,false), const_iterator(p,pend,true));
878 
879  end_found = p;
880  break;
881  }
882  else if (Clearer::isClear(*p))
883  {
884  // NOTE: first_found must be false
885  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
886  }
887  }
888 
889  // Hit the end, so search back from before the start of the block.
890  for (const_pointer p = search_start-1; p >= pstart; --p)
891  {
892  if (!key_equal()(key,*p))
893  {
894  return std::make_pair(const_iterator(p+1,pend,false), const_iterator(end_found,pend,true));
895  }
896  }
897  return std::make_pair(const_iterator(pstart,pend,false), const_iterator(end_found,pend,true));
898  }
899  std::pair<iterator,iterator> equal_range(const Key &key)
900  {
901  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
902 
903  const pointer pstart = myBuckets;
904  const pointer pend = pstart + myNumBuckets;
905  if (!pstart)
906  return std::make_pair(iterator(nullptr,nullptr,false), iterator(nullptr,nullptr,false));
907 
908  const pointer search_start = searchStart(key);
909  if (!MULTI)
910  {
911  for (pointer p = search_start; p != pend; ++p)
912  {
913  if (key_equal()(key,*p))
914  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
915  if (Clearer::isClear(*p))
916  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
917  }
918  // Hit the end, so search back from before the start of the block.
919  for (pointer p = search_start-1; p >= pstart; --p)
920  {
921  if (key_equal()(key,*p))
922  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
923  if (Clearer::isClear(*p))
924  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
925  }
926  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
927  }
928 
929  // MULTI is true, so we might have multiple matches
930  pointer first_found = nullptr;
931  pointer end_found = pend;
932  for (pointer p = search_start; p != pend; ++p)
933  {
934  if (key_equal()(key,*p))
935  {
936  if (!first_found)
937  first_found = p;
938  }
939  else if (first_found)
940  {
941  if (first_found != search_start)
942  return std::make_pair(iterator(first_found,pend,false), iterator(p,pend,true));
943 
944  end_found = p;
945  break;
946  }
947  else if (Clearer::isClear(*p))
948  {
949  // NOTE: first_found must be false
950  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
951  }
952  }
953 
954  // Hit the end, so search back from before the start of the block.
955  for (pointer p = search_start-1; p >= pstart; --p)
956  {
957  if (!key_equal()(key,*p))
958  {
959  return std::make_pair(iterator(p+1,pend,false), iterator(end_found,pend,true));
960  }
961  }
962  return std::make_pair(iterator(pstart,pend,false), iterator(end_found,pend,true));
963  }
964  /// @}
965 
966  /// Inserts the given value into the set, unless MULTI is false and
967  /// there's already an item for which key_equal()(value,other)
968  /// returns true. Returns a pair of iterator to the inserted value
969  /// (or the existing item if not inserted), and a bool that's true
970  /// iff the item was inserted.
971  /// @{
972  std::pair<iterator, bool> insert(const value_type &value)
973  {
974  pointer p;
975  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
976  if (success)
977  {
978  *p = value;
980  }
981  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
982  }
983  std::pair<iterator, bool> insert(value_type &&value)
984  {
985  pointer p;
986  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
987  if (success)
988  {
989  *p = std::move(value);
991  }
992  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
993  }
994  /// @}
995 
996  /// Inserts all of the items in the range [start_input,end_input).
997  template<typename INPUT_ITERATOR>
998  void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
999  {
1000  // Preallocate enough buckets, instead of waiting for
1001  // insert to potentially hit collisions.
1002  size_type ninput_items = std::distance(start_input, end_input);
1003  if (!ninput_items)
1004  return;
1005 
1006  size_type new_size = size() + ninput_items;
1007  if (myNumBuckets < minBuckets(new_size))
1008  bumpNumBuckets(new_size);
1009 
1010  // Now, just insert them all, (no need to realloc)
1011  for (; start_input != end_input; ++start_input)
1012  {
1013  pointer pdest;
1014  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
1015  if (success)
1016  *pdest = *start_input;
1017  else
1018  --new_size;
1019 
1020  }
1021  *((size_type*)(myBuckets+myNumBuckets)) = new_size;
1022  }
1023  /// Inserts all of the items from the initializer_list.
1024  void insert(std::initializer_list<value_type> list)
1025  {
1026  // Just delegate to the iterator range insert function.
1027  insert(list.begin(),list.end());
1028  }
1029 
1030  /// Inserts an element constructed with the given arguments.
1031  /// Maybe the standard implementation can somehow get a speedup
1032  /// from this, but we need the item's hash before inserting,
1033  /// so it needs to be constructed first.
1034  template <typename... Args>
1035  std::pair<iterator,bool> emplace(Args&&... args)
1036  {
1037  return insert(value_type(std::forward<Args>(args)...));
1038  }
1039 
1040  /// Removes the item at the specified iterator location,
1041  /// returning an iterator to the next item.
1042  ///
1043  /// Unlike std::unordered_set::erase(const_iterator), this may
1044  /// invalidate iterators and references to other items in the
1045  /// map, if they are in a contiguous block of non-empty items
1046  /// with the item that iter points to. Also, this only
1047  /// accepts an iterator, instead of a const_iterator.
1048  /// (Why would std::unordered_set::erase accept a const_iterator?)
1050  {
1051  const pointer pstart = myBuckets;
1052  const pointer pend = pstart + myNumBuckets;
1053 
1054  const pointer p = (pointer)iter;
1055 
1056  UT_ASSERT_MSG_P(p >= pstart && p < pend && !Clearer::isClear(*p), "An iterator to erase can't point to a clear item!");
1057 
1058  // Actually erase the item
1059  Clearer::clear(*p);
1060  size_type new_size = --*((size_type*)(myBuckets+myNumBuckets));
1061 
1062  if (new_size == 0)
1063  return iterator(pend,pend,false);
1064 
1065  // Check for the common, simple case, where there's nothing after p in
1066  // the block of possible conflicts, and p isn't at the end.
1067  pointer pnext = p+1;
1068  bool end_case = (pnext == pend);
1069  if (!end_case)
1070  {
1071  if (Clearer::isClear(*pnext))
1072  return iterator(pnext,pend,true);
1073 
1074  // Now, for the hard case... -_-
1075 
1076  // If we're not right at the end of the array already,
1077  // find the end of the block, and check if it's the end of the array.
1078  // pnext is already clear, else we'd have returned above.
1079  do
1080  {
1081  ++pnext;
1082  end_case = (pnext == pend);
1083  } while (!end_case && !Clearer::isClear(*pnext));
1084  }
1085 
1086  if (end_case && (p != pstart) && !Clearer::isClear(*(p-1)))
1087  {
1088  // If this is a block right at the end of the array, there may be earlier
1089  // items that are too early, in which case, we need to shift
1090  // everything after that down one, into the empty spot.
1091 
1092  // Find the start of the block.
1093  pointer pprev = p-1;
1094  while (pprev != pstart && !Clearer::isClear(*(pprev-1)))
1095  {
1096  --pprev;
1097  }
1098 
1099  // Find the earliest item that should go at or after
1100  // its current bucket.
1101  do
1102  {
1103  // Check where this item ideally belongs.
1104  size_type hash = hasher()(*pprev);
1105  size_type ideal_bucket_num = hash%myNumBuckets;
1106  if (ideal_bucket_num > (pprev-pstart))
1107  {
1108  // Shift everything later by one and return.
1109  for (pointer pdest = p; pdest != pprev; --pdest)
1110  {
1111  *pdest = std::move(*(pdest-1));
1112  }
1113  Clearer::clear(*pprev);
1114  return iterator(p+1,pend,true);
1115  }
1116 
1117  ++pprev;
1118  } while (pprev != p);
1119  }
1120 
1121  if (p+1 != pend)
1122  {
1123  // We're not at the end of the array, or there were no items before
1124  // this block to move into the empty space, but there is at least
1125  // one item after p, so shift items backward until there's one
1126  // that doesn't belong earlier than it is now.
1127 
1128  size_type empty_bucket_num = p-pstart;
1129 
1130  // NOTE: pnext points to the end of this block.
1131  for (pointer pdest = p; pdest+1 != pnext; ++pdest)
1132  {
1133  // Check where this item ideally belongs.
1134  size_type hash = hasher()(*(pdest+1));
1135  size_type ideal_bucket_num = hash%myNumBuckets;
1136  if (ideal_bucket_num > empty_bucket_num)
1137  {
1138  Clearer::clear(*pdest);
1139  return iterator(p+(pdest==p),pend,false);
1140  }
1141 
1142  *pdest = std::move(*(pdest+1));
1143  ++empty_bucket_num;
1144  }
1145  Clearer::clear(*(pnext-1));
1146  // p now contains an item that was previously after it.
1147  return iterator(p,pend,false);
1148  }
1149 
1150  // p is still empty, but there's an item (or the end pointer)
1151  // right after it.
1152  return iterator(p+1,pend,false);
1153  }
1154 
1155 #if 0 // This isn't fully implemented, so it's excluded to avoid anyone accidentally using it.
1156  /// Removes all items in the range [start_iter,end_iter),
1157  /// and returns an iterator immediately following that range.
1158  ///
1159  /// Unlike std::unordered_set::erase(const_iterator,const_iterator),
1160  /// this may invalidate iterators and references to other items in the
1161  /// map, if they are in a contiguous block of non-empty items
1162  /// with the items in the erased range. Also, this only
1163  /// accepts iterator's, instead of const_iterator's.
1164  /// (Why would std::unordered_set::erase accept a const_iterator?)
1165  iterator erase(iterator start_iter, iterator end_iter) {
1166  if (start_iter == end_iter)
1167  return end_iter;
1168 
1169  const pointer pstart = myBuckets;
1170  const const_pointer pend = pstart + myNumBuckets;
1171 
1172  const pointer prange_start = (pointer)start_iter;
1173  const pointer prange_end = (pointer)end_iter;
1174 
1175  UT_ASSERT_MSG_P(prange_start >= pstart && prange_start < prange_end && prange_end <= pend && !Clearer::isClear(*prange_start), "An iterator to erase can't point to a clear item!");
1176 
1177  // Actually erase the items
1178  size_type nremoved_items = 1;
1179  size_type nitems_in_last_block = 1;
1180  Clearer::clear(*prange_start);
1181  for (pointer p = prange_start+1; p != prange_end; ++p)
1182  {
1183  if (!Clearer::isClear(*p))
1184  {
1185  Clearer::clear(*p);
1186  ++nitems_in_last_block;
1187  ++nremoved_items;
1188  }
1189  else
1190  {
1191  nitems_in_last_block = 0;
1192  }
1193  }
1194 
1195  size_type new_size = (*((size_type*)(myBuckets+myNumBuckets)) -= nremoved_items);
1196 
1197  if (new_size == 0)
1198  {
1199  // This must be pointing to the end of the array.
1200  return end_iter;
1201  }
1202 
1203  pointer block_end = prange_end;
1204 
1205  if (prange_end != pend)
1206  {
1207  // NOTE: If prange_end is not pend, it is always occupied, because
1208  // it came from an iterator, which should be dereferenceable.
1209  // However, if there was a gap between the last removed item
1210  // and prange_end, we can still exit early.
1211  if (nitems_in_last_block == 0)
1212  return end_iter;
1213 
1214  // Find the end of the block
1215  do
1216  {
1217  ++block_end;
1218  } while (block_end != pend && !Clearer::isClear(*block_end));
1219  }
1220  else {
1221  // If end_iter is at the array end, and there's at least one empty
1222  // spot in the given range, we're done, because the last block
1223  // was completely eliminated.
1224  if (nitems_in_last_block != prange_end-prange_start)
1225  return end_iter;
1226  }
1227 
1228  UT_ASSERT_P(nitems_in_last_block > 0);
1229 
1230  // If the end of the block is the array end and
1231  // nitems_in_last_block == prange_end-prange_start (no gaps),
1232  // find the beginning of the block, and if there are any items that
1233  // belong in later buckets:
1234  // 1) shift everything after to the end of the clear block.
1235  // 2) move them backward to where they belong, as available.
1236  // 3) if there is still an item in the last cleared spot, return.
1237  if (block_end == pend && nitems_in_last_block == prange_end-prange_start && prange_start != pstart && !Clearer::isClear(*(prange_start-1)))
1238  {
1239  // Find the start of the block.
1240  pointer block_start = prange_start;
1241  do
1242  {
1243  --block_start;
1244  } while (block_start != pstart && !Clearer::isClear(*(block_start-1)));
1245 
1246  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1247  }
1248 
1249  // Move items backward until there's one that doesn't
1250  // need to be moved backward.
1251  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1252  return end_iter; // This is probably incorrect, but just to get it to compile
1253  }
1254 #endif
1255 
1256  /// Removes all items matching key and returns
1257  /// the number of items removed.
1258  size_type erase(const Key &key)
1259  {
1260  if (!MULTI)
1261  {
1262  iterator it = find(key);
1263  if (it.atEnd())
1264  return 0;
1265  erase(it);
1266  return 1;
1267  }
1268 
1269  // NOTE: This commented out code won't work, because the 2-argument erase isn't implemented.
1270  //std::pair<iterator,iterator> its = equal_range(key);
1271  //size_type count = std::distance(its.first, its.second);
1272  //erase(its.first, its.second);
1273 
1274  size_type count = 0;
1275  for (iterator it = find(key); !it.atEnd(); it = find(key))
1276  {
1277  erase(it);
1278  }
1279  return count;
1280  }
1281 
1282  /// NOTE: std::unordered_set::hash_function() isn't static,
1283  /// but here, we're not storing a hasher as
1284  /// a data member, so it's independent of the instance.
1286  {
1287  return hasher();
1288  }
1289 
1290  /// NOTE: std::unordered_set::key_eq() isn't static,
1291  /// but here, we're not storing a key_equal as
1292  /// a data member, so it's independent of the instance.
1294  {
1295  return key_equal();
1296  }
1297 
1298  /// Returns the amount of memory used by this, excluding any memory that might be
1299  /// allocated by any of the items, themselves.
1300  int64 getMemoryUsage(bool inclusive) const
1301  {
1302  int64 mem = inclusive ? sizeof(*this) : 0;
1303  mem += sizeof(value_type)*myNumBuckets;
1304  return mem;
1305  }
1306 
1307  template<typename FUNCTOR>
1309  void forEach(FUNCTOR &&functor) const
1310  {
1312  const const_pointer pend = p + myNumBuckets;
1313  for (; p != pend; ++p)
1314  {
1315  if (!Clearer::isClear(*p))
1316  functor(*p);
1317  }
1318  }
1319 
1320 protected:
1322  {
1323  // If there are only 0-2 elements, there might as well only be
1324  // that many buckets.
1325  if (size <= 2)
1326  return size;
1327 
1328  // Simpler case if MAX_LOAD_FACTOR_256 evenly divides 256
1329  if ((256%MAX_LOAD_FACTOR_256) == 0)
1330  // +1 because otherwise it's guaranteed to be composite.
1331  return size * (256/MAX_LOAD_FACTOR_256) + 1;
1332 
1333  return (size*256 + MAX_LOAD_FACTOR_256-1)/MAX_LOAD_FACTOR_256;
1334  }
1335  pointer searchStart(const Key &key)
1336  {
1337  const size_type hash = hasher()(key);
1338  return myBuckets + (hash%myNumBuckets);
1339  }
1340  const_pointer searchStart(const Key &key) const
1341  {
1342  const size_type hash = hasher()(key);
1343  return myBuckets + (hash%myNumBuckets);
1344  }
1345 
1346  template<bool fail_on_equal=!MULTI,bool check_realloc=true>
1347  bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value,pointer &destp)
1348  {
1349  UT_ASSERT_P(!Clearer::isClear(value));
1350 
1351  if (check_realloc && nbuckets == 0)
1352  {
1353  setNumBuckets(1);
1354  destp = myBuckets;
1355  return true;
1356  }
1357 
1358  const pointer pend = pstart + nbuckets;
1359  const size_type hash = hasher()(value);
1360  const size_type ideal_bucket = (hash%nbuckets);
1361  pointer init_searchp = pstart + ideal_bucket;
1362  pointer searchp = init_searchp;
1363 
1364  if (Clearer::isClear(*searchp))
1365  {
1366  // Don't bother checking MAX_LOAD_FACTOR_256 if exact match is clear,
1367  // since there's no harm in perfect hashes... unless it fills up and
1368  // people do a lot of querying keys that aren't in the set.
1369  // If that ends up being a bottleneck, feel free to change this.
1370  destp = searchp;
1371  return true;
1372  }
1373 
1374  if (fail_on_equal && key_equal()(*searchp,value))
1375  {
1376  destp = searchp;
1377  return false;
1378  }
1379 
1380  if (!fail_on_equal && check_realloc)
1381  {
1382  // We're always adding one.
1383  size_type new_size = size() + 1;
1384  if (nbuckets < minBuckets(new_size)) {
1385  bumpNumBuckets(new_size);
1386  // Delegate to version that doesn't check for realloc.
1387  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1388  }
1389  // We're not reallocating in this case.
1390  }
1391 
1392  // Find the place where our ideal bucket is less than the next
1393  // item's ideal bucket, i.e. the end of where collisions on ideal_bucket go,
1394  // in case there are multiple collision spans that have written past ideal_bucket.
1395  if ((hasher()(*searchp)%nbuckets) <= ideal_bucket)
1396  {
1397  while (true)
1398  {
1399  ++searchp;
1400  if (searchp == pend || Clearer::isClear(*searchp))
1401  break;
1402 
1403  const size_type other_hash = hasher()(*searchp);
1404  if (fail_on_equal && other_hash==hash && key_equal()(*searchp,value))
1405  {
1406  destp = searchp;
1407  return false;
1408  }
1409  if (ideal_bucket < (other_hash%nbuckets))
1410  break;
1411  }
1412  }
1413  // Known:
1414  // [init_searchp, searchp] does not contain value.
1415  // searchp is one of:
1416  // Clear
1417  // pend
1418  // idealbucket(searchp) > idealbucket(value)
1419 
1420  pointer insertp = searchp;
1421 
1422  // Find the end of the block of present items, all of
1423  // which should have idealbuckets greater than us.
1424  while (searchp != pend && !Clearer::isClear(*searchp))
1425  {
1426  ++searchp;
1427  }
1428 
1429  if (searchp != pend)
1430  {
1431  // Common case: block end is not the array end.
1432  // Implies searchp is Clear.
1433  // Since we didn't hit the end, this occupied block
1434  // was never bumped back, so we don't have to look
1435  // back for matches or proper ideal bucket span.
1436  // We can easily make room by moving everything in
1437  // [insertp, searchp) -> [insertp+1, searchp+1)
1438  // so we can write to [insertp].
1439 
1440  if (fail_on_equal && check_realloc)
1441  {
1442  // We're always adding one.
1443  size_type new_size = *((const size_type*)pend) + 1;
1444  if (nbuckets < minBuckets(new_size))
1445  {
1446  bumpNumBuckets(new_size);
1447  // Delegate to version that doesn't check for realloc.
1448  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1449  }
1450  // We're not reallocating in this case.
1451  }
1452 
1453  // Move items later by one.
1454  // std::move [insertp, searchp) to [insertp+1, searchp+1)
1455  while (searchp != insertp)
1456  {
1457  *searchp = std::move(*(searchp-1));
1458  --searchp;
1459  }
1460  destp = insertp;
1461  return true;
1462  }
1463 
1464  // Less common case: block end is the array end.
1465  // insertp may actually need to go earlier because we may
1466  // not be pointing to the end of our ideal block span.
1467  // We want insertp to refer to a location that we can update
1468  // with value and preserve the ordering of idealbuckets.
1469  if (insertp == init_searchp)
1470  {
1471  // There should always be a space somewhere if !fail_on_equal,
1472  // because reallocation was handled above.
1473  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1474  while ((!fail_on_equal || insertp != pstart) && !Clearer::isClear(*(insertp-1)))
1475  {
1476  const size_type other_hash = hasher()(*(insertp-1));
1477  if (fail_on_equal && other_hash == hash && key_equal()(*(insertp-1),value))
1478  {
1479  destp = insertp-1;
1480  return false;
1481  }
1482 
1483  // Exiting at <= rather than < means that we may be in
1484  // middle of an ideal block span.
1485  if ((other_hash%nbuckets) <= ideal_bucket)
1486  break;
1487  --insertp;
1488  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1489  }
1490  }
1491 
1492  // There are occupied buckets from init_searchp to the end,
1493  // but insertp is after init_searchp, so we don't need to look backward
1494  // ^^^^^^^^^ It isn't???
1495  // to find where to insert; we just need to shift items that
1496  // are in the range from the beginning of the occupied block to insertp
1497  // backward by 1.
1498  //
1499  // Because of the previous loop, insertp can be less than init_searchp.
1500  // In that case we want to start the blockstart at insertp. This
1501  // is both more performant, but more importantly the
1502  // in_ideal_bucket_span condition will actually make sense then.
1503  // Without this, the init_searchp might be *greater* than the
1504  // ideal bucket span and cause us to immediately stop checking.
1505 
1506  pointer blockstart = (init_searchp < insertp) ? init_searchp : insertp;
1507  if (fail_on_equal && check_realloc)
1508  {
1509  // It can happen that blockstart is pstart iff the entire
1510  // array is full, and since we've checked all items with equal hash
1511  // in that case, we know that we'll be inserting, so we need to
1512  // reallocate.
1513  if (blockstart == pstart)
1514  {
1515  size_type new_size = *((const size_type*)pend) + 1;
1516  bumpNumBuckets(new_size);
1517  // Delegate to version that doesn't check for realloc.
1518  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1519  }
1520  }
1521 
1522  // NOTE: blockstart shouldn't hit pstart, because myNumBuckets > size()
1523  UT_ASSERT_P(blockstart-1 >= pstart);
1524  bool in_ideal_bucket_span = true;
1525  bool checked_realloc = false;
1526  while (!Clearer::isClear(*(blockstart-1)))
1527  {
1528  --blockstart;
1529  // Note that we might be in the middle of a span of equal initial_bucket
1530  // values, so an equal item might be earlier than init_searchp.
1531  if (fail_on_equal && in_ideal_bucket_span)
1532  {
1533  const size_type other_hash = hasher()(*blockstart);
1534  if ((other_hash == hash) && key_equal()(*blockstart,value))
1535  {
1536  destp = blockstart;
1537  return false;
1538  }
1539  in_ideal_bucket_span = ((other_hash%nbuckets) == ideal_bucket);
1540  }
1541  if (fail_on_equal && check_realloc && !checked_realloc)
1542  {
1543  if (!in_ideal_bucket_span)
1544  {
1545  // We're always adding one, since items can't be equal outside
1546  // the equal ideal_bucket span. We only need to check for
1547  // reallocation once, since if there's at least one space free,
1548  // blockstart can't end up equalling pstart.
1549  size_type new_size = *((const size_type*)pend) + 1;
1550  if (nbuckets < minBuckets(new_size))
1551  {
1552  bumpNumBuckets(new_size);
1553  // Delegate to version that doesn't check for realloc.
1554  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1555  }
1556  // We're not reallocating in this case.
1557  checked_realloc = true;
1558  }
1559  // It can happen that blockstart is pstart iff the entire
1560  // array is full, and since we've checked all items with equal hash
1561  // in that case, we know that we'll be inserting, so we need to reallocate.
1562  else if (blockstart == pstart)
1563  {
1564  size_type new_size = *((const size_type*)pend) + 1;
1565  bumpNumBuckets(new_size);
1566  // Delegate to version that doesn't check for realloc.
1567  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1568  }
1569  }
1570  UT_ASSERT_P(blockstart-1 >= pstart);
1571  }
1572 
1573  if (fail_on_equal && check_realloc && !checked_realloc)
1574  {
1575  // We're always adding one.
1576  size_type new_size = *((const size_type*)pend) + 1;
1577  if (nbuckets < minBuckets(new_size))
1578  {
1579  bumpNumBuckets(new_size);
1580  // Delegate to version that doesn't check for realloc.
1581  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1582  }
1583  // We're not reallocating in this case.
1584  }
1585 
1586  // Move items earlier by one.
1587  while (blockstart != insertp)
1588  {
1589  *(blockstart-1) = std::move(*blockstart);
1590  ++blockstart;
1591  }
1592  destp = insertp-1;
1593  return true;
1594  }
1595 
1596 protected:
1599 };
1600 
1601 template<
1602  typename Key,
1603  bool MULTI,
1604  std::size_t MAX_LOAD_FACTOR_256,
1605  typename Clearer,
1606  class Hash,
1607  class KeyEqual
1608 >
1609 struct DefaultClearer<ArraySet<Key,MULTI,MAX_LOAD_FACTOR_256,Clearer,Hash,KeyEqual> >
1610 {
1613  static void clear(TheType&v) { v.destroy(); }
1615  static bool isClear(TheType &v) { return v.bucket_count() == 0; }
1617  static void clearConstruct(TheType *p) { new((void *)p) TheType(); }
1618 
1619  static const bool clearNeedsDestruction = false;
1620 };
1621 
1622 } // namespace UT
1623 
1624 template<typename Key,
1625  bool MULTI = false,
1626  int MAX_LOAD_FACTOR_256 = 128,
1627  typename Clearer = UT::DefaultClearer<Key>,
1628  class Hash = hboost::hash<Key>,
1629  class KeyEqual = std::equal_to<Key> >
1631 
1632 namespace std {
1633  template<typename Key,
1634  bool MULTI,
1635  int MAX_LOAD_FACTOR_256,
1636  typename Clearer,
1637  class Hash,
1638  class KeyEqual>
1639  void swap(
1642  {
1643  a.swap(b);
1644  }
1645 }
1646 
1647 #endif
float load_factor() const
Returns the current portion of buckets that are occupied.
Definition: UT_ArraySet.h:445
const_iterator find(const Key &key) const
Definition: UT_ArraySet.h:725
#define SYSmax(a, b)
Definition: SYS_Math.h:1570
const_pointer searchStart(const Key &key) const
Definition: UT_ArraySet.h:1340
pointer myBuckets
Definition: UT_ArraySet.h:1597
static void clearConstruct(bool *p)
Definition: UT_ArraySet.h:106
ArraySet(const set_type &that)
Definition: UT_ArraySet.h:208
ArraySet(size_type init_bucket_count)
Definition: UT_ArraySet.h:189
UT_ASSERT_COMPILETIME(BRAY_EVENT_MAXFLAGS<=32)
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:558
void clear()
Definition: UT_ArraySet.h:369
std::pair< iterator, bool > insert(const value_type &value)
Definition: UT_ArraySet.h:972
size_type size() const
Returns the number of items in the set.
Definition: UT_ArraySet.h:355
UT_API exint UTbumpAllocToPrime(exint current_size)
void setNumBuckets(size_type new_num_buckets)
Definition: UT_ArraySet.h:477
static const bool clearNeedsDestruction
Definition: UT_ArraySet.h:79
STATIC_INLINE size_t Hash(const char *s, size_t len)
Definition: farmhash.h:2038
void swap(UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &a, UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &b)
Definition: UT_ArraySet.h:1639
const GLdouble * v
Definition: glcorearb.h:837
pointer getCurrent() const
Definition: UT_ArraySet.h:627
GLsizei const GLfloat * value
Definition: glcorearb.h:824
static void clearConstruct(T *p)
Definition: UT_ArraySet.h:77
static size_type max_bucket_count()
Definition: UT_ArraySet.h:416
std::pair< iterator, bool > insert(value_type &&value)
Definition: UT_ArraySet.h:983
static void clear(S *&v)
Definition: UT_ArraySet.h:38
static void clear(bool &v)
Definition: UT_ArraySet.h:104
constexpr bool SYSisNan(const F f)
Definition: SYS_Math.h:184
set_type & operator=(set_type &&that)
Definition: UT_ArraySet.h:294
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
bool empty() const
Returns true iff there are no items in the set.
Definition: UT_ArraySet.h:349
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator end()
Returns a non-const end iterator for the set.
Definition: UT_ArraySet.h:676
iterator_t & operator=(const iterator_t &)=default
#define UT_ASSERT_MSG_P(ZZ,...)
Definition: UT_Assert.h:158
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_t >::value > is_signed
Definition: format.h:821
static bool isClear(bool v)
Definition: UT_ArraySet.h:105
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:174
bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value, pointer &destp)
Definition: UT_ArraySet.h:1347
static bool isClear(const std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:119
ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > set_type
Definition: UT_ArraySet.h:170
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
iterator erase(iterator iter)
Definition: UT_ArraySet.h:1049
void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:998
iterator_t(pointer current, const_pointer end, bool scan=true)
Definition: UT_ArraySet.h:564
std::size_t size_type
Definition: UT_ArraySet.h:173
static size_type minBuckets(size_type size)
Definition: UT_ArraySet.h:1321
reference operator*() const
Definition: UT_ArraySet.h:612
IMATH_NAMESPACE::V2f float
static void clearConstruct(std::pair< S0, S1 > *p)
Definition: UT_ArraySet.h:123
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
void destroy()
Removes all items from the set and frees all the memory.
Definition: UT_ArraySet.h:386
static bool isClear(S *v)
Definition: UT_ArraySet.h:39
ArraySet(std::initializer_list< value_type > init, size_type init_bucket_count=0)
Definition: UT_ArraySet.h:243
GLuint GLuint end
Definition: glcorearb.h:475
iterator begin()
Returns a non-const iterator for the beginning of the set.
Definition: UT_ArraySet.h:661
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
Set iterator class.
Definition: UT_ArraySet.h:554
size_type myNumBuckets
Definition: UT_ArraySet.h:1598
long long int64
Definition: SYS_Types.h:116
set_type & operator=(const set_type &that)
Definition: UT_ArraySet.h:260
bool containsAll(const S &src) const
Definition: UT_ArraySet.h:825
set_type & operator=(std::initializer_list< value_type > init)
Definition: UT_ArraySet.h:287
static key_equal key_eq()
Definition: UT_ArraySet.h:1293
static void clearConstruct(S **p)
Definition: UT_ArraySet.h:40
void bumpNumBuckets(size_type new_num_items)
Definition: UT_ArraySet.h:539
const_iterator cbegin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:666
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
const_iterator cend() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:682
iterator find(const Key &key)
Definition: UT_ArraySet.h:697
num_items
Definition: UT_RTreeImpl.h:672
std::conditional< constant_type, const value_type, value_type >::type & reference
Definition: UT_ArraySet.h:560
std::forward_iterator_tag iterator_category
Definition: UT_ArraySet.h:557
int64 getMemoryUsage(bool inclusive) const
Definition: UT_ArraySet.h:1300
ArraySet(set_type &&that)
Move constructor, destroying the source.
Definition: UT_ArraySet.h:198
size_type count(const Key &key) const
Definition: UT_ArraySet.h:757
GLsizeiptr size
Definition: glcorearb.h:664
bool operator!=(const set_type &that) const
Definition: UT_ArraySet.h:327
value_type & reference
Definition: UT_ArraySet.h:177
size_type bucket_size(size_type i) const
Definition: UT_ArraySet.h:424
GLenum void ** pointer
Definition: glcorearb.h:810
std::pair< iterator, bool > emplace(Args &&...args)
Definition: UT_ArraySet.h:1035
std::conditional< constant_type, const typename ArraySet::value_type, typename ArraySet::value_type >::type value_type
Definition: UT_ArraySet.h:559
SYS_FORCE_INLINE void forEach(FUNCTOR &&functor) const
Definition: UT_ArraySet.h:1309
void insert(std::initializer_list< value_type > list)
Inserts all of the items from the initializer_list.
Definition: UT_ArraySet.h:1024
const value_type * const_pointer
Definition: UT_ArraySet.h:180
const_pointer getEnd() const
Definition: UT_ArraySet.h:632
std::pair< const_iterator, const_iterator > equal_range(const Key &key) const
Definition: UT_ArraySet.h:834
static bool isClear(T v)
Definition: UT_ArraySet.h:65
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator_t< constant_type > & operator++()
Definition: UT_ArraySet.h:595
void swap(set_type &that)
Swaps another set with this one.
Definition: UT_ArraySet.h:333
const value_type & const_reference
Definition: UT_ArraySet.h:178
pointer operator->() const
Definition: UT_ArraySet.h:617
**If you just want to fire and args
Definition: thread.h:609
static void clear(std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:114
static size_type max_size()
Definition: UT_ArraySet.h:362
void rehash(size_type new_num_buckets)
Definition: UT_ArraySet.h:453
std::pair< iterator, iterator > equal_range(const Key &key)
Definition: UT_ArraySet.h:899
static float max_load_factor()
Definition: UT_ArraySet.h:439
std::conditional< constant_type, typename set_type::const_pointer, typename set_type::pointer >::type pointer
Definition: UT_ArraySet.h:561
Definition: core.h:1131
static hasher hash_function()
Definition: UT_ArraySet.h:1285
static void clear(T &v)
Definition: UT_ArraySet.h:48
const_iterator begin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:671
iterator_t< false > iterator
Iterator type for iterating over non-constant elements.
Definition: UT_ArraySet.h:644
SIM_API const UT_StringHolder distance
bool operator!=(const iterator_t &that) const
Definition: UT_ArraySet.h:586
bool contains(const Key &key) const
Definition: UT_ArraySet.h:796
size_type bucket_count() const
Returns the current number of buckets.
Definition: UT_ArraySet.h:409
type
Definition: core.h:1059
KeyEqual key_equal
Definition: UT_ArraySet.h:176
pointer searchStart(const Key &key)
Definition: UT_ArraySet.h:1335
const_iterator(const iterator &non_const_it)
Definition: UT_ArraySet.h:655
bool operator==(const iterator_t &that) const
Definition: UT_ArraySet.h:581
size_type erase(const Key &key)
Definition: UT_ArraySet.h:1258
ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count=0)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:217
GLint GLsizei count
Definition: glcorearb.h:405
Clearer clearer_type
Definition: UT_ArraySet.h:181
const_iterator end() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:688
bool operator==(const set_type &that) const
Definition: UT_ArraySet.h:304
GLenum src
Definition: glcorearb.h:1793
void reserve(size_type num_items)
Definition: UT_ArraySet.h:467
value_type * pointer
Definition: UT_ArraySet.h:179
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:558