chunkedseq
container library for large in-memory data sets
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
chunk.hpp
Go to the documentation of this file.
1 
13 #include "itemsearch.hpp"
14 #include "annotation.hpp"
15 
16 #ifndef _PASL_DATA_CHUNK_H_
17 #define _PASL_DATA_CHUNK_H_
18 
19 namespace pasl {
20 namespace data {
21 namespace chunkedseq {
22 
23 /***********************************************************************/
24 
25 /*---------------------------------------------------------------------*/
26 // Carrier for a deallocation function that calls "delete" on its argument
27 
29 public:
30  static constexpr bool should_use = true;
31  template <class Item>
32  static void dealloc(Item* x) {
33  delete x;
34  }
35 };
36 
37 class Dummy_pointer_deleter { // does nothing
38 public:
39  static constexpr bool should_use = false;
40  template <class Item>
41  static void dealloc(Item x) {
42  assert(false);
43  }
44 };
45 
46 /*---------------------------------------------------------------------*/
47 // Carrier for a deep-copy function that calls the copy constructor on its argument
48 
50 public:
51  static constexpr bool should_use = true;
52  template <class Item>
53  static Item* copy(Item* x) {
54  return new Item(*x);
55  };
56 };
57 
58 class Dummy_pointer_deep_copier { // does nothing
59 public:
60  static constexpr bool should_use = false;
61  template <class Item>
62  static Item copy(Item x) {
63  assert(false);
64  return x;
65  };
66 };
67 
68 /*---------------------------------------------------------------------*/
85 template <
86  class Fixed_capacity_queue,
87  class Cached_measure,
88  class Annotation=annotation::annotation_builder<>,
89  class Pointer_deleter1=Dummy_pointer_deleter,
90  class Pointer_deep_copier1=Dummy_pointer_deep_copier,
91  class Size_access=itemsearch::no_size_access
92  >
93 class chunk {
94 public:
95 
96  /*---------------------------------------------------------------------*/
99  using queue_type = Fixed_capacity_queue;
103  using const_reference = const value_type&;
104  using pointer = value_type*;
105  using const_pointer = const value_type*;
106  using segment_type = typename queue_type::segment_type;
108 
109  /*---------------------------------------------------------------------*/
112  using cache_type = Cached_measure;
114  using measured_type = typename cache_type::measured_type;
115  using algebra_type = typename cache_type::algebra_type;
116  using measure_type = typename cache_type::measure_type;
118 
119  /*---------------------------------------------------------------------*/
122  using size_type = typename cache_type::size_type;
124  using annotation_type = Annotation;
127 
129  static constexpr int capacity = queue_type::capacity;
130 
137 
139 
140 private:
141 
142  /* Note:
143  * Our combine operator is not necessarily commutative.
144  * as such, we need to be careful to get the order of the
145  * operands right when we increment on the front and back.
146  */
147 
148  inline void incr_front(const measured_type& m) {
149  cached = algebra_type::combine(m, cached);
150  }
151 
152  inline void incr_back(const measured_type& m) {
153  cached = algebra_type::combine(cached, m);
154  }
155 
156  inline void decr_front(const measured_type& m) {
157  incr_front(algebra_type::inverse(m));
158  }
159 
160  inline void decr_back(const measured_type& m) {
161  incr_back(algebra_type::inverse(m));
162  }
163 
164  inline measured_type measure_range(const measure_type& meas, size_type lo, size_type hi) const {
165  size_type nb = hi - lo;
166  size_type sz = size();
167  assert(nb <= sz);
168  measured_type res = algebra_type::identity();
169  items.for_each_segment(int(lo), int(hi), [&] (const_pointer lo, const_pointer hi) {
170  res = algebra_type::combine(res, meas(lo, hi));
171  });
172  return res;
173  }
174 
175  inline measured_type measure(const measure_type& meas) const {
176  return measure_range(meas, 0, size());
177  }
178 
180 
181  /* increments the cached measurement by the combined measurements
182  * of the items at positions [0, hi]
183  */
184  inline void incr_frontn(const measure_type& meas, size_type hi) {
185  incr_front(measure_range(meas, 0, hi));
186  }
187 
188  /* increments the cached measurement by the combined measurements
189  * of the items at positions [lo, size()]
190  */
191  inline void incr_backn(const measure_type& meas, size_type lo) {
192  incr_back(measure_range(meas, lo, size()));
193  }
194 
195  inline void decr_frontn(const measure_type& meas, size_type hi) {
196  decr_front(measure_range(meas, 0, hi));
197  }
198 
199  inline void decr_backn(const measure_type& meas, size_type lo) {
200  decr_back(measure_range(meas, lo, size()));
201  }
202 
203 public:
204 
205  /*---------------------------------------------------------------------*/
208  chunk() {
210  cached = algebra_type::identity();
211  }
212 
213  chunk(const self_type& other)
214  : cached(other.cached), items(other.items), annotation(other.annotation) {
215  if (Pointer_deep_copier1::should_use)
216  for_each([&] (value_type& v) {
218  });
219  }
220 
221  ~chunk() {
222  if (Pointer_deleter1::should_use)
223  for_each([&] (value_type& v) {
224  Pointer_deleter1::dealloc(v);
225  });
226  }
228 
229  /*---------------------------------------------------------------------*/
232  bool full() const {
234  return items.full();
235  }
236 
237  bool empty() const {
238  return items.empty();
239  }
240 
241  bool partial() const {
242  return items.partial();
243  }
244 
245  size_type size() const {
246  return items.size();
247  }
249 
250  /*---------------------------------------------------------------------*/
253  measured_type get_cached() const {
255  return cached;
256  }
258 
259  /*---------------------------------------------------------------------*/
262  value_type& front() const {
264  return items.front();
265  }
266 
267  value_type& back() const {
268  return items.back();
269  }
270 
271  void frontn(value_type* xs, size_type nb) {
272  items.frontn(xs, (int)nb);
273  }
274 
275  void backn(value_type* xs, size_type nb) {
276  items.backn(xs, (int)nb);
277  }
278 
280  return items[ix];
281  }
282 
283  template <class Body>
284  void for_each(const Body& body) const {
285  items.for_each(body);
286  }
287 
288  template <class Body>
289  void for_each_segment(const Body& body) const {
290  for_each_segment(size_type(0), size(), body);
291  }
292 
293  // lo inclusive; hi exclusive
294  template <class Body>
295  void for_each_segment(size_type lo, size_type hi, const Body& body) const {
296  items.for_each_segment(int(lo), int(hi), body);
297  }
298 
300  return items.segment_by_index(int(i));
301  }
302 
304  return items.index_of_pointer(p);
305  }
307 
308  /*---------------------------------------------------------------------*/
311  void push_front(const measure_type& meas, const value_type& x) {
313  items.push_front(x);
314  incr_front(meas(x));
315  }
316 
317  void push_back(const measure_type& meas, const value_type& x) {
318  items.push_back(x);
319  incr_back(meas(x));
320  }
321 
323  value_type v = front();
324  if (algebra_type::has_inverse)
325  decr_front(meas(v));
326  items.pop_front();
327  if (! algebra_type::has_inverse)
328  reset_cache(meas);
329  return v;
330  }
331 
333  value_type v = back();
334  if (algebra_type::has_inverse)
335  decr_back(meas(v));
336  items.pop_back();
337  if (! algebra_type::has_inverse)
338  reset_cache(meas);
339  return v;
340  }
341 
342  /* TODO: would this code be more efficient when has_inverse is true?
343  value_type pop_front(const measure_type& meas) {
344  value_type v = items.pop_front();
345  if (algebra_type::has_inverse)
346  decr_front(meas(v));
347  if (! algebra_type::has_inverse)
348  reset_cache(meas);
349  return v;
350  }
351 
352  value_type pop_back(const measure_type& meas) {
353  value_type v = items.pop_back();
354  if (algebra_type::has_inverse)
355  decr_back(meas(v));
356  if (! algebra_type::has_inverse)
357  reset_cache(meas);
358  return v;
359  }
360  */
361 
362  void pushn_front(const measure_type& meas, const value_type* xs, size_type nb) {
363  items.pushn_front(xs, (int)nb);
364  incr_frontn(meas, nb);
365  }
366 
367  void pushn_back(const measure_type& meas, const value_type* xs, size_type nb) {
368  size_type nb_before = size();
369  items.pushn_back(xs, (int)nb);
370  incr_backn(meas, int(nb_before));
371  }
372 
373  template <class Body>
374  void pushn_back(const measure_type& meas, const Body& body, size_type nb) {
375  size_type nb_before = size();
376  items.pushn_back(body, (int)nb);
377  incr_backn(meas, int(nb_before));
378  }
379 
380  void popn_front(const measure_type& meas, size_type nb) {
381  if (algebra_type::has_inverse)
382  decr_frontn(meas, nb);
383  items.popn_front(int(nb));
384  if (! algebra_type::has_inverse)
385  reset_cache(meas);
386  }
387 
388  void popn_back(const measure_type& meas, size_type nb) {
389  if (algebra_type::has_inverse) {
390  size_type nb_before = size() - nb;
391  decr_backn(meas, nb_before);
392  }
393  items.popn_back((int)nb);
394  if (! algebra_type::has_inverse)
395  reset_cache(meas);
396  }
397 
398  void popn_front(const measure_type& meas, value_type* xs, size_type nb) {
399  check_cached(meas);
400  if (algebra_type::has_inverse)
401  decr_frontn(meas, nb);
402  items.popn_front(xs, (int)nb);
403  if (! algebra_type::has_inverse)
404  reset_cache(meas);
405  check_cached(meas);
406  }
407 
408  void popn_back(const measure_type& meas, value_type* xs, size_type nb) {
409  if (algebra_type::has_inverse) {
410  size_type nb_before = size() - nb;
411  decr_backn(meas, nb_before);
412  }
413  items.popn_back(xs, (int)nb);
414  if (! algebra_type::has_inverse)
415  reset_cache(meas);
416  }
417 
418  template <class Consumer, bool should_consume>
419  void popn_back(const measure_type& meas, const Consumer& cons, size_type nb) {
420  size_type sz = size();
421  assert(nb <= sz);
422  if (should_consume && sz > 0 && nb > 0) {
423  size_type i = sz - nb;
425  size_type sz_seg = seg.end - seg.middle;
426  if (sz_seg == nb) {
427  cons(seg.middle, seg.middle + nb);
428  } else { // handle wraparound
429  segment_type seg2 = segment_by_index(i + sz_seg);
430  cons(seg2.middle, seg2.middle + (nb - sz_seg));
431  cons(seg.middle, seg.middle + sz_seg);
432  }
433  }
434  popn_back(meas, nb);
435  }
436 
437  template <class Consumer, bool should_consume>
438  void popn_front(const measure_type& meas, const Consumer& cons, size_type nb) {
439  size_type sz = size();
440  assert(nb <= sz);
441  if (should_consume && sz > 0 && nb > 0) {
442  size_type i = nb - 1;
444  size_type sz_seg = seg.middle - seg.begin + 1;
445  if (sz_seg == nb) {
446  cons(seg.begin, seg.middle + 1);
447  } else { // handle wraparound
448  segment_type seg2 = segment_by_index(0);
449  cons(seg2.begin, seg2.end);
450  cons(seg.begin, seg.middle + 1);
451  }
452  }
453  popn_front(meas, nb);
454  }
455 
456  void transfer_from_back_to_front(const measure_type& meas, chunk& target, size_type nb) {
457  size_type sz = size();
458  assert(sz >= nb);
459  measured_type delta = measure_range(meas, sz - nb, sz);
460  if (algebra_type::has_inverse)
461  decr_back(delta);
462  items.transfer_from_back_to_front(target.items, (int)nb);
463  if (! algebra_type::has_inverse)
464  reset_cache(meas);
465  target.incr_front(delta);
466  }
467 
468  void transfer_from_front_to_back(const measure_type& meas, chunk& target, size_type nb) {
469  measured_type delta = measure_range(meas, size_type(0), nb);
470  if (algebra_type::has_inverse)
471  decr_front(delta);
472  items.transfer_from_front_to_back(target.items, (int)nb);
473  if (! algebra_type::has_inverse)
474  reset_cache(meas);
475  target.incr_back(delta);
476  }
477 
478  /* 3-way split: place in `x` the first item that reaches the target measure,
479  * if there is such an item.
480  *
481  * write in `found` whether or not there is such an item.
482  */
483  template <class Pred, class Search, class Search_measure>
484  typename Search::measured_type split(const measure_type& meas,
485  const Pred& p,
486  const Search& search,
487  const Search_measure& search_meas,
488  typename Search::measured_type prefix,
489  bool& found,
490  value_type& x,
491  self_type& other) {
492  auto res = search(*this, search_meas, prefix, p);
493  size_type pos = res.position; // one-based index pointing at the target item
494  prefix = res.prefix;
495  size_type sz = size();
496  assert(sz > 0);
497  assert(pos > 0);
498  found = pos < sz+1;
499  if (pos == 1) {
500  x = pop_front(meas);
501  swap(other);
502  return prefix;
503  }
504  if (pos == sz) {
505  x = pop_back(meas);
506  return prefix;
507  }
508  transfer_from_back_to_front(meas, other, sz - pos);
509  x = pop_back(meas);
510  return prefix;
511  }
512 
513  /* same as above but requires that there is an item in the chunk that reaches
514  * the target measure
515  */
516  template <class Pred, class Search, class Search_measure>
517  typename Search::measured_type split(const measure_type& meas,
518  const Pred& p,
519  const Search& search,
520  const Search_measure& search_meas,
521  typename Search::measured_type prefix,
522  value_type& x,
523  self_type& other) {
524  bool found;
525  prefix = split(meas, p, search, search_meas, prefix, found, x, other);
526  assert(found);
527  return prefix;
528  }
529 
530  // 2-way split: based on a 3-way split ==> requires a predicate valid for a 3-way split
531  template <class Pred, class Search, class Search_measure>
532  typename Search::measured_type split(const measure_type& meas,
533  const Pred& p,
534  const Search& search,
535  const Search_measure& search_meas,
536  typename Search::measured_type prefix,
537  self_type& other) {
538  value_type x;
539  prefix = split(meas, p, search, search_meas, prefix, x, other);
540  other.push_front(meas, x);
541  return prefix;
542  }
543 
544  template <class Pred>
546  const Pred& p,
547  measured_type prefix,
548  value_type& x,
549  self_type& other) {
550  search_type search;
551  return split(meas, p, search, meas, prefix, x, other);
552  }
553 
554  template <class Pred>
556  const Pred& p,
557  measured_type prefix,
558  self_type& other) {
559  search_type search;
560  return split(meas, p, search, meas, prefix, other);
561  }
562 
563  void concat(const measure_type& meas, self_type& other) {
564  other.transfer_from_front_to_back(meas, *this, other.size());
565  }
566 
567  void clear() {
568  items.popn_back(int(size()));
569  cached = algebra_type::identity();
570  }
571 
572  void swap(self_type& other) {
573  items.swap(other.items);
574  cache_type::swap(cached, other.cached);
575  annotation.swap(other.annotation);
576  }
577 
578  void reset_cache(const measure_type& meas) {
579  cached = measure(meas);
580  }
582 
583  void check_cached(const measure_type& meas) const {
584 #ifdef DEBUG_CHUNK
585  if (Size_access::enable_index_optimization)
586  assert(Size_access::csize(measure(meas)) == Size_access::csize(cached));
587 #endif
588  }
589 
590 };
591 
592 /***********************************************************************/
593 
594 } // end namespace
595 } // end namespace
596 } // end namespace
597 
598 #endif
void popn_front(const measure_type &meas, const Consumer &cons, size_type nb)
Definition: chunk.hpp:438
size_type index_of_pointer(const value_type *p) const
Definition: chunk.hpp:303
value_type & front() const
Definition: chunk.hpp:263
void for_each(const Body &body) const
Definition: chunk.hpp:284
value_type pop_back(const measure_type &meas)
Definition: chunk.hpp:332
queue_type items
queue structure to contain the items of the chunk
Definition: chunk.hpp:132
value_type & back() const
Definition: chunk.hpp:267
Search::measured_type split(const measure_type &meas, const Pred &p, const Search &search, const Search_measure &search_meas, typename Search::measured_type prefix, bool &found, value_type &x, self_type &other)
Definition: chunk.hpp:484
void reset_cache(const measure_type &meas)
Definition: chunk.hpp:578
value_type pop_front(const measure_type &meas)
Definition: chunk.hpp:322
void for_each_segment(const Body &body) const
Definition: chunk.hpp:289
value_type & operator[](size_type ix) const
Definition: chunk.hpp:279
void popn_front(const measure_type &meas, value_type *xs, size_type nb)
Definition: chunk.hpp:398
void popn_back(const measure_type &meas, value_type *xs, size_type nb)
Definition: chunk.hpp:408
Fixed-capacity array along with cached measurement of array contents.
Definition: chunk.hpp:93
Search::measured_type split(const measure_type &meas, const Pred &p, const Search &search, const Search_measure &search_meas, typename Search::measured_type prefix, value_type &x, self_type &other)
Definition: chunk.hpp:517
Defines an interface for attaching data to nodes in the underlying tree structure of the chunked sequ...
static void dealloc(Item *x)
Definition: chunk.hpp:32
chunk(const self_type &other)
Definition: chunk.hpp:213
void for_each_segment(size_type lo, size_type hi, const Body &body) const
Definition: chunk.hpp:295
void copy(typename Alloc::pointer destination, typename Alloc::const_pointer source, typename Alloc::size_type num)
Polymorphic array copy.
void swap(self_type &other)
Definition: chunk.hpp:572
void transfer_from_front_to_back(const measure_type &meas, chunk &target, size_type nb)
Definition: chunk.hpp:468
void popn_back(const measure_type &meas, const Consumer &cons, size_type nb)
Definition: chunk.hpp:419
void popn_back(const measure_type &meas, size_type nb)
Definition: chunk.hpp:388
Definition: algebra.hpp:18
size_type size() const
Definition: chunk.hpp:245
measured_type split(const measure_type &meas, const Pred &p, measured_type prefix, value_type &x, self_type &other)
Definition: chunk.hpp:545
measured_type split(const measure_type &meas, const Pred &p, measured_type prefix, self_type &other)
Definition: chunk.hpp:555
void popn_front(const measure_type &meas, size_type nb)
Definition: chunk.hpp:380
static constexpr bool should_use
Definition: chunk.hpp:30
Routines for finding an item in a container via binary search.
result_t res
Definition: bench.cpp:51
segment_type segment_by_index(size_type i) const
Definition: chunk.hpp:299
void check_cached(const measure_type &meas) const
Definition: chunk.hpp:583
void push_front(const measure_type &meas, const value_type &x)
Definition: chunk.hpp:312
void pushn_back(const measure_type &meas, const value_type *xs, size_type nb)
Definition: chunk.hpp:367
Search::measured_type split(const measure_type &meas, const Pred &p, const Search &search, const Search_measure &search_meas, typename Search::measured_type prefix, self_type &other)
Definition: chunk.hpp:532
void transfer_from_back_to_front(const measure_type &meas, chunk &target, size_type nb)
Definition: chunk.hpp:456
void pushn_back(const measure_type &meas, const Body &body, size_type nb)
Definition: chunk.hpp:374
void concat(const measure_type &meas, self_type &other)
Definition: chunk.hpp:563
measured_type get_cached() const
Definition: chunk.hpp:254
Fixed_capacity_queue queue_type
Definition: chunk.hpp:100
void frontn(value_type *xs, size_type nb)
Definition: chunk.hpp:271
measured_type cached
cached measurement of the items contained in the chunk
Definition: chunk.hpp:134
annotation_type annotation
annotation value to be attached to the chunk
Definition: chunk.hpp:136
void pushn_front(const measure_type &meas, const value_type *xs, size_type nb)
Definition: chunk.hpp:362
void push_back(const measure_type &meas, const value_type &x)
Definition: chunk.hpp:317
static constexpr int capacity
capacity in number of items
Definition: chunk.hpp:129
bytes_8 Item
Definition: do_fifo.cpp:107
void backn(value_type *xs, size_type nb)
Definition: chunk.hpp:275