chunkedseq
container library for large in-memory data sets
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
itemsearch.hpp
Go to the documentation of this file.
1 
13 #include "segment.hpp"
14 
15 #ifndef _PASL_DATA_ITEMSEARCH_H_
16 #define _PASL_DATA_ITEMSEARCH_H_
17 
18 namespace pasl {
19 namespace data {
20 namespace chunkedseq {
21 namespace itemsearch {
22 
23 /***********************************************************************/
24 
25 /*---------------------------------------------------------------------*/
26 /* Compare-by-position (i.e., one-based index)
27  *
28  * The purpose of this operator is to enable optimizations in item
29  * search that are specific to indexing (i.e., finding an item
30  * quickly by using just pointer arithmetic to locate its position in
31  * the sequence).
32  */
33 
34 template <class Comparable>
35 class less_than {
36 public:
37  bool operator()(Comparable x, Comparable y) {
38  return x < y;
39  }
40 };
41 
42 template <class Measured, class Position, class Measured_fields, class Comparator>
44 private:
45 
46  Position pos;
47 
48 public:
49 
50  compare_measured_by_position(Position pos) : pos(pos) { }
51 
52  bool operator()(Measured m) const {
53  return Comparator()(pos, Measured_fields::csize(m));
54  }
55 
56  Position get_position() const {
57  return pos;
58  }
59 
60 };
61 
62 /* \brief Predicate constructor to define zero-based indexing scheme
63  *
64  * Example use:
65  *
66  * less_than_by_position p(5); // p(i) = 5 < i
67  *
68  * p(1) p(2) p(3) p(4) p(5) p(6) p(7)
69  * f f f f f t t
70  */
71 template <class Measured, class Position, class Measured_fields>
74 
75 /*---------------------------------------------------------------------*/
76 /* Search result type */
77 
84 template <class Position, class Measured>
86 public:
87  Position position;
88  Measured prefix;
90  search_result(Position position, Measured prefix)
91  : position(position), prefix(prefix) { }
92 };
93 
94 /*---------------------------------------------------------------------*/
95 /* Linear search for an item in a segment */
96 
97 template <class Item, class Algebra>
99 public:
100 
101  using value_type = Item;
102  using const_pointer = const value_type*;
104 
105  using algebra_type = Algebra;
107 
109 
110  template <class Pred, class Measure>
111  static result_type search_by(segment_type seg, const Measure& meas, measured_type prefix,
112  const Pred& p) {
113  measured_type new_prefix = prefix;
114  const_pointer ptr;
115  for (ptr = seg.middle; ptr != seg.end; ptr++) {
116  measured_type m = algebra_type::combine(new_prefix, meas(*ptr));
117  if (p(m))
118  break; // found it
119  new_prefix = m;
120  }
121  return result_type(ptr, new_prefix);
122  }
123 
124 };
125 
126 /*---------------------------------------------------------------------*/
127 /* Linear search for an item in a fixed-capacity queue */
128 
130 public:
131  static constexpr bool enable_index_optimization = false;
132 
133  template <class Measured>
134  static size_t size(Measured& m) {
135  assert(false);
136  return 0;
137  }
138  template <class Measured>
139  static size_t csize(Measured m) {
140  assert(false);
141  return 0;
142  }
143 
144 };
145 
146 template <class Fixedcapacity_queue, class Algebra, class Size_access=no_size_access>
148 public:
149 
150  using queue_type = Fixedcapacity_queue;
151  using size_type = size_t;
153  using pointer = value_type*;
154 
155  using algebra_type = Algebra;
157 
159 
160 private:
161 
163  using search_in_segment_result_type = typename search_in_segment_type::result_type;
164  using segment_type = typename search_in_segment_type::segment_type;
165 
166  segment_type segment_by_index(const queue_type& items, size_type i) const {
167  return make_const_segment(items.segment_by_index((int)i));
168  }
169 
170 public:
171 
172  template <class Pred, class Measure>
173  result_type operator()(const queue_type& items, const Measure& meas, measured_type prefix,
174  const Pred& p) const {
175 
176  /* reference version
177  measured_type new_prefix = prefix;
178  size_t nb = items.size();
179  size_t i;
180  for (i = 0; i < nb; i++) {
181  measured_type m = algebra_type::combine(new_prefix, meas(items[i]));
182  if (p(m))
183  break; // found it
184  new_prefix = m;
185  }
186  return result_type(i+1, new_prefix);
187  */
188 
189  size_type sz = size_type(items.size());
190  segment_type seg1 = segment_by_index(items, 0);
191  segment_type seg2 = segment_by_index(items, sz - 1);
192  if (seg1.begin == seg2.begin) { // no wraparound
193  assert(seg1.end - seg1.begin == sz);
194  search_in_segment_result_type res = search_in_segment_type::search_by(seg1, meas, prefix, p);
195  return result_type(res.position - seg1.begin + 1, res.prefix);
196  }
197  // wraparound
198  search_in_segment_result_type res = search_in_segment_type::search_by(seg1, meas, prefix, p);
199  size_type i = res.position - seg1.begin;
200  prefix = res.prefix;
201  if (res.position != seg1.end) // found in first of two segments
202  return result_type(i + 1, prefix);
203  seg2 = segment_type(seg2.begin, seg2.begin, seg2.end);
204  res = search_in_segment_type::search_by(seg2, meas, prefix, p);
205  size_type seg1_nb = seg1.end - seg1.begin;
206  size_type seg2_nb = res.position - seg2.begin;
207  i = seg1_nb + seg2_nb;
208  prefix = res.prefix;
209  return result_type(i + 1, prefix);
210 
211  }
212 
213  // optimization that is specific to a search for a position (i.e., one-based index)
214  // the client field of the prefix value returned by this function is the identity value
215  template <class Measure>
216  result_type operator()(const queue_type& items, const Measure& meas, measured_type prefix,
218  if (! Size_access::enable_index_optimization) {
219  // this optimization does not apply
220  auto q = [&] (measured_type m) { return p(m); };
221  return operator()(items, meas, prefix, q);
222  }
223  size_type target = p.get_position() + 1;
224  size_type sz_prefix = Size_access::csize(prefix);
225  size_type nb_items = size_type(items.size());
226  size_type sz_with_items = nb_items + sz_prefix;
227  assert(target <= sz_with_items + 1);
229  if (target == sz_with_items + 1) { // target pointing one past last item
230  res.position = sz_with_items + 1;
231  res.prefix = algebra_type::identity();
232  Size_access::size(res.prefix) = res.position;
233  } else { // target pointing to an item in the chunk
234  res.position = target - sz_prefix;
235  res.prefix = algebra_type::identity();
236  Size_access::size(res.prefix) = target - 1;
237  }
238  return res;
239  }
240 
241 };
242 
243 /*---------------------------------------------------------------------*/
244 /* Search over the items of a chunk */
245 
246 template <class Chunk,
247  class Algebra,
248  class Size_access=no_size_access,
249  template <class Fixedcapacity_queue, class Cache2, class Size_access2>
250  class Queue_search=search_in_fixed_capacity_queue>
252 public:
253 
254  using chunk_type = Chunk;
255  using queue_type = typename Chunk::queue_type;
256  using size_type = size_t;
258  using pointer = value_type*;
259 
260  using algebra_type = Algebra;
262 
263  using queue_search_type = Queue_search<queue_type, algebra_type, Size_access>;
264 
265  using result_type = typename queue_search_type::result_type;
266 
267 public:
268 
269  template <class Pred, class Measure>
270  result_type operator()(const chunk_type& chunk, const Measure& meas, measured_type prefix,
271  const Pred& p) const {
272  queue_search_type search;
273  return search(chunk.items, meas, prefix, p);
274  }
275 
276 };
277 
278 /***********************************************************************/
279 
280 } // end namespace
281 } // end namespace
282 } // end namespace
283 } // end namespace
284 
285 #endif
typename chunk_type::value_type value_type
Definition: itemsearch.hpp:257
pointer_type end
Definition: segment.hpp:44
Segment descriptor.
Definition: segment.hpp:34
result_type operator()(const queue_type &items, const Measure &meas, measured_type prefix, const Pred &p) const
Definition: itemsearch.hpp:173
bool operator()(Comparable x, Comparable y)
Definition: itemsearch.hpp:37
Fixed-capacity array along with cached measurement of array contents.
Definition: chunk.hpp:93
Memory segment.
compare_measured_by_position< Measured, Position, Measured_fields, less_than< Position >> less_than_by_position
Definition: itemsearch.hpp:73
search_result< const_pointer, measured_type > result_type
Definition: itemsearch.hpp:108
Definition: algebra.hpp:18
Result of a search for an item in a sequence.
Definition: itemsearch.hpp:85
typename queue_search_type::result_type result_type
Definition: itemsearch.hpp:265
segment< const Item * > make_const_segment(segment< Item * > seg)
Definition: segment.hpp:89
static result_type search_by(segment_type seg, const Measure &meas, measured_type prefix, const Pred &p)
Definition: itemsearch.hpp:111
result_t res
Definition: bench.cpp:51
result_type operator()(const chunk_type &chunk, const Measure &meas, measured_type prefix, const Pred &p) const
Definition: itemsearch.hpp:270
search_result(Position position, Measured prefix)
Definition: itemsearch.hpp:90
result_type operator()(const queue_type &items, const Measure &meas, measured_type prefix, const less_than_by_position< measured_type, size_type, Size_access > &p) const
Definition: itemsearch.hpp:216
pointer_type middle
Definition: segment.hpp:42
Queue_search< queue_type, algebra_type, Size_access > queue_search_type
Definition: itemsearch.hpp:263
bytes_8 Item
Definition: do_fifo.cpp:107
search_result< size_type, measured_type > result_type
Definition: itemsearch.hpp:158