mdds
block_util.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  * Copyright (c) 2021 Kohei Yoshida
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  *
27  ************************************************************************/
28 
29 #ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30 #define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
31 
32 #include "mdds/global.hpp"
33 #include "../types.hpp"
34 
35 namespace mdds { namespace mtv { namespace aos { namespace detail {
36 
37 template<typename Blks, lu_factor_t F>
39 {
40  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
41  {
42  static_assert(
43  mdds::detail::invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
44  }
45 };
46 
47 template<typename Blks>
48 struct adjust_block_positions<Blks, lu_factor_t::none>
49 {
50  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
51  {
52  int64_t n = blocks.size();
53 
54  if (start_block_index >= n)
55  return;
56 
57 #if MDDS_USE_OPENMP
58 #pragma omp parallel for
59 #endif
60  for (int64_t i = start_block_index; i < n; ++i)
61  blocks[i].position += delta;
62  }
63 };
64 
65 template<typename Blks>
66 struct adjust_block_positions<Blks, lu_factor_t::lu4>
67 {
68  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
69  {
70  int64_t n = blocks.size();
71 
72  if (start_block_index >= n)
73  return;
74 
75  // Ensure that the section length is divisible by 4.
76  int64_t len = n - start_block_index;
77  int64_t rem = len & 3; // % 4
78  len -= rem;
79  len += start_block_index;
80 #if MDDS_USE_OPENMP
81 #pragma omp parallel for
82 #endif
83  for (int64_t i = start_block_index; i < len; i += 4)
84  {
85  blocks[i].position += delta;
86  blocks[i + 1].position += delta;
87  blocks[i + 2].position += delta;
88  blocks[i + 3].position += delta;
89  }
90 
91  rem += len;
92  for (int64_t i = len; i < rem; ++i)
93  blocks[i].position += delta;
94  }
95 };
96 
97 template<typename Blks>
98 struct adjust_block_positions<Blks, lu_factor_t::lu8>
99 {
100  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
101  {
102  int64_t n = blocks.size();
103 
104  if (start_block_index >= n)
105  return;
106 
107  // Ensure that the section length is divisible by 8.
108  int64_t len = n - start_block_index;
109  int64_t rem = len & 7; // % 8
110  len -= rem;
111  len += start_block_index;
112 #if MDDS_USE_OPENMP
113 #pragma omp parallel for
114 #endif
115  for (int64_t i = start_block_index; i < len; i += 8)
116  {
117  blocks[i].position += delta;
118  blocks[i + 1].position += delta;
119  blocks[i + 2].position += delta;
120  blocks[i + 3].position += delta;
121  blocks[i + 4].position += delta;
122  blocks[i + 5].position += delta;
123  blocks[i + 6].position += delta;
124  blocks[i + 7].position += delta;
125  }
126 
127  rem += len;
128  for (int64_t i = len; i < rem; ++i)
129  blocks[i].position += delta;
130  }
131 };
132 
133 template<typename Blks>
134 struct adjust_block_positions<Blks, lu_factor_t::lu16>
135 {
136  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
137  {
138  int64_t n = blocks.size();
139 
140  if (start_block_index >= n)
141  return;
142 
143  // Ensure that the section length is divisible by 16.
144  int64_t len = n - start_block_index;
145  int64_t rem = len & 15; // % 16
146  len -= rem;
147  len += start_block_index;
148 #if MDDS_USE_OPENMP
149 #pragma omp parallel for
150 #endif
151  for (int64_t i = start_block_index; i < len; i += 16)
152  {
153  blocks[i].position += delta;
154  blocks[i + 1].position += delta;
155  blocks[i + 2].position += delta;
156  blocks[i + 3].position += delta;
157  blocks[i + 4].position += delta;
158  blocks[i + 5].position += delta;
159  blocks[i + 6].position += delta;
160  blocks[i + 7].position += delta;
161  blocks[i + 8].position += delta;
162  blocks[i + 9].position += delta;
163  blocks[i + 10].position += delta;
164  blocks[i + 11].position += delta;
165  blocks[i + 12].position += delta;
166  blocks[i + 13].position += delta;
167  blocks[i + 14].position += delta;
168  blocks[i + 15].position += delta;
169  }
170 
171  rem += len;
172  for (int64_t i = len; i < rem; ++i)
173  blocks[i].position += delta;
174  }
175 };
176 
177 template<typename Blks>
178 struct adjust_block_positions<Blks, lu_factor_t::lu32>
179 {
180  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
181  {
182  int64_t n = blocks.size();
183 
184  if (start_block_index >= n)
185  return;
186 
187  // Ensure that the section length is divisible by 32.
188  int64_t len = n - start_block_index;
189  int64_t rem = len & 31; // % 32
190  len -= rem;
191  len += start_block_index;
192 #if MDDS_USE_OPENMP
193 #pragma omp parallel for
194 #endif
195  for (int64_t i = start_block_index; i < len; i += 32)
196  {
197  blocks[i].position += delta;
198  blocks[i + 1].position += delta;
199  blocks[i + 2].position += delta;
200  blocks[i + 3].position += delta;
201  blocks[i + 4].position += delta;
202  blocks[i + 5].position += delta;
203  blocks[i + 6].position += delta;
204  blocks[i + 7].position += delta;
205  blocks[i + 8].position += delta;
206  blocks[i + 9].position += delta;
207  blocks[i + 10].position += delta;
208  blocks[i + 11].position += delta;
209  blocks[i + 12].position += delta;
210  blocks[i + 13].position += delta;
211  blocks[i + 14].position += delta;
212  blocks[i + 15].position += delta;
213  blocks[i + 16].position += delta;
214  blocks[i + 17].position += delta;
215  blocks[i + 18].position += delta;
216  blocks[i + 19].position += delta;
217  blocks[i + 20].position += delta;
218  blocks[i + 21].position += delta;
219  blocks[i + 22].position += delta;
220  blocks[i + 23].position += delta;
221  blocks[i + 24].position += delta;
222  blocks[i + 25].position += delta;
223  blocks[i + 26].position += delta;
224  blocks[i + 27].position += delta;
225  blocks[i + 28].position += delta;
226  blocks[i + 29].position += delta;
227  blocks[i + 30].position += delta;
228  blocks[i + 31].position += delta;
229  }
230 
231  rem += len;
232  for (int64_t i = len; i < rem; ++i)
233  blocks[i].position += delta;
234  }
235 };
236 
237 }}}} // namespace mdds::mtv::aos::detail
238 
239 #endif
240 
241 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: flat_segment_tree.hpp:46