Clp  1.17.8
CoinAbcHelperFunctions.hpp
Go to the documentation of this file.
1 /* $Id$ */
2 // Copyright (C) 2003, International Business Machines
3 // Corporation and others, Copyright (C) 2012, FasterCoin. All Rights Reserved.
4 // This code is licensed under the terms of the Eclipse Public License (EPL).
5 
6 #ifndef CoinAbcHelperFunctions_H
7 #define CoinAbcHelperFunctions_H
8 
9 #include "ClpConfig.h"
10 #ifdef HAVE_CMATH
11 #include <cmath>
12 #else
13 #ifdef HAVE_MATH_H
14 #include <math.h>
15 #else
16 #include <cmath>
17 #endif
18 #endif
19 #include "CoinAbcCommon.hpp"
20 #ifndef abc_assert
21 #define abc_assert(condition) \
22  { \
23  if (!condition) { \
24  printf("abc_assert in %s at line %d - %s is false\n", \
25  __FILE__, __LINE__, __STRING(condition)); \
26  abort(); \
27  } \
28  }
29 #endif
30 // cilk_for granularity.
31 #define CILK_FOR_GRAINSIZE 128
32 //#define AVX2 2
33 #if AVX2 == 1
34 #include "emmintrin.h"
35 #elif AVX2 == 2
36 #include <immintrin.h>
37 #elif AVX2 == 3
38 #include "avx2intrin.h"
39 #endif
40 //#define __AVX__ 1
41 //#define __AVX2__ 1
48 #define UNROLL_SCATTER 2
49 #define INLINE_SCATTER 1
50 #if INLINE_SCATTER == 0
51 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
52  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
53  const int *COIN_RESTRICT thisIndex,
54  CoinFactorizationDouble *COIN_RESTRICT region);
55 #else
56 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
57  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
58  const int *COIN_RESTRICT thisIndex,
59  CoinFactorizationDouble *COIN_RESTRICT region)
60 {
61 #if UNROLL_SCATTER == 0
62  for (CoinBigIndex j = number - 1; j >= 0; j--) {
63  CoinSimplexInt iRow = thisIndex[j];
64  CoinFactorizationDouble regionValue = region[iRow];
65  CoinFactorizationDouble value = thisElement[j];
66  assert(value);
67  region[iRow] = regionValue - value * pivotValue;
68  }
69 #elif UNROLL_SCATTER == 1
70  if ((number & 1) != 0) {
71  number--;
72  CoinSimplexInt iRow = thisIndex[number];
73  CoinFactorizationDouble regionValue = region[iRow];
74  CoinFactorizationDouble value = thisElement[number];
75  region[iRow] = regionValue - value * pivotValue;
76  }
77  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
78  CoinSimplexInt iRow0 = thisIndex[j];
79  CoinSimplexInt iRow1 = thisIndex[j - 1];
80  CoinFactorizationDouble regionValue0 = region[iRow0];
81  CoinFactorizationDouble regionValue1 = region[iRow1];
82  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
83  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
84  }
85 #elif UNROLL_SCATTER == 2
86  if ((number & 1) != 0) {
87  number--;
88  CoinSimplexInt iRow = thisIndex[number];
89  CoinFactorizationDouble regionValue = region[iRow];
90  CoinFactorizationDouble value = thisElement[number];
91  region[iRow] = regionValue - value * pivotValue;
92  }
93  if ((number & 2) != 0) {
94  CoinSimplexInt iRow0 = thisIndex[number - 1];
95  CoinFactorizationDouble regionValue0 = region[iRow0];
96  CoinFactorizationDouble value0 = thisElement[number - 1];
97  CoinSimplexInt iRow1 = thisIndex[number - 2];
98  CoinFactorizationDouble regionValue1 = region[iRow1];
99  CoinFactorizationDouble value1 = thisElement[number - 2];
100  region[iRow0] = regionValue0 - value0 * pivotValue;
101  region[iRow1] = regionValue1 - value1 * pivotValue;
102  number -= 2;
103  }
104 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
105  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
106  {
107  CoinSimplexInt iRow0 = thisIndex[j];
108  CoinSimplexInt iRow1 = thisIndex[j - 1];
109  CoinFactorizationDouble regionValue0 = region[iRow0];
110  CoinFactorizationDouble regionValue1 = region[iRow1];
111  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
112  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
113  CoinSimplexInt iRow2 = thisIndex[j - 2];
114  CoinSimplexInt iRow3 = thisIndex[j - 3];
115  CoinFactorizationDouble regionValue2 = region[iRow2];
116  CoinFactorizationDouble regionValue3 = region[iRow3];
117  region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
118  region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
119  }
120 #elif UNROLL_SCATTER == 3
121  CoinSimplexInt iRow0;
122  CoinSimplexInt iRow1;
123  CoinFactorizationDouble regionValue0;
124  CoinFactorizationDouble regionValue1;
125  switch (static_cast< unsigned int >(number)) {
126  case 0:
127  break;
128  case 1:
129  iRow0 = thisIndex[0];
130  regionValue0 = region[iRow0];
131  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
132  break;
133  case 2:
134  iRow0 = thisIndex[0];
135  iRow1 = thisIndex[1];
136  regionValue0 = region[iRow0];
137  regionValue1 = region[iRow1];
138  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
139  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
140  break;
141  case 3:
142  iRow0 = thisIndex[0];
143  iRow1 = thisIndex[1];
144  regionValue0 = region[iRow0];
145  regionValue1 = region[iRow1];
146  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
147  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
148  iRow0 = thisIndex[2];
149  regionValue0 = region[iRow0];
150  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
151  break;
152  case 4:
153  iRow0 = thisIndex[0];
154  iRow1 = thisIndex[1];
155  regionValue0 = region[iRow0];
156  regionValue1 = region[iRow1];
157  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
158  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
159  iRow0 = thisIndex[2];
160  iRow1 = thisIndex[3];
161  regionValue0 = region[iRow0];
162  regionValue1 = region[iRow1];
163  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
164  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
165  break;
166  case 5:
167  iRow0 = thisIndex[0];
168  iRow1 = thisIndex[1];
169  regionValue0 = region[iRow0];
170  regionValue1 = region[iRow1];
171  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
172  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
173  iRow0 = thisIndex[2];
174  iRow1 = thisIndex[3];
175  regionValue0 = region[iRow0];
176  regionValue1 = region[iRow1];
177  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
178  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
179  iRow0 = thisIndex[4];
180  regionValue0 = region[iRow0];
181  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
182  break;
183  case 6:
184  iRow0 = thisIndex[0];
185  iRow1 = thisIndex[1];
186  regionValue0 = region[iRow0];
187  regionValue1 = region[iRow1];
188  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
189  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
190  iRow0 = thisIndex[2];
191  iRow1 = thisIndex[3];
192  regionValue0 = region[iRow0];
193  regionValue1 = region[iRow1];
194  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
195  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
196  iRow0 = thisIndex[4];
197  iRow1 = thisIndex[5];
198  regionValue0 = region[iRow0];
199  regionValue1 = region[iRow1];
200  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
201  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
202  break;
203  case 7:
204  iRow0 = thisIndex[0];
205  iRow1 = thisIndex[1];
206  regionValue0 = region[iRow0];
207  regionValue1 = region[iRow1];
208  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
209  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
210  iRow0 = thisIndex[2];
211  iRow1 = thisIndex[3];
212  regionValue0 = region[iRow0];
213  regionValue1 = region[iRow1];
214  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
215  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
216  iRow0 = thisIndex[4];
217  iRow1 = thisIndex[5];
218  regionValue0 = region[iRow0];
219  regionValue1 = region[iRow1];
220  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
221  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
222  iRow0 = thisIndex[6];
223  regionValue0 = region[iRow0];
224  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
225  break;
226  case 8:
227  iRow0 = thisIndex[0];
228  iRow1 = thisIndex[1];
229  regionValue0 = region[iRow0];
230  regionValue1 = region[iRow1];
231  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
232  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
233  iRow0 = thisIndex[2];
234  iRow1 = thisIndex[3];
235  regionValue0 = region[iRow0];
236  regionValue1 = region[iRow1];
237  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
238  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
239  iRow0 = thisIndex[4];
240  iRow1 = thisIndex[5];
241  regionValue0 = region[iRow0];
242  regionValue1 = region[iRow1];
243  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
244  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
245  iRow0 = thisIndex[6];
246  iRow1 = thisIndex[7];
247  regionValue0 = region[iRow0];
248  regionValue1 = region[iRow1];
249  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
250  region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
251  break;
252  default:
253  if ((number & 1) != 0) {
254  number--;
255  CoinSimplexInt iRow = thisIndex[number];
256  CoinFactorizationDouble regionValue = region[iRow];
257  CoinFactorizationDouble value = thisElement[number];
258  region[iRow] = regionValue - value * pivotValue;
259  }
260  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
261  CoinSimplexInt iRow0 = thisIndex[j];
262  CoinSimplexInt iRow1 = thisIndex[j - 1];
263  CoinFactorizationDouble regionValue0 = region[iRow0];
264  CoinFactorizationDouble regionValue1 = region[iRow1];
265  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
266  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
267  }
268  break;
269  }
270 #endif
271 }
272 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
273  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
274  CoinFactorizationDouble *COIN_RESTRICT region)
275 {
276 #if UNROLL_SCATTER == 0
277  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
278  for (CoinBigIndex j = number - 1; j >= 0; j--) {
279  CoinSimplexInt iRow = thisIndex[j];
280  CoinFactorizationDouble regionValue = region[iRow];
281  CoinFactorizationDouble value = thisElement[j];
282  assert(value);
283  region[iRow] = regionValue - value * pivotValue;
284  }
285 #elif UNROLL_SCATTER == 1
286  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
287  if ((number & 1) != 0) {
288  number--;
289  CoinSimplexInt iRow = thisIndex[number];
290  CoinFactorizationDouble regionValue = region[iRow];
291  CoinFactorizationDouble value = thisElement[number];
292  region[iRow] = regionValue - value * pivotValue;
293  }
294  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
295  CoinSimplexInt iRow0 = thisIndex[j];
296  CoinSimplexInt iRow1 = thisIndex[j - 1];
297  CoinFactorizationDouble regionValue0 = region[iRow0];
298  CoinFactorizationDouble regionValue1 = region[iRow1];
299  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
300  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
301  }
302 #elif UNROLL_SCATTER == 2
303  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
304  if ((number & 1) != 0) {
305  number--;
306  CoinSimplexInt iRow = thisIndex[number];
307  CoinFactorizationDouble regionValue = region[iRow];
308  CoinFactorizationDouble value = thisElement[number];
309  region[iRow] = regionValue - value * pivotValue;
310  }
311  if ((number & 2) != 0) {
312  CoinSimplexInt iRow0 = thisIndex[number - 1];
313  CoinFactorizationDouble regionValue0 = region[iRow0];
314  CoinFactorizationDouble value0 = thisElement[number - 1];
315  CoinSimplexInt iRow1 = thisIndex[number - 2];
316  CoinFactorizationDouble regionValue1 = region[iRow1];
317  CoinFactorizationDouble value1 = thisElement[number - 2];
318  region[iRow0] = regionValue0 - value0 * pivotValue;
319  region[iRow1] = regionValue1 - value1 * pivotValue;
320  number -= 2;
321  }
322 #if AVX2 == 22
323  CoinFactorizationDouble temp[4] __attribute__((aligned(32)));
324  __m256d pv = _mm256_broadcast_sd(&pivotValue);
325  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
326  __m256d elements = _mm256_loadu_pd(thisElement + j - 3);
327  CoinSimplexInt iRow0 = thisIndex[j - 3];
328  CoinSimplexInt iRow1 = thisIndex[j - 2];
329  CoinSimplexInt iRow2 = thisIndex[j - 1];
330  CoinSimplexInt iRow3 = thisIndex[j - 0];
331  temp[0] = region[iRow0];
332  temp[1] = region[iRow1];
333  temp[2] = region[iRow2];
334  temp[3] = region[iRow3];
335  __m256d t0 = _mm256_load_pd(temp);
336  t0 -= pv * elements;
337  _mm256_store_pd(temp, t0);
338  region[iRow0] = temp[0];
339  region[iRow1] = temp[1];
340  region[iRow2] = temp[2];
341  region[iRow3] = temp[3];
342  }
343 #else
344 #pragma cilk grainsize = CILK_FOR_GRAINSIZE
345  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
346  {
347  CoinSimplexInt iRow0 = thisIndex[j];
348  CoinSimplexInt iRow1 = thisIndex[j - 1];
349  CoinFactorizationDouble regionValue0 = region[iRow0];
350  CoinFactorizationDouble regionValue1 = region[iRow1];
351  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
352  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
353  CoinSimplexInt iRow2 = thisIndex[j - 2];
354  CoinSimplexInt iRow3 = thisIndex[j - 3];
355  CoinFactorizationDouble regionValue2 = region[iRow2];
356  CoinFactorizationDouble regionValue3 = region[iRow3];
357  region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
358  region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
359  }
360 #endif
361 #elif UNROLL_SCATTER == 3
362  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
363  CoinSimplexInt iRow0;
364  CoinSimplexInt iRow1;
365  CoinFactorizationDouble regionValue0;
366  CoinFactorizationDouble regionValue1;
367  switch (static_cast< unsigned int >(number)) {
368  case 0:
369  break;
370  case 1:
371  iRow0 = thisIndex[0];
372  regionValue0 = region[iRow0];
373  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
374  break;
375  case 2:
376  iRow0 = thisIndex[0];
377  iRow1 = thisIndex[1];
378  regionValue0 = region[iRow0];
379  regionValue1 = region[iRow1];
380  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
381  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
382  break;
383  case 3:
384  iRow0 = thisIndex[0];
385  iRow1 = thisIndex[1];
386  regionValue0 = region[iRow0];
387  regionValue1 = region[iRow1];
388  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
389  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
390  iRow0 = thisIndex[2];
391  regionValue0 = region[iRow0];
392  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
393  break;
394  case 4:
395  iRow0 = thisIndex[0];
396  iRow1 = thisIndex[1];
397  regionValue0 = region[iRow0];
398  regionValue1 = region[iRow1];
399  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
400  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
401  iRow0 = thisIndex[2];
402  iRow1 = thisIndex[3];
403  regionValue0 = region[iRow0];
404  regionValue1 = region[iRow1];
405  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
406  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
407  break;
408  case 5:
409  iRow0 = thisIndex[0];
410  iRow1 = thisIndex[1];
411  regionValue0 = region[iRow0];
412  regionValue1 = region[iRow1];
413  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
414  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
415  iRow0 = thisIndex[2];
416  iRow1 = thisIndex[3];
417  regionValue0 = region[iRow0];
418  regionValue1 = region[iRow1];
419  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
420  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
421  iRow0 = thisIndex[4];
422  regionValue0 = region[iRow0];
423  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
424  break;
425  case 6:
426  iRow0 = thisIndex[0];
427  iRow1 = thisIndex[1];
428  regionValue0 = region[iRow0];
429  regionValue1 = region[iRow1];
430  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
431  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
432  iRow0 = thisIndex[2];
433  iRow1 = thisIndex[3];
434  regionValue0 = region[iRow0];
435  regionValue1 = region[iRow1];
436  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
437  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
438  iRow0 = thisIndex[4];
439  iRow1 = thisIndex[5];
440  regionValue0 = region[iRow0];
441  regionValue1 = region[iRow1];
442  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
443  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
444  break;
445  case 7:
446  iRow0 = thisIndex[0];
447  iRow1 = thisIndex[1];
448  regionValue0 = region[iRow0];
449  regionValue1 = region[iRow1];
450  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
451  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
452  iRow0 = thisIndex[2];
453  iRow1 = thisIndex[3];
454  regionValue0 = region[iRow0];
455  regionValue1 = region[iRow1];
456  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
457  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
458  iRow0 = thisIndex[4];
459  iRow1 = thisIndex[5];
460  regionValue0 = region[iRow0];
461  regionValue1 = region[iRow1];
462  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
463  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
464  iRow0 = thisIndex[6];
465  regionValue0 = region[iRow0];
466  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
467  break;
468  case 8:
469  iRow0 = thisIndex[0];
470  iRow1 = thisIndex[1];
471  regionValue0 = region[iRow0];
472  regionValue1 = region[iRow1];
473  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
474  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
475  iRow0 = thisIndex[2];
476  iRow1 = thisIndex[3];
477  regionValue0 = region[iRow0];
478  regionValue1 = region[iRow1];
479  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
480  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
481  iRow0 = thisIndex[4];
482  iRow1 = thisIndex[5];
483  regionValue0 = region[iRow0];
484  regionValue1 = region[iRow1];
485  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
486  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
487  iRow0 = thisIndex[6];
488  iRow1 = thisIndex[7];
489  regionValue0 = region[iRow0];
490  regionValue1 = region[iRow1];
491  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
492  region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
493  break;
494  default:
495  if ((number & 1) != 0) {
496  number--;
497  CoinSimplexInt iRow = thisIndex[number];
498  CoinFactorizationDouble regionValue = region[iRow];
499  CoinFactorizationDouble value = thisElement[number];
500  region[iRow] = regionValue - value * pivotValue;
501  }
502  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
503  CoinSimplexInt iRow0 = thisIndex[j];
504  CoinSimplexInt iRow1 = thisIndex[j - 1];
505  CoinFactorizationDouble regionValue0 = region[iRow0];
506  CoinFactorizationDouble regionValue1 = region[iRow1];
507  region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
508  region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
509  }
510  break;
511  }
512 #endif
513 }
514 #endif
515 //#define COIN_PREFETCH
516 #ifdef COIN_PREFETCH
517 #if 1
518 #define coin_prefetch(mem) \
519  __asm__ __volatile__("prefetchnta %0" \
520  : \
521  : "m"(*(reinterpret_cast< char * >(mem))))
522 #define coin_prefetch_const(mem) \
523  __asm__ __volatile__("prefetchnta %0" \
524  : \
525  : "m"(*(reinterpret_cast< const char * >(mem))))
526 #else
527 #define coin_prefetch(mem) \
528  __asm__ __volatile__("prefetch %0" \
529  : \
530  : "m"(*(reinterpret_cast< char * >(mem))))
531 #define coin_prefetch_const(mem) \
532  __asm__ __volatile__("prefetch %0" \
533  : \
534  : "m"(*(reinterpret_cast< const char * >(mem))))
535 #endif
536 #else
537 // dummy
538 #define coin_prefetch(mem)
539 #define coin_prefetch_const(mem)
540 #endif
541 #define NEW_CHUNK_SIZE 4
542 #define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE + NEW_CHUNK_SIZE / 2);
543 #define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE / 2)
544 // leaf, pure, nothrow and hot give warnings
545 // fastcall and sseregparm give wrong results
546 //#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
547 #define SCATTER_ATTRIBUTE
548 typedef void (*scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE;
549 typedef struct {
551  CoinBigIndex offset;
552  int number;
553 } scatterStruct;
554 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
555  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
556  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
557 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
558  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
559  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
560 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
561  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
562  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
563 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
564  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
565  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
566 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
567  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
568  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
569 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
570  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
571  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
572 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
573  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
574  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
575 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
576  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
577  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
578 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
579  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
580  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
581 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
582  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
583  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
584 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
585  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
586  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
587 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
588  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
589  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
590 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
591  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
592  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
593 void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
594  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
595  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
596 void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
597  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
598  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
599 void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
600  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
601  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
602 void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
603  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
604  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
605 void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
606  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
607  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
608 void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
609  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
610  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
611 void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
612  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
613  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
614 void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
615  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
616  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
617 void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
618  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
619  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
620 void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
621  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
622  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
623 void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
624  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
625  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
626 void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
627  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
628  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
629 void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
630  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
631  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
632 void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
633  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
634  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
635 void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
636  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
637  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
638 void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
639  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
640  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
641 void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
642  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
643  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
644 void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
645  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
646  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
647 void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
648  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
649  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
650 void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
651  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
652  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
653 void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
654  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
655  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
656 void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
657  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
658  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
659 void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
660  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
661  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
662 void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
663  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
664  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
665 #if INLINE_SCATTER == 0
666 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
667  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
668  const int *COIN_RESTRICT thisIndex,
669  CoinFactorizationDouble *COIN_RESTRICT region,
670  double *COIN_RESTRICT work);
671 #else
672 #if 0
673 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
674  const CoinFactorizationDouble * COIN_RESTRICT thisElement,
675  const int * COIN_RESTRICT thisIndex,
676  CoinFactorizationDouble * COIN_RESTRICT region,
677  double * COIN_RESTRICT /*work*/)
678 {
679 #if UNROLL_SCATTER == 0
680  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
681  CoinSimplexInt iRow = thisIndex[j];
682  CoinFactorizationDouble regionValue = region[iRow];
683  CoinFactorizationDouble value = thisElement[j];
684  assert (value);
685  region[iRow] = regionValue - value * pivotValue;
686  }
687 #elif UNROLL_SCATTER == 1
688  if ((number&1)!=0) {
689  CoinSimplexInt iRow = thisIndex[0];
690  thisIndex++;
691  CoinFactorizationDouble regionValue = region[iRow];
692  CoinFactorizationDouble value = thisElement[0];
693  thisElement++;
694  region[iRow] = regionValue - value * pivotValue;
695  }
696  number = number>>1;
697  CoinFactorizationDouble work2[4];
698  for ( ; number !=0; number-- ) {
699  CoinSimplexInt iRow0 = thisIndex[0];
700  CoinSimplexInt iRow1 = thisIndex[1];
701  work2[0] = region[iRow0];
702  work2[1] = region[iRow1];
703 #if 0
704  work2[2] = region[iRow0];
705  work2[3] = region[iRow1];
706  //__v4df b = __builtin_ia32_maskloadpd256(work2);
707  __v4df b = __builtin_ia32_loadupd256(work2);
708  //__v4df b = _mm256_load_pd(work2);
709 #endif
710  work2[0] -= thisElement[0] * pivotValue;
711  work2[1] -= thisElement[1] * pivotValue;
712  region[iRow0] = work2[0];
713  region[iRow1] = work2[1];
714  thisIndex+=2;
715  thisElement+=2;
716  }
717 #endif
718 }
719 #endif
720 #endif
721 #define UNROLL_GATHER 0
722 #define INLINE_GATHER 1
723 #if INLINE_GATHER == 0
724 CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
725  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
726  const int *COIN_RESTRICT thisIndex,
727  CoinFactorizationDouble *COIN_RESTRICT region);
728 #else
729 CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
730  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
731  const int *COIN_RESTRICT thisIndex,
732  CoinFactorizationDouble *COIN_RESTRICT region)
733 {
734 #if UNROLL_GATHER == 0
735  CoinFactorizationDouble pivotValue = 0.0;
736  for (CoinBigIndex j = 0; j < number; j++) {
737  CoinFactorizationDouble value = thisElement[j];
738  CoinSimplexInt jRow = thisIndex[j];
739  value *= region[jRow];
740  pivotValue -= value;
741  }
742  return pivotValue;
743 #else
744 #error code
745 #endif
746 }
747 #endif
748 #define UNROLL_MULTIPLY_INDEXED 0
749 #define INLINE_MULTIPLY_INDEXED 0
750 #if INLINE_MULTIPLY_INDEXED == 0
751 void CoinAbcMultiplyIndexed(int number,
752  const double *COIN_RESTRICT multiplier,
753  const int *COIN_RESTRICT thisIndex,
754  CoinFactorizationDouble *COIN_RESTRICT region);
755 void CoinAbcMultiplyIndexed(int number,
756  const long double *COIN_RESTRICT multiplier,
757  const int *COIN_RESTRICT thisIndex,
758  long double *COIN_RESTRICT region);
759 #else
760 void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
761  const double *COIN_RESTRICT multiplier,
762  const int *COIN_RESTRICT thisIndex,
763  CoinFactorizationDouble *COIN_RESTRICT region)
764 {
765 }
766 #endif
767 double CoinAbcMaximumAbsElement(const double *region, int size);
768 void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum);
769 void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum);
770 void CoinAbcScale(double *region, double multiplier, int size);
771 void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size);
773 double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size);
774 void CoinAbcSetElements(double *region, int size, double value);
775 void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1,
776  double *regionChanged, double multiplier2);
777 double CoinAbcInnerProduct(const double *region1, int size, const double *region2);
778 void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2);
780 void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number);
782 void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number);
784 void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number);
786 void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
787  const int *indexList, const int *indexScatter, int number);
789 void CoinAbcInverseSqrts(double *array, int n);
790 void CoinAbcReciprocal(double *array, int n, const double *input);
791 void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size);
792 void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size);
793 void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size);
794 void CoinAbcMemset0Long(double *array, int size);
795 void CoinAbcMemset0Long(int *array, int size);
796 void CoinAbcMemset0Long(unsigned char *array, int size);
797 void CoinAbcMemmove(double *array, const double *arrayFrom, int size);
798 void CoinAbcMemmove(int *array, const int *arrayFrom, int size);
799 void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size);
801 void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size);
803 int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths);
805 int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths);
806 #endif
807 #if ABC_CREATE_SCATTER_FUNCTION
808 SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
809  const CoinFactorizationDouble *COIN_RESTRICT element,
810  CoinFactorizationDouble *COIN_RESTRICT region)
811 {
812 #ifndef NDEBUG
813  assert(numberIn == 1);
814 #endif
815  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 1);
816  int iColumn0 = thisColumn[0];
817  double value0 = region[iColumn0];
818  value0 OPERATION multiplier *element[0];
819  region[iColumn0] = value0;
820 }
821 SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
822  const CoinFactorizationDouble *COIN_RESTRICT element,
823  CoinFactorizationDouble *COIN_RESTRICT region)
824 {
825 #ifndef NDEBUG
826  assert(numberIn == 2);
827 #endif
828  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 2);
829 #if NEW_CHUNK_SIZE == 2
830  int nFull = 2 & (~(NEW_CHUNK_SIZE - 1));
831  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
833  int iColumn0 = thisColumn[0];
834  int iColumn1 = thisColumn[1];
835  CoinFactorizationDouble value0 = region[iColumn0];
836  CoinFactorizationDouble value1 = region[iColumn1];
837  value0 OPERATION multiplier *element[0 + NEW_CHUNK_SIZE_OFFSET];
838  value1 OPERATION multiplier *element[1 + NEW_CHUNK_SIZE_OFFSET];
839  region[iColumn0] = value0;
840  region[iColumn1] = value1;
841  element += NEW_CHUNK_SIZE_INCREMENT;
842  thisColumn = reinterpret_cast< const int * >(element);
843  }
844 #endif
845 #if NEW_CHUNK_SIZE == 4
846  int iColumn0 = thisColumn[0];
847  int iColumn1 = thisColumn[1];
848  CoinFactorizationDouble value0 = region[iColumn0];
849  CoinFactorizationDouble value1 = region[iColumn1];
850  value0 OPERATION multiplier *element[0];
851  value1 OPERATION multiplier *element[1];
852  region[iColumn0] = value0;
853  region[iColumn1] = value1;
854 #endif
855 }
856 SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
857  const CoinFactorizationDouble *COIN_RESTRICT element,
858  CoinFactorizationDouble *COIN_RESTRICT region)
859 {
860 #ifndef NDEBUG
861  assert(numberIn == 3);
862 #endif
863  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 3);
864 #if AVX2 == 1
865  double temp[2];
866 #endif
867 #if NEW_CHUNK_SIZE == 2
868  int nFull = 3 & (~(NEW_CHUNK_SIZE - 1));
869  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
870  //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
871  int iColumn0 = thisColumn[0];
872  int iColumn1 = thisColumn[1];
873  CoinFactorizationDouble value0 = region[iColumn0];
874  CoinFactorizationDouble value1 = region[iColumn1];
875  value0 OPERATION multiplier *element[0];
876  value1 OPERATION multiplier *element[1];
877  region[iColumn0] = value0;
878  region[iColumn1] = value1;
879  element += NEW_CHUNK_SIZE;
880  thisColumn + = NEW_CHUNK_SIZE;
881  }
882 #endif
883 #if NEW_CHUNK_SIZE == 2
884  int iColumn0 = thisColumn[0];
885  double value0 = region[iColumn0];
886  value0 OPERATION multiplier *element[0];
887  region[iColumn0] = value0;
888 #else
889  int iColumn0 = thisColumn[0];
890  int iColumn1 = thisColumn[1];
891  int iColumn2 = thisColumn[2];
892 #if AVX2 == 1
893  __v2df bb;
894  double value2 = region[iColumn2];
895  value2 OPERATION multiplier *element[2];
896  set_const_v2df(bb, multiplier);
897  temp[0] = region[iColumn0];
898  temp[1] = region[iColumn1];
899  region[iColumn2] = value2;
900  __v2df v0 = __builtin_ia32_loadupd(temp);
901  __v2df a = __builtin_ia32_loadupd(element);
902  a *= bb;
903  v0 OPERATION a;
904  __builtin_ia32_storeupd(temp, v0);
905  region[iColumn0] = temp[0];
906  region[iColumn1] = temp[1];
907 #else
908  double value0 = region[iColumn0];
909  double value1 = region[iColumn1];
910  double value2 = region[iColumn2];
911  value0 OPERATION multiplier *element[0];
912  value1 OPERATION multiplier *element[1];
913  value2 OPERATION multiplier *element[2];
914  region[iColumn0] = value0;
915  region[iColumn1] = value1;
916  region[iColumn2] = value2;
917 #endif
918 #endif
919 }
920 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
921  const CoinFactorizationDouble *COIN_RESTRICT element,
922  CoinFactorizationDouble *COIN_RESTRICT region)
923 {
924 #ifndef NDEBUG
925  assert(numberIn == 4);
926 #endif
927  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 4);
928  int nFull = 4 & (~(NEW_CHUNK_SIZE - 1));
929 #if AVX2 == 1
930  double temp[4];
931 #endif
932  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
933  //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
934 #if NEW_CHUNK_SIZE == 2
935  int iColumn0 = thisColumn[0];
936  int iColumn1 = thisColumn[1];
937  double value0 = region[iColumn0];
938  double value1 = region[iColumn1];
939  value0 OPERATION multiplier *element[0];
940  value1 OPERATION multiplier *element[1];
941  region[iColumn0] = value0;
942  region[iColumn1] = value1;
943 #elif NEW_CHUNK_SIZE == 4
944  int iColumn0 = thisColumn[0];
945  int iColumn1 = thisColumn[1];
946  int iColumn2 = thisColumn[2];
947  int iColumn3 = thisColumn[3];
948 #if AVX2 == 1
949  __v2df bb;
950  set_const_v2df(bb, multiplier);
951  temp[0] = region[iColumn0];
952  temp[1] = region[iColumn1];
953  temp[2] = region[iColumn2];
954  temp[3] = region[iColumn3];
955  __v2df v0 = __builtin_ia32_loadupd(temp);
956  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
957  __v2df a = __builtin_ia32_loadupd(element);
958  a *= bb;
959  v0 OPERATION a;
960  a = __builtin_ia32_loadupd(element + 2);
961  a *= bb;
962  v1 OPERATION a;
963  __builtin_ia32_storeupd(temp, v0);
964  __builtin_ia32_storeupd(temp + 2, v1);
965  region[iColumn0] = temp[0];
966  region[iColumn1] = temp[1];
967  region[iColumn2] = temp[2];
968  region[iColumn3] = temp[3];
969 #else
970  double value0 = region[iColumn0];
971  double value1 = region[iColumn1];
972  double value2 = region[iColumn2];
973  double value3 = region[iColumn3];
974  value0 OPERATION multiplier *element[0];
975  value1 OPERATION multiplier *element[1];
976  value2 OPERATION multiplier *element[2];
977  value3 OPERATION multiplier *element[3];
978  region[iColumn0] = value0;
979  region[iColumn1] = value1;
980  region[iColumn2] = value2;
981  region[iColumn3] = value3;
982 #endif
983 #else
984  abort();
985 #endif
986  element += NEW_CHUNK_SIZE;
987  thisColumn += NEW_CHUNK_SIZE;
988  }
989 }
990 SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
991  const CoinFactorizationDouble *COIN_RESTRICT element,
992  CoinFactorizationDouble *COIN_RESTRICT region)
993 {
994 #ifndef NDEBUG
995  assert(numberIn == 5);
996 #endif
997  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 5);
998  int nFull = 5 & (~(NEW_CHUNK_SIZE - 1));
999 #if AVX2 == 1
1000  double temp[4];
1001 #endif
1002  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1003  //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
1004 #if NEW_CHUNK_SIZE == 2
1005  int iColumn0 = thisColumn[0];
1006  int iColumn1 = thisColumn[1];
1007  double value0 = region[iColumn0];
1008  double value1 = region[iColumn1];
1009  value0 OPERATION multiplier *element[0];
1010  value1 OPERATION multiplier *element[1];
1011  region[iColumn0] = value0;
1012  region[iColumn1] = value1;
1013 #elif NEW_CHUNK_SIZE == 4
1014  int iColumn0 = thisColumn[0];
1015  int iColumn1 = thisColumn[1];
1016  int iColumn2 = thisColumn[2];
1017  int iColumn3 = thisColumn[3];
1018 #if AVX2 == 1
1019  __v2df bb;
1020  set_const_v2df(bb, multiplier);
1021  temp[0] = region[iColumn0];
1022  temp[1] = region[iColumn1];
1023  temp[2] = region[iColumn2];
1024  temp[3] = region[iColumn3];
1025  __v2df v0 = __builtin_ia32_loadupd(temp);
1026  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1027  __v2df a = __builtin_ia32_loadupd(element);
1028  a *= bb;
1029  v0 OPERATION a;
1030  a = __builtin_ia32_loadupd(element + 2);
1031  a *= bb;
1032  v1 OPERATION a;
1033  __builtin_ia32_storeupd(temp, v0);
1034  __builtin_ia32_storeupd(temp + 2, v1);
1035  region[iColumn0] = temp[0];
1036  region[iColumn1] = temp[1];
1037  region[iColumn2] = temp[2];
1038  region[iColumn3] = temp[3];
1039 #else
1040  double value0 = region[iColumn0];
1041  double value1 = region[iColumn1];
1042  double value2 = region[iColumn2];
1043  double value3 = region[iColumn3];
1044  value0 OPERATION multiplier *element[0];
1045  value1 OPERATION multiplier *element[1];
1046  value2 OPERATION multiplier *element[2];
1047  value3 OPERATION multiplier *element[3];
1048  region[iColumn0] = value0;
1049  region[iColumn1] = value1;
1050  region[iColumn2] = value2;
1051  region[iColumn3] = value3;
1052 #endif
1053 #else
1054  abort();
1055 #endif
1056  element += NEW_CHUNK_SIZE;
1057  thisColumn += NEW_CHUNK_SIZE;
1058  }
1059  int iColumn0 = thisColumn[0];
1060  double value0 = region[iColumn0];
1061  value0 OPERATION multiplier *element[0];
1062  region[iColumn0] = value0;
1063 }
1064 SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
1065  const CoinFactorizationDouble *COIN_RESTRICT element,
1066  CoinFactorizationDouble *COIN_RESTRICT region)
1067 {
1068 #ifndef NDEBUG
1069  assert(numberIn == 6);
1070 #endif
1071  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 6);
1072  int nFull = 6 & (~(NEW_CHUNK_SIZE - 1));
1073 #if AVX2 == 1
1074  double temp[4];
1075 #endif
1076  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1077  coin_prefetch_const(element + 6);
1078 #if NEW_CHUNK_SIZE == 2
1079  int iColumn0 = thisColumn[0];
1080  int iColumn1 = thisColumn[1];
1081  double value0 = region[iColumn0];
1082  double value1 = region[iColumn1];
1083  value0 OPERATION multiplier *element[0];
1084  value1 OPERATION multiplier *element[1];
1085  region[iColumn0] = value0;
1086  region[iColumn1] = value1;
1087 #elif NEW_CHUNK_SIZE == 4
1088  int iColumn0 = thisColumn[0];
1089  int iColumn1 = thisColumn[1];
1090  int iColumn2 = thisColumn[2];
1091  int iColumn3 = thisColumn[3];
1092 #if AVX2 == 1
1093  __v2df bb;
1094  set_const_v2df(bb, multiplier);
1095  temp[0] = region[iColumn0];
1096  temp[1] = region[iColumn1];
1097  temp[2] = region[iColumn2];
1098  temp[3] = region[iColumn3];
1099  __v2df v0 = __builtin_ia32_loadupd(temp);
1100  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1101  __v2df a = __builtin_ia32_loadupd(element);
1102  a *= bb;
1103  v0 OPERATION a;
1104  a = __builtin_ia32_loadupd(element + 2);
1105  a *= bb;
1106  v1 OPERATION a;
1107  __builtin_ia32_storeupd(temp, v0);
1108  __builtin_ia32_storeupd(temp + 2, v1);
1109  region[iColumn0] = temp[0];
1110  region[iColumn1] = temp[1];
1111  region[iColumn2] = temp[2];
1112  region[iColumn3] = temp[3];
1113 #else
1114  double value0 = region[iColumn0];
1115  double value1 = region[iColumn1];
1116  double value2 = region[iColumn2];
1117  double value3 = region[iColumn3];
1118  value0 OPERATION multiplier *element[0];
1119  value1 OPERATION multiplier *element[1];
1120  value2 OPERATION multiplier *element[2];
1121  value3 OPERATION multiplier *element[3];
1122  region[iColumn0] = value0;
1123  region[iColumn1] = value1;
1124  region[iColumn2] = value2;
1125  region[iColumn3] = value3;
1126 #endif
1127 #else
1128  abort();
1129 #endif
1130  element += NEW_CHUNK_SIZE;
1131  thisColumn += NEW_CHUNK_SIZE;
1132  }
1133 #if NEW_CHUNK_SIZE == 4
1134  int iColumn0 = thisColumn[0];
1135  int iColumn1 = thisColumn[1];
1136  double value0 = region[iColumn0];
1137  double value1 = region[iColumn1];
1138  value0 OPERATION multiplier *element[0];
1139  value1 OPERATION multiplier *element[1];
1140  region[iColumn0] = value0;
1141  region[iColumn1] = value1;
1142 #endif
1143 }
1144 SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
1145  const CoinFactorizationDouble *COIN_RESTRICT element,
1146  CoinFactorizationDouble *COIN_RESTRICT region)
1147 {
1148 #ifndef NDEBUG
1149  assert(numberIn == 7);
1150 #endif
1151  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 7);
1152  int nFull = 7 & (~(NEW_CHUNK_SIZE - 1));
1153 #if AVX2 == 1
1154  double temp[4];
1155 #endif
1156  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1157  coin_prefetch_const(element + 6);
1158 #if NEW_CHUNK_SIZE == 2
1159  int iColumn0 = thisColumn[0];
1160  int iColumn1 = thisColumn[1];
1161  double value0 = region[iColumn0];
1162  double value1 = region[iColumn1];
1163  value0 OPERATION multiplier *element[0];
1164  value1 OPERATION multiplier *element[1];
1165  region[iColumn0] = value0;
1166  region[iColumn1] = value1;
1167 #elif NEW_CHUNK_SIZE == 4
1168  int iColumn0 = thisColumn[0];
1169  int iColumn1 = thisColumn[1];
1170  int iColumn2 = thisColumn[2];
1171  int iColumn3 = thisColumn[3];
1172 #if AVX2 == 1
1173  __v2df bb;
1174  set_const_v2df(bb, multiplier);
1175  temp[0] = region[iColumn0];
1176  temp[1] = region[iColumn1];
1177  temp[2] = region[iColumn2];
1178  temp[3] = region[iColumn3];
1179  __v2df v0 = __builtin_ia32_loadupd(temp);
1180  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1181  __v2df a = __builtin_ia32_loadupd(element);
1182  a *= bb;
1183  v0 OPERATION a;
1184  a = __builtin_ia32_loadupd(element + 2);
1185  a *= bb;
1186  v1 OPERATION a;
1187  __builtin_ia32_storeupd(temp, v0);
1188  __builtin_ia32_storeupd(temp + 2, v1);
1189  region[iColumn0] = temp[0];
1190  region[iColumn1] = temp[1];
1191  region[iColumn2] = temp[2];
1192  region[iColumn3] = temp[3];
1193 #else
1194  double value0 = region[iColumn0];
1195  double value1 = region[iColumn1];
1196  double value2 = region[iColumn2];
1197  double value3 = region[iColumn3];
1198  value0 OPERATION multiplier *element[0];
1199  value1 OPERATION multiplier *element[1];
1200  value2 OPERATION multiplier *element[2];
1201  value3 OPERATION multiplier *element[3];
1202  region[iColumn0] = value0;
1203  region[iColumn1] = value1;
1204  region[iColumn2] = value2;
1205  region[iColumn3] = value3;
1206 #endif
1207 #else
1208  abort();
1209 #endif
1210  element += NEW_CHUNK_SIZE;
1211  thisColumn += NEW_CHUNK_SIZE;
1212  }
1213 #if NEW_CHUNK_SIZE == 2
1214  int iColumn0 = thisColumn[0];
1215  double value0 = region[iColumn0];
1216  value0 OPERATION multiplier *element[0];
1217  region[iColumn0] = value0;
1218 #else
1219  int iColumn0 = thisColumn[0];
1220  int iColumn1 = thisColumn[1];
1221  int iColumn2 = thisColumn[2];
1222  double value0 = region[iColumn0];
1223  double value1 = region[iColumn1];
1224  double value2 = region[iColumn2];
1225  value0 OPERATION multiplier *element[0];
1226  value1 OPERATION multiplier *element[1];
1227  value2 OPERATION multiplier *element[2];
1228  region[iColumn0] = value0;
1229  region[iColumn1] = value1;
1230  region[iColumn2] = value2;
1231 #endif
1232 }
1233 SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
1234  const CoinFactorizationDouble *COIN_RESTRICT element,
1235  CoinFactorizationDouble *COIN_RESTRICT region)
1236 {
1237 #ifndef NDEBUG
1238  assert(numberIn == 8);
1239 #endif
1240  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 8);
1241  int nFull = 8 & (~(NEW_CHUNK_SIZE - 1));
1242 #if AVX2 == 1
1243  double temp[4];
1244 #endif
1245  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1246  coin_prefetch_const(element + 6);
1247 #if NEW_CHUNK_SIZE == 2
1248  int iColumn0 = thisColumn[0];
1249  int iColumn1 = thisColumn[1];
1250  double value0 = region[iColumn0];
1251  double value1 = region[iColumn1];
1252  value0 OPERATION multiplier *element[0];
1253  value1 OPERATION multiplier *element[1];
1254  region[iColumn0] = value0;
1255  region[iColumn1] = value1;
1256 #elif NEW_CHUNK_SIZE == 4
1257  int iColumn0 = thisColumn[0];
1258  int iColumn1 = thisColumn[1];
1259  int iColumn2 = thisColumn[2];
1260  int iColumn3 = thisColumn[3];
1261 #if AVX2 == 1
1262  __v2df bb;
1263  set_const_v2df(bb, multiplier);
1264  temp[0] = region[iColumn0];
1265  temp[1] = region[iColumn1];
1266  temp[2] = region[iColumn2];
1267  temp[3] = region[iColumn3];
1268  __v2df v0 = __builtin_ia32_loadupd(temp);
1269  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1270  __v2df a = __builtin_ia32_loadupd(element);
1271  a *= bb;
1272  v0 OPERATION a;
1273  a = __builtin_ia32_loadupd(element + 2);
1274  a *= bb;
1275  v1 OPERATION a;
1276  __builtin_ia32_storeupd(temp, v0);
1277  __builtin_ia32_storeupd(temp + 2, v1);
1278  region[iColumn0] = temp[0];
1279  region[iColumn1] = temp[1];
1280  region[iColumn2] = temp[2];
1281  region[iColumn3] = temp[3];
1282 #else
1283  double value0 = region[iColumn0];
1284  double value1 = region[iColumn1];
1285  double value2 = region[iColumn2];
1286  double value3 = region[iColumn3];
1287  value0 OPERATION multiplier *element[0];
1288  value1 OPERATION multiplier *element[1];
1289  value2 OPERATION multiplier *element[2];
1290  value3 OPERATION multiplier *element[3];
1291  region[iColumn0] = value0;
1292  region[iColumn1] = value1;
1293  region[iColumn2] = value2;
1294  region[iColumn3] = value3;
1295 #endif
1296 #else
1297  abort();
1298 #endif
1299  element += NEW_CHUNK_SIZE;
1300  thisColumn += NEW_CHUNK_SIZE;
1301  }
1302 }
1303 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
1304  const CoinFactorizationDouble *COIN_RESTRICT element,
1305  CoinFactorizationDouble *COIN_RESTRICT region)
1306 {
1307  assert((numberIn & 3) == 0);
1308  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1309  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1310 #if AVX2 == 1
1311  double temp[4];
1312 #endif
1313  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1314  coin_prefetch_const(element + 16);
1315  coin_prefetch_const(thisColumn + 32);
1316 #if NEW_CHUNK_SIZE == 2
1317  int iColumn0 = thisColumn[0];
1318  int iColumn1 = thisColumn[1];
1319  double value0 = region[iColumn0];
1320  double value1 = region[iColumn1];
1321  value0 OPERATION multiplier *element[0];
1322  value1 OPERATION multiplier *element[1];
1323  region[iColumn0] = value0;
1324  region[iColumn1] = value1;
1325 #elif NEW_CHUNK_SIZE == 4
1326  int iColumn0 = thisColumn[0];
1327  int iColumn1 = thisColumn[1];
1328  int iColumn2 = thisColumn[2];
1329  int iColumn3 = thisColumn[3];
1330 #if AVX2 == 1
1331  __v2df bb;
1332  set_const_v2df(bb, multiplier);
1333  temp[0] = region[iColumn0];
1334  temp[1] = region[iColumn1];
1335  temp[2] = region[iColumn2];
1336  temp[3] = region[iColumn3];
1337  __v2df v0 = __builtin_ia32_loadupd(temp);
1338  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1339  __v2df a = __builtin_ia32_loadupd(element);
1340  a *= bb;
1341  v0 OPERATION a;
1342  a = __builtin_ia32_loadupd(element + 2);
1343  a *= bb;
1344  v1 OPERATION a;
1345  __builtin_ia32_storeupd(temp, v0);
1346  __builtin_ia32_storeupd(temp + 2, v1);
1347  region[iColumn0] = temp[0];
1348  region[iColumn1] = temp[1];
1349  region[iColumn2] = temp[2];
1350  region[iColumn3] = temp[3];
1351 #else
1352  double value0 = region[iColumn0];
1353  double value1 = region[iColumn1];
1354  double value2 = region[iColumn2];
1355  double value3 = region[iColumn3];
1356  value0 OPERATION multiplier *element[0];
1357  value1 OPERATION multiplier *element[1];
1358  value2 OPERATION multiplier *element[2];
1359  value3 OPERATION multiplier *element[3];
1360  region[iColumn0] = value0;
1361  region[iColumn1] = value1;
1362  region[iColumn2] = value2;
1363  region[iColumn3] = value3;
1364 #endif
1365 #else
1366  abort();
1367 #endif
1368  element += NEW_CHUNK_SIZE;
1369  thisColumn += NEW_CHUNK_SIZE;
1370  }
1371 }
1372 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
1373  const CoinFactorizationDouble *COIN_RESTRICT element,
1374  CoinFactorizationDouble *COIN_RESTRICT region)
1375 {
1376  assert((numberIn & 3) == 1);
1377  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1378  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1379 #if AVX2 == 1
1380  double temp[4];
1381 #endif
1382  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1383  coin_prefetch_const(element + 16);
1384  coin_prefetch_const(thisColumn + 32);
1385 #if NEW_CHUNK_SIZE == 2
1386  int iColumn0 = thisColumn[0];
1387  int iColumn1 = thisColumn[1];
1388  double value0 = region[iColumn0];
1389  double value1 = region[iColumn1];
1390  value0 OPERATION multiplier *element[0];
1391  value1 OPERATION multiplier *element[1];
1392  region[iColumn0] = value0;
1393  region[iColumn1] = value1;
1394 #elif NEW_CHUNK_SIZE == 4
1395  int iColumn0 = thisColumn[0];
1396  int iColumn1 = thisColumn[1];
1397  int iColumn2 = thisColumn[2];
1398  int iColumn3 = thisColumn[3];
1399 #if AVX2 == 1
1400  __v2df bb;
1401  set_const_v2df(bb, multiplier);
1402  temp[0] = region[iColumn0];
1403  temp[1] = region[iColumn1];
1404  temp[2] = region[iColumn2];
1405  temp[3] = region[iColumn3];
1406  __v2df v0 = __builtin_ia32_loadupd(temp);
1407  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1408  __v2df a = __builtin_ia32_loadupd(element);
1409  a *= bb;
1410  v0 OPERATION a;
1411  a = __builtin_ia32_loadupd(element + 2);
1412  a *= bb;
1413  v1 OPERATION a;
1414  __builtin_ia32_storeupd(temp, v0);
1415  __builtin_ia32_storeupd(temp + 2, v1);
1416  region[iColumn0] = temp[0];
1417  region[iColumn1] = temp[1];
1418  region[iColumn2] = temp[2];
1419  region[iColumn3] = temp[3];
1420 #else
1421  double value0 = region[iColumn0];
1422  double value1 = region[iColumn1];
1423  double value2 = region[iColumn2];
1424  double value3 = region[iColumn3];
1425  value0 OPERATION multiplier *element[0];
1426  value1 OPERATION multiplier *element[1];
1427  value2 OPERATION multiplier *element[2];
1428  value3 OPERATION multiplier *element[3];
1429  region[iColumn0] = value0;
1430  region[iColumn1] = value1;
1431  region[iColumn2] = value2;
1432  region[iColumn3] = value3;
1433 #endif
1434 #else
1435  abort();
1436 #endif
1437  element += NEW_CHUNK_SIZE;
1438  thisColumn += NEW_CHUNK_SIZE;
1439  }
1440  int iColumn0 = thisColumn[0];
1441  double value0 = region[iColumn0];
1442  value0 OPERATION multiplier *element[0];
1443  region[iColumn0] = value0;
1444 }
1445 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
1446  const CoinFactorizationDouble *COIN_RESTRICT element,
1447  CoinFactorizationDouble *COIN_RESTRICT region)
1448 {
1449  assert((numberIn & 3) == 2);
1450  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1451  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1452 #if AVX2 == 1
1453  double temp[4];
1454 #endif
1455  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1456  coin_prefetch_const(element + 16);
1457  coin_prefetch_const(thisColumn + 32);
1458 #if NEW_CHUNK_SIZE == 2
1459  int iColumn0 = thisColumn[0];
1460  int iColumn1 = thisColumn[1];
1461  double value0 = region[iColumn0];
1462  double value1 = region[iColumn1];
1463  value0 OPERATION multiplier *element[0];
1464  value1 OPERATION multiplier *element[1];
1465  region[iColumn0] = value0;
1466  region[iColumn1] = value1;
1467 #elif NEW_CHUNK_SIZE == 4
1468  int iColumn0 = thisColumn[0];
1469  int iColumn1 = thisColumn[1];
1470  int iColumn2 = thisColumn[2];
1471  int iColumn3 = thisColumn[3];
1472 #if AVX2 == 1
1473  __v2df bb;
1474  set_const_v2df(bb, multiplier);
1475  temp[0] = region[iColumn0];
1476  temp[1] = region[iColumn1];
1477  temp[2] = region[iColumn2];
1478  temp[3] = region[iColumn3];
1479  __v2df v0 = __builtin_ia32_loadupd(temp);
1480  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1481  __v2df a = __builtin_ia32_loadupd(element);
1482  a *= bb;
1483  v0 OPERATION a;
1484  a = __builtin_ia32_loadupd(element + 2);
1485  a *= bb;
1486  v1 OPERATION a;
1487  __builtin_ia32_storeupd(temp, v0);
1488  __builtin_ia32_storeupd(temp + 2, v1);
1489  region[iColumn0] = temp[0];
1490  region[iColumn1] = temp[1];
1491  region[iColumn2] = temp[2];
1492  region[iColumn3] = temp[3];
1493 #else
1494  double value0 = region[iColumn0];
1495  double value1 = region[iColumn1];
1496  double value2 = region[iColumn2];
1497  double value3 = region[iColumn3];
1498  value0 OPERATION multiplier *element[0];
1499  value1 OPERATION multiplier *element[1];
1500  value2 OPERATION multiplier *element[2];
1501  value3 OPERATION multiplier *element[3];
1502  region[iColumn0] = value0;
1503  region[iColumn1] = value1;
1504  region[iColumn2] = value2;
1505  region[iColumn3] = value3;
1506 #endif
1507 #else
1508  abort();
1509 #endif
1510  element += NEW_CHUNK_SIZE;
1511  thisColumn += NEW_CHUNK_SIZE;
1512  }
1513 #if NEW_CHUNK_SIZE == 4
1514  int iColumn0 = thisColumn[0];
1515  int iColumn1 = thisColumn[1];
1516  double value0 = region[iColumn0];
1517  double value1 = region[iColumn1];
1518  value0 OPERATION multiplier *element[0];
1519  value1 OPERATION multiplier *element[1];
1520  region[iColumn0] = value0;
1521  region[iColumn1] = value1;
1522 #endif
1523 }
1524 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
1525  const CoinFactorizationDouble *COIN_RESTRICT element,
1526  CoinFactorizationDouble *COIN_RESTRICT region)
1527 {
1528  assert((numberIn & 3) == 3);
1529  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1530  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1531 #if AVX2 == 1
1532  double temp[4];
1533 #endif
1534  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1535  coin_prefetch_const(element + 16);
1536  coin_prefetch_const(thisColumn + 32);
1537 #if NEW_CHUNK_SIZE == 2
1538  int iColumn0 = thisColumn[0];
1539  int iColumn1 = thisColumn[1];
1540  double value0 = region[iColumn0];
1541  double value1 = region[iColumn1];
1542  value0 OPERATION multiplier *element[0];
1543  value1 OPERATION multiplier *element[1];
1544  region[iColumn0] = value0;
1545  region[iColumn1] = value1;
1546 #elif NEW_CHUNK_SIZE == 4
1547  int iColumn0 = thisColumn[0];
1548  int iColumn1 = thisColumn[1];
1549  int iColumn2 = thisColumn[2];
1550  int iColumn3 = thisColumn[3];
1551 #if AVX2 == 1
1552  __v2df bb;
1553  set_const_v2df(bb, multiplier);
1554  temp[0] = region[iColumn0];
1555  temp[1] = region[iColumn1];
1556  temp[2] = region[iColumn2];
1557  temp[3] = region[iColumn3];
1558  __v2df v0 = __builtin_ia32_loadupd(temp);
1559  __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1560  __v2df a = __builtin_ia32_loadupd(element);
1561  a *= bb;
1562  v0 OPERATION a;
1563  a = __builtin_ia32_loadupd(element + 2);
1564  a *= bb;
1565  v1 OPERATION a;
1566  __builtin_ia32_storeupd(temp, v0);
1567  __builtin_ia32_storeupd(temp + 2, v1);
1568  region[iColumn0] = temp[0];
1569  region[iColumn1] = temp[1];
1570  region[iColumn2] = temp[2];
1571  region[iColumn3] = temp[3];
1572 #else
1573  double value0 = region[iColumn0];
1574  double value1 = region[iColumn1];
1575  double value2 = region[iColumn2];
1576  double value3 = region[iColumn3];
1577  value0 OPERATION multiplier *element[0];
1578  value1 OPERATION multiplier *element[1];
1579  value2 OPERATION multiplier *element[2];
1580  value3 OPERATION multiplier *element[3];
1581  region[iColumn0] = value0;
1582  region[iColumn1] = value1;
1583  region[iColumn2] = value2;
1584  region[iColumn3] = value3;
1585 #endif
1586 #else
1587  abort();
1588 #endif
1589  element += NEW_CHUNK_SIZE;
1590  thisColumn += NEW_CHUNK_SIZE;
1591  }
1592 #if NEW_CHUNK_SIZE == 2
1593  int iColumn0 = thisColumn[0];
1594  double value0 = region[iColumn0];
1595  value0 OPERATION multiplier *element[0];
1596  region[iColumn0] = value0;
1597 #else
1598  int iColumn0 = thisColumn[0];
1599  int iColumn1 = thisColumn[1];
1600  int iColumn2 = thisColumn[2];
1601  double value0 = region[iColumn0];
1602  double value1 = region[iColumn1];
1603  double value2 = region[iColumn2];
1604  value0 OPERATION multiplier *element[0];
1605  value1 OPERATION multiplier *element[1];
1606  value2 OPERATION multiplier *element[2];
1607  region[iColumn0] = value0;
1608  region[iColumn1] = value1;
1609  region[iColumn2] = value2;
1610 #endif
1611 }
1612 #endif
1613 
1614 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
1615 */
SCATTER_ATTRIBUTE
#define SCATTER_ATTRIBUTE
Definition: CoinAbcHelperFunctions.hpp:547
CoinAbcScatterUpdate4NPlus3Add
void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate
void ABC_INLINE CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue, const CoinFactorizationDouble *COIN_RESTRICT thisElement, const int *COIN_RESTRICT thisIndex, CoinFactorizationDouble *COIN_RESTRICT region)
Definition: CoinAbcHelperFunctions.hpp:56
CoinAbcScatterUpdate6Add
void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
scatterStruct
Definition: CoinAbcHelperFunctions.hpp:549
CoinAbcScatterToList
void CoinAbcScatterToList(const double *regionFrom, double *regionTo, const int *indexList, const int *indexScatter, int number)
regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
coin_prefetch
#define coin_prefetch(mem)
Definition: CoinAbcHelperFunctions.hpp:538
CoinAbcScatterUpdate4NPlus3
void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcGetNorms
void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2)
CoinAbcScatterUpdate4NPlus2Add
void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMaximumAbsElement
double CoinAbcMaximumAbsElement(const double *region, int size)
CoinAbcScatterUpdate5
void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate2
void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMultiplyIndexed
void CoinAbcMultiplyIndexed(int number, const double *COIN_RESTRICT multiplier, const int *COIN_RESTRICT thisIndex, CoinFactorizationDouble *COIN_RESTRICT region)
scatterUpdate
void(* scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE
Definition: CoinAbcHelperFunctions.hpp:548
CoinAbcScatterUpdate1Subtract
void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterZeroTo
void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number)
regionTo[index[i]]=0.0
CoinAbcScatterUpdate0
void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcReciprocal
void CoinAbcReciprocal(double *array, int n, const double *input)
CoinAbcScatterUpdate4NPlus1
void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMemset0Long
void CoinAbcMemset0Long(double *array, int size)
CoinAbcScatterUpdate4NPlus1Subtract
void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate4NSubtract
void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMemmove
void CoinAbcMemmove(double *array, const double *arrayFrom, int size)
CoinAbcScatterUpdate6Subtract
void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate5Subtract
void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
scatterStruct::offset
CoinBigIndex offset
Definition: CoinAbcHelperFunctions.hpp:551
CoinAbcScatterUpdate7Add
void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate8Subtract
void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
NEW_CHUNK_SIZE
#define NEW_CHUNK_SIZE
Definition: CoinAbcHelperFunctions.hpp:541
NEW_CHUNK_SIZE_OFFSET
#define NEW_CHUNK_SIZE_OFFSET
Definition: CoinAbcHelperFunctions.hpp:543
CoinAbcScatterUpdate2Subtract
void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcInverseSqrts
void CoinAbcInverseSqrts(double *array, int n)
array[i]=1.0/sqrt(array[i])
CoinAbcScatterUpdate7
void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate5Add
void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
cilk_for
#define cilk_for
Definition: CoinAbcCommon.hpp:49
CoinAbcScatterUpdate8Add
void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
NEW_CHUNK_SIZE_INCREMENT
#define NEW_CHUNK_SIZE_INCREMENT
Definition: CoinAbcHelperFunctions.hpp:542
CoinAbcScatterUpdate8
void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMinMaxAbsNormalValues
void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum)
CoinAbcScale
void CoinAbcScale(double *region, double multiplier, int size)
CoinAbcGatherFrom
void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number)
regionTo[i]=regionFrom[index[i]]
CoinAbcScatterUpdate3Subtract
void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
scatterStruct::number
int number
Definition: CoinAbcHelperFunctions.hpp:552
CoinAbcScatterUpdate4NPlus1Add
void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcCompact
int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths)
This compacts several sections and zeroes out end (returns number)
coin_prefetch_const
#define coin_prefetch_const(mem)
Definition: CoinAbcHelperFunctions.hpp:539
CoinAbcScatterUpdate4NPlus3Subtract
void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScaleNormalValues
void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size)
ABC_INLINE
#define ABC_INLINE
Definition: CoinAbcCommon.hpp:30
COIN_RESTRICT
#define COIN_RESTRICT
Definition: ClpMatrixBase.hpp:21
CoinAbcScatterUpdate4NPlus2Subtract
void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcInnerProduct
double CoinAbcInnerProduct(const double *region1, int size, const double *region2)
CoinAbcScatterUpdate2Add
void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
ClpConfig.h
CoinAbcScatterUpdate1Add
void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate4NAdd
void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMaximumAbsElementAndScale
double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size)
maximum fabs(region[i]) and then region[i]*=multiplier
CoinAbcScatterUpdate4
void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterTo
void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number)
regionTo[index[i]]=regionFrom[i]
CoinAbcSetElements
void CoinAbcSetElements(double *region, int size, double value)
CoinAbcScatterUpdate1
void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinSimplexInt
int CoinSimplexInt
Definition: CoinAbcCommon.hpp:22
scatterStruct::functionPointer
scatterUpdate functionPointer
Definition: CoinAbcHelperFunctions.hpp:550
CoinAbcScatterUpdate3
void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMinMaxAbsElement
void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum)
CoinAbcCommon.hpp
CoinAbcScatterUpdate4N
void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMultiplyAdd
void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1, double *regionChanged, double multiplier2)
CoinAbcScatterUpdate4Add
void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcMemmoveAndZero
void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size)
This moves down and zeroes out end.
CoinAbcMemcpyLong
void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size)
CoinAbcScatterUpdate6
void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate3Add
void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcGatherUpdate
CoinFactorizationDouble ABC_INLINE CoinAbcGatherUpdate(CoinSimplexInt number, const CoinFactorizationDouble *COIN_RESTRICT thisElement, const int *COIN_RESTRICT thisIndex, CoinFactorizationDouble *COIN_RESTRICT region)
Definition: CoinAbcHelperFunctions.hpp:729
CoinAbcScatterUpdate4NPlus2
void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate4Subtract
void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE
CoinAbcScatterUpdate7Subtract
void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier, const CoinFactorizationDouble *COIN_RESTRICT thisElement, CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE