include/gudhi_patches/CGAL/NewKernel_d/Vector/avx4.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

// Copyright (c) 2014
// INRIA Saclay-Ile de France (France)
//
// This file is part of CGAL (www.cgal.org); you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 3 of the License,
// or (at your option) any later version.
//
// Licensees holding a valid commercial license may use this file in
// accordance with the commercial license agreement provided with the software.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
//
// $URL$
// $Id$
//
// Author(s)     : Marc Glisse

#ifndef CGAL_VECTOR_AVX4_H
#define CGAL_VECTOR_AVX4_H

#if !defined __AVX__ || (__GNUC__ * 100 + __GNUC_MINOR__ < 408)
#error Requires AVX and gcc 4.8+
#endif
#include <x86intrin.h>

#include <CGAL/NewKernel_d/functor_tags.h>
#include <CGAL/Dimension.h>
#include <CGAL/enum.h> // CGAL::Sign
#include <CGAL/number_utils.h> // CGAL::sign


namespace CGAL {

  struct Avx_vector_4 {
    typedef double NT;
    typedef Dimension_tag<4> Dimension;
    typedef Dimension_tag<4> Max_dimension;
    // No Rebind_dimension, this is a building block
    template<class,bool=true> struct Property : boost::false_type {};
    template<bool b> struct Property<Has_vector_plus_minus_tag,b>
      : boost::true_type {};
    /* MAYBE?
       template<bool b> struct Property<Has_vector_scalar_ops_tag,b>
       : boost::true_type {};
       */
    template<bool b> struct Property<Has_determinant_of_vectors_tag,b>
      : boost::true_type {};
    template<bool b> struct Property<Has_dot_product_tag,b>
      : boost::true_type {};
    template<bool b> struct Property<Has_determinant_of_vectors_omit_last_tag,b>
      : boost::true_type {};

    typedef __m256d Vector;
    struct Construct_vector {
      struct Dimension {
	// Initialize with NaN?
	Vector operator()(unsigned d) const {
	  CGAL_assertion(d==4);
	  return Vector();
	}
      };

      struct Iterator {
	template<typename Iter>
	  Vector operator()(unsigned d,Iter const& f,Iter const& e) const {
	    CGAL_assertion(d==4);
	    double x0 = *f;
	    double x1 = *++f;
	    double x2 = *++f;
	    double x3 = *++f;
	    CGAL_assertion(++f==e);
	    Vector a = { x0, x1, x2, x3 };
	    return a;
	  }
      };

      struct Iterator_and_last {
	template<typename Iter,typename T>
	  Vector operator()(unsigned d,Iter const& f,Iter const& e,double t) const {
	    CGAL_assertion(d==4);
	    double x0 = *f;
	    double x1 = *++f;
	    double x2 = *++f;
	    CGAL_assertion(++f==e);
	    Vector a = { x0, x1, x2, t };
	    return a;
	  }
      };

      struct Values {
	  Vector operator()(double a,double b,double c,double d) const {
	    Vector r = { a, b, c, d };
	    return r;
	  }
      };

      struct Values_divide {
	Vector operator()(double h,double a,double b,double c,double d) const {
	  // {a,b,c,d}/{h,h,h,h} should be roughly the same
	  Vector r = { a/h, b/h, c/h, d/h };
	  return r;
	}
      };
    };

    public:
    typedef double const* Vector_const_iterator;
    static inline Vector_const_iterator vector_begin(Vector const&a){
      return (Vector_const_iterator)(&a);
    }
    static inline Vector_const_iterator vector_end(Vector const&a){
      return (Vector_const_iterator)(&a)+4;
    }
    static inline unsigned size_of_vector(Vector){
      return 4;
    }
    static inline double dot_product(__m256d x, __m256d y){
      __m256d p=x*y;
      __m256d z=_mm256_hadd_pd(p,p);
      return z[0]+z[2];
    }
    private:
    static inline __m256d avx_sym(__m256d x){
#if 0
      return __builtin_shuffle(x,(__m256i){2,3,0,1});
#else
      return _mm256_permute2f128_pd(x,x,1);
#endif
    }
    static inline __m256d avx_left(__m256d x){
#if 0
      return __builtin_shuffle(x,(__m256i){1,2,3,0});
#else
#ifdef __AVX2__
      return _mm256_permute4x64_pd(x,1+2*4+3*16+0*64);
#else
      __m256d s = _mm256_permute2f128_pd(x,x,1);
      return _mm256_shuffle_pd(x,s,5);
#endif
#endif
    }
    static inline __m256d avx_right(__m256d x){
#if 0
      return __builtin_shuffle(x,(__m256i){3,0,1,2});
#else
#ifdef __AVX2__
      return _mm256_permute4x64_pd(x,3+0*4+1*16+2*64);
#else
      __m256d s = _mm256_permute2f128_pd(x,x,1);
      return _mm256_shuffle_pd(s,x,5);
#endif
#endif
    }
    static inline double avx_altprod(__m256d x, __m256d y){
      __m256d p=x*y;
      __m256d z=_mm256_hsub_pd(p,p);
      return z[0]+z[2];
    }
    public:
    static double
      determinant_of_vectors(Vector a, Vector b, Vector c, Vector d) {
      __m256d x=a*avx_left(b)-avx_left(a)*b;
      __m256d yy=a*avx_sym(b);
      __m256d y=yy-avx_sym(yy);
      __m256d z0=x*avx_sym(c);
      __m256d z1=avx_left(x)*c;
      __m256d z2=y*avx_left(c);
      __m256d z=z0+z1-z2;
      return avx_altprod(z,avx_right(d));
    }
    static CGAL::Sign
      sign_of_determinant_of_vectors(Vector a, Vector b, Vector c, Vector d) {
      return CGAL::sign(determinant_of_vectors(a,b,c,d));
    }

    private:
    static inline __m256d avx3_right(__m256d x){
#if 0
      return __builtin_shuffle(x,(__m256i){2,0,1,3}); // can replace 3 with anything
#else
#ifdef __AVX2__
      return _mm256_permute4x64_pd(x,2+0*4+1*16+3*64);
#else
      __m256d s = _mm256_permute2f128_pd(x,x,1);
      return _mm256_shuffle_pd(s,x,12);
#endif
#endif
    }
    public:
    static inline double dot_product_omit_last(__m256d x, __m256d y){
      __m256d p=x*y;
      __m128d q=_mm256_extractf128_pd(p,0);
      double z=_mm_hadd_pd(q,q)[0];
      return z+p[2];
    }
    // Note: without AVX2, is it faster than the scalar computation?
    static double
      determinant_of_vectors_omit_last(Vector a, Vector b, Vector c) {
      __m256d x=a*avx3_right(b)-avx3_right(a)*b;
      return dot_product_omit_last(c,avx3_right(x));
    }
    static CGAL::Sign
      sign_of_determinant_of_vectors_omit_last(Vector a, Vector b, Vector c) {
      return CGAL::sign(determinant_of_vectors_omit_last(a,b,c));
    }

  };

}
#endif