blitz  Version 1.0.2
tuning.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 /***************************************************************************
3  * blitz/tuning.h Platform-specific code tuning
4  *
5  * $Id$
6  *
7  * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
8  *
9  * This file is a part of Blitz.
10  *
11  * Blitz is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation, either version 3
14  * of the License, or (at your option) any later version.
15  *
16  * Blitz is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Blitz. If not, see <http://www.gnu.org/licenses/>.
23  *
24  * Suggestions: blitz-devel@lists.sourceforge.net
25  * Bugs: blitz-support@lists.sourceforge.net
26  *
27  * For more information, please see the Blitz++ Home Page:
28  * https://sourceforge.net/projects/blitz/
29  *
30  ***************************************************************************/
31 
32 #ifndef BZ_TUNING_H
33 #define BZ_TUNING_H
34 
35 // Cache size estimates. They should be conservative, i.e. underestimate
36 // the real cache sizes. \todo these can be const ints instead of macros.
37 #define BZ_L1_CACHE_ESTIMATED_SIZE 32768
38 #define BZ_L2_CACHE_ESTIMATED_SIZE 6291456
39 // A line size of 64 will also work for 32 and 16 (presumably because 64 is a multiple of both)
40 #define BZ_L1_CACHE_LINE_SIZE 64
41 #define BZ_CACHE_LINES_TO_ALIGN 16
42 // Default state of the tuning switches; the platform-specific section further down changes some of them.
43 #undef BZ_PARTIAL_LOOP_UNROLL
44 #define BZ_PASS_EXPR_BY_VALUE
45 #undef BZ_PTR_INC_FASTER_THAN_INDIRECTION
46 #define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
47 #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
48 #undef BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
49 #undef BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
50 #define BZ_INLINE_GROUP1
51 #define BZ_INLINE_GROUP2
52 #define BZ_COLLAPSE_LOOPS
53 #define BZ_USE_FAST_READ_ARRAY_EXPR
54 #define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
55 #undef BZ_ARRAY_SPACE_FILLING_TRAVERSAL
56 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
57 #undef BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
58 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
59 #define BZ_ARRAY_2D_STENCIL_TILING
60 #define BZ_ARRAY_2D_STENCIL_TILE_SIZE 128
61 #undef BZ_INTERLACE_ARRAYS
62 #define BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
63 #define BZ_FAST_COMPILE
64 #define BZ_TV_EVALUATE_UNROLL_LENGTH 0
65 #define BZ_MAX_BITS_FOR_BINARY_UNROLL 8
66 #define BZ_VECTORIZED_LOOP_WIDTH 32
67 
68 // BZ_NEW_EXPRESSION_TEMPLATES is defined unless the user defines BZ_DISABLE_NEW_ET.
69 #ifndef BZ_DISABLE_NEW_ET
70  #define BZ_NEW_EXPRESSION_TEMPLATES
71 #endif
72 // BZ_FAST_COMPILE switches on BZ_ETPARMS_CONSTREF and BZ_NO_INLINE_ET, both of which are acted on further down.
73 #ifdef BZ_FAST_COMPILE
74 #define BZ_ETPARMS_CONSTREF
75 #define BZ_NO_INLINE_ET
76 #endif
77 
78 // Default inlining keywords; the compiler-specific section and BZ_NO_INLINE_ET below may redefine them
79 #define _bz_forceinline inline
80 #define _bz_inline_et inline
81 
82 
83 /*
84  * Platform-specific tuning
85  */
86 
87 #ifdef _CRAYT3E
88  // The backend compiler on the T3E does a better job of loop
89  // unrolling, so the unrolling switches are left undefined here.
90  #undef BZ_PARTIAL_LOOP_UNROLL
91  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
92  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
93 #endif
94 
95 #ifdef __INTEL_COMPILER
96  // icpc does not vectorize the unrolled loop, so unrolling is definitely bad here: keep the length at 0 (same value as the default above)
97  #define BZ_TV_EVALUATE_UNROLL_LENGTH 0
98 
99  // replace the default inlining keywords with __forceinline
100  #undef _bz_forceinline
101  #undef _bz_inline_et
102  #define _bz_forceinline __forceinline
103  #define _bz_inline_et __forceinline
104 
105 #else // need this since icpc also defines __GNUC__
106 #ifdef __GNUC__
107  // The egcs compiler does a good job of loop unrolling, if
108  // -funroll-loops is used, so the unrolling switches are left undefined.
109  #undef BZ_PARTIAL_LOOP_UNROLL
110  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
111  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
112 #endif
113 #endif
114 // User opt-out: defining BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE removes the kludge switch (the defaults above already leave it undefined).
115 #ifdef BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
116  #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
117 #endif
118 // _bz_inline1 and _bz_inline2 expand to 'inline' when the matching BZ_INLINE_GROUP switch is defined, and to nothing otherwise.
119 #ifdef BZ_INLINE_GROUP1
120  #define _bz_inline1 inline
121 #else
122  #define _bz_inline1
123 #endif
124 
125 #ifdef BZ_INLINE_GROUP2
126  #define _bz_inline2 inline
127 #else
128  #define _bz_inline2
129 #endif
130 
131 // BZ_NO_INLINE_ET overrides every _bz_inline_et definition above: the macro becomes empty
132 #ifdef BZ_NO_INLINE_ET
133  #undef _bz_inline_et
134  #define _bz_inline_et
135 #endif
136 // BZ_ETPARM(X) expands to 'const X&' when BZ_ETPARMS_CONSTREF is defined, and to plain X otherwise.
137 #ifdef BZ_ETPARMS_CONSTREF
138  #define BZ_ETPARM(X) const X&
139 #else
140  #define BZ_ETPARM(X) X
141 #endif
142 
143 #ifdef __DECCXX
144  // The DEC cxx compiler has problems with loop unrolling
145  // because of aliasing, so loop unrolling and anti-aliasing
146  // are done by Blitz++ itself.
147  // The three switches below turn that on.
148  #define BZ_PARTIAL_LOOP_UNROLL
149  #define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
150  #define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
151 #endif
152 
153 /*
154  * BZ_NO_PROPAGATE(X) prevents the compiler from performing
155  * copy propagation on a variable. This is used for loop
156  * unrolling to prevent KAI C++ from rearranging the
157  * ordering of memory accesses.
158  */
159 // Default definition: X is passed through unchanged.
160 #define BZ_NO_PROPAGATE(X) X
161 // Only under KAI C++ (__KCC) with BZ_USE_NO_PROPAGATE defined is X routed through __kai_apply instead.
162 #ifdef __KCC
163 #ifdef BZ_USE_NO_PROPAGATE
164  extern "C" int __kai_apply(const char*, ...);
165  // #undef takes only the macro name; a parameter list after it is ill-formed extra tokens
166  #undef BZ_NO_PROPAGATE
167  #define BZ_NO_PROPAGATE(X) __kai_apply("(%a)",&X)
168 #endif
169 #endif
170 
171 #endif // BZ_TUNING_H