Qt 4.8
qsimd.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include "qsimd_p.h"
43 #include <QByteArray>
44 #include <stdio.h>
45 
46 #if defined(Q_OS_WINCE)
47 #include <windows.h>
48 #endif
49 
50 #if defined(Q_OS_WIN64)
51 #include <intrin.h>
52 #endif
53 
54 #if defined(Q_OS_LINUX) && defined(__arm__)
55 #include "private/qcore_unix_p.h"
56 
57 // the kernel header definitions for HWCAP_*
58 // (the ones we need/may need anyway)
59 
60 // copied from <asm/hwcap.h> (ARM)
61 #define HWCAP_IWMMXT 512
62 #define HWCAP_CRUNCH 1024
63 #define HWCAP_THUMBEE 2048
64 #define HWCAP_NEON 4096
65 #define HWCAP_VFPv3 8192
66 #define HWCAP_VFPv3D16 16384
67 
68 // copied from <linux/auxvec.h>
69 #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
70 
71 #endif
72 
74 
75 #if defined (Q_OS_NACL)
76 static inline uint detectProcessorFeatures()
77 {
78  return 0;
79 }
80 #elif defined (Q_OS_WINCE)
82 {
83  uint features = 0;
84 
85 #if defined (ARM)
86  if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
87  features = IWMMXT;
88  return features;
89  }
90 #elif defined(_X86_)
91  features = 0;
92 #if defined QT_HAVE_MMX
93  if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
94  features |= MMX;
95 #endif
96 #if defined QT_HAVE_3DNOW
97  if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
98  features |= MMX3DNOW;
99 #endif
100  return features;
101 #endif
102  features = 0;
103  return features;
104 }
105 
106 #elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
107 static inline uint detectProcessorFeatures()
108 {
109  uint features = 0;
110 
111 #if defined(Q_OS_LINUX)
112  int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
113  if (auxv != -1) {
114  unsigned long vector[64];
115  int nread;
116  while (features == 0) {
117  nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
118  if (nread <= 0) {
119  // EOF or error
120  break;
121  }
122 
123  int max = nread / (sizeof vector[0]);
124  for (int i = 0; i < max; i += 2)
125  if (vector[i] == AT_HWCAP) {
126  if (vector[i+1] & HWCAP_IWMMXT)
127  features |= IWMMXT;
128  if (vector[i+1] & HWCAP_NEON)
129  features |= NEON;
130  break;
131  }
132  }
133 
134  ::qt_safe_close(auxv);
135  return features;
136  }
137  // fall back if /proc/self/auxv wasn't found
138 #endif
139 
140 #if defined(QT_HAVE_IWMMXT)
141  // runtime detection only available when running as a previlegied process
142  features = IWMMXT;
143 #elif defined(QT_ALWAYS_HAVE_NEON)
144  features = NEON;
145 #endif
146 
147  return features;
148 }
149 
150 #elif defined(__i386__) || defined(_M_IX86)
151 static inline uint detectProcessorFeatures()
152 {
153  uint features = 0;
154 
155  unsigned int extended_result = 0;
156  unsigned int feature_result = 0;
157  uint result = 0;
158  /* see p. 118 of amd64 instruction set manual Vol3 */
159 #if defined(Q_CC_GNU)
160  long cpuid_supported, tmp1;
161  asm ("pushf\n"
162  "pop %0\n"
163  "mov %0, %1\n"
164  "xor $0x00200000, %0\n"
165  "push %0\n"
166  "popf\n"
167  "pushf\n"
168  "pop %0\n"
169  "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
170  : "=a" (cpuid_supported), "=r" (tmp1)
171  );
172  if (cpuid_supported) {
173  asm ("xchg %%ebx, %2\n"
174  "cpuid\n"
175  "xchg %%ebx, %2\n"
176  : "=c" (feature_result), "=d" (result), "=&r" (tmp1)
177  : "a" (1));
178 
179  asm ("xchg %%ebx, %1\n"
180  "cpuid\n"
181  "cmp $0x80000000, %%eax\n"
182  "jnbe 1f\n"
183  "xor %0, %0\n"
184  "jmp 2f\n"
185  "1:\n"
186  "mov $0x80000001, %%eax\n"
187  "cpuid\n"
188  "2:\n"
189  "xchg %%ebx, %1\n"
190  : "=d" (extended_result), "=&r" (tmp1)
191  : "a" (0x80000000)
192  : "%ecx"
193  );
194  }
195 
196 #elif defined (Q_OS_WIN)
197  _asm {
198  push eax
199  push ebx
200  push ecx
201  push edx
202  pushfd
203  pop eax
204  mov ebx, eax
205  xor eax, 00200000h
206  push eax
207  popfd
208  pushfd
209  pop eax
210  mov edx, 0
211  xor eax, ebx
212  jz skip
213 
214  mov eax, 1
215  cpuid
216  mov result, edx
217  mov feature_result, ecx
218  skip:
219  pop edx
220  pop ecx
221  pop ebx
222  pop eax
223  }
224 
225  _asm {
226  push eax
227  push ebx
228  push ecx
229  push edx
230  pushfd
231  pop eax
232  mov ebx, eax
233  xor eax, 00200000h
234  push eax
235  popfd
236  pushfd
237  pop eax
238  mov edx, 0
239  xor eax, ebx
240  jz skip2
241 
242  mov eax, 80000000h
243  cpuid
244  cmp eax, 80000000h
245  jbe skip2
246  mov eax, 80000001h
247  cpuid
248  mov extended_result, edx
249  skip2:
250  pop edx
251  pop ecx
252  pop ebx
253  pop eax
254  }
255 #endif
256 
257 
258  // result now contains the standard feature bits
259  if (result & (1u << 15))
260  features |= CMOV;
261  if (result & (1u << 23))
262  features |= MMX;
263  if (extended_result & (1u << 22))
264  features |= MMXEXT;
265  if (extended_result & (1u << 31))
266  features |= MMX3DNOW;
267  if (extended_result & (1u << 30))
268  features |= MMX3DNOWEXT;
269  if (result & (1u << 25))
270  features |= SSE;
271  if (result & (1u << 26))
272  features |= SSE2;
273  if (feature_result & (1u))
274  features |= SSE3;
275  if (feature_result & (1u << 9))
276  features |= SSSE3;
277  if (feature_result & (1u << 19))
278  features |= SSE4_1;
279  if (feature_result & (1u << 20))
280  features |= SSE4_2;
281  if (feature_result & (1u << 28))
282  features |= AVX;
283 
284  return features;
285 }
286 
287 #elif defined(__x86_64) || defined(Q_OS_WIN64)
288 static inline uint detectProcessorFeatures()
289 {
290  uint features = MMX|SSE|SSE2|CMOV;
291  uint feature_result = 0;
292 
293 #if defined (Q_OS_WIN64)
294  {
295  int info[4];
296  __cpuid(info, 1);
297  feature_result = info[2];
298  }
299 #elif defined(Q_CC_GNU)
300  quint64 tmp;
301  asm ("xchg %%rbx, %1\n"
302  "cpuid\n"
303  "xchg %%rbx, %1\n"
304  : "=c" (feature_result), "=&r" (tmp)
305  : "a" (1)
306  : "%edx"
307  );
308 #endif
309 
310  if (feature_result & (1u))
311  features |= SSE3;
312  if (feature_result & (1u << 9))
313  features |= SSSE3;
314  if (feature_result & (1u << 19))
315  features |= SSE4_1;
316  if (feature_result & (1u << 20))
317  features |= SSE4_2;
318  if (feature_result & (1u << 28))
319  features |= AVX;
320 
321  return features;
322 }
323 
324 #elif defined(__ia64__)
325 static inline uint detectProcessorFeatures()
326 {
327  return MMX|SSE|SSE2;
328 }
329 
330 #else
331 static inline uint detectProcessorFeatures()
332 {
333  return 0;
334 }
335 #endif
336 
337 /*
338  * Use kdesdk/scripts/generate_string_table.pl to update the table below.
339  * Here's the data (don't forget the ONE leading space):
340  mmx
341  mmxext
342  mmx3dnow
343  mmx3dnowext
344  sse
345  sse2
346  cmov
347  iwmmxt
348  neon
349  sse3
350  ssse3
351  sse4.1
352  sse4.2
353  avx
354  */
355 
// begin generated
// All feature names packed into one character array. Each name starts with
// a space and ends with a NUL, so features_string + features_indices[i] is
// the C string for feature bit i.
static const char features_string[] =
    " mmx\0"
    " mmxext\0"
    " mmx3dnow\0"
    " mmx3dnowext\0"
    " sse\0"
    " sse2\0"
    " cmov\0"
    " iwmmxt\0"
    " neon\0"
    " sse3\0"
    " ssse3\0"
    " sse4.1\0"
    " sse4.2\0"
    " avx\0"
    "\0";

// Start offset of each name inside features_string; terminated by -1.
static const int features_indices[] = {
    0, 5, 13, 23, 36, 41, 47, 53,
    61, 67, 73, 80, 88, 96, -1
};
// end generated

// Number of real entries in features_indices: the element count minus the
// trailing -1 sentinel.
const int features_count = sizeof features_indices / sizeof features_indices[0] - 1;
381 
383 {
384  static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
385  if (features != -1)
386  return features;
387 
389  QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
390  if (!disable.isEmpty()) {
391  disable.prepend(' ');
392  for (int i = 0; i < features_count; ++i) {
393  if (disable.contains(features_string + features_indices[i]))
394  f &= ~(1 << i);
395  }
396  }
397 
398  features = f;
399  return features;
400 }
401 
403 {
404  uint features = qDetectCPUFeatures();
405  printf("Processor features: ");
406  for (int i = 0; i < features_count; ++i) {
407  if (features & (1 << i))
408  printf("%s", features_string + features_indices[i]);
409  }
410  puts("");
411 }
412 
static const int features_indices[]
Definition: qsimd.cpp:374
Q_CORE_EXPORT QByteArray qgetenv(const char *varName)
static mach_timebase_info_data_t info
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
Definition: qsimd_p.h:225
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:135
#define O_RDONLY
Definition: qsimd_p.h:217
const int features_count
Definition: qsimd.cpp:380
quint16 u
QByteArray & prepend(char c)
Prepends the character ch to this byte array.
static int qt_safe_close(int fd)
Definition: qcore_unix_p.h:297
Definition: qsimd_p.h:218
#define Q_BASIC_ATOMIC_INITIALIZER(a)
Definition: qbasicatomic.h:218
Definition: qsimd_p.h:212
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
static int qt_safe_open(const char *pathname, int flags, mode_t mode=0777)
Definition: qcore_unix_p.h:171
unsigned __int64 quint64
Definition: qglobal.h:943
void qDumpCPUFeatures()
Definition: qsimd.cpp:402
unsigned int uint
Definition: qglobal.h:996
static int cmp(const ushort *s1, const ushort *s2, size_t len)
uint qDetectCPUFeatures()
Definition: qsimd.cpp:382
Definition: qsimd_p.h:222
static const char features_string[]
Definition: qsimd.cpp:357
Definition: qsimd_p.h:221
bool isEmpty() const
Returns true if the byte array has size 0; otherwise returns false.
Definition: qbytearray.h:421
static uint detectProcessorFeatures()
Definition: qsimd.cpp:81
Definition: qsimd_p.h:220
Definition: qsimd_p.h:216
static qint64 qt_safe_read(int fd, void *data, qint64 maxlen)
Definition: qcore_unix_p.h:273
QBool contains(char c) const
Returns true if the byte array contains the character ch; otherwise returns false.
Definition: qbytearray.h:525