arm_neon.h文件功能

Android 汇编

Created At : 2019-05-30 08:45

Views 👀 :

Comment:

功能介绍

基本类型

结构化数据类型

基本指令集

功能介绍

文件中有近60000行，学习是一个渐进的过程。
所有的功能在注释中说明

预处理

/**
 * Predefined scalar floating-point aliases; the declarations later in the
 * header refer to these names.
 *
 * float32_t is plain float, float16_t is the compiler's __fp16 type, and
 * float64_t (double) is only declared when __aarch64__ is defined.
 */
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
typedef double float64_t;
#endif

/**
 * Scalar poly element types.
 *
 * When __aarch64__ is defined they alias the unsigned fixed-width integer
 * types, and poly64_t / poly128_t are declared as well. Otherwise only
 * poly8_t and poly16_t are declared, and they alias the signed types.
 */
#ifdef __aarch64__
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
typedef uint64_t poly64_t;
typedef __uint128_t poly128_t;
#else
typedef int8_t poly8_t;
typedef int16_t poly16_t;
#endif

基本类型

64bit数据类型，映射至寄存器即为D0-D31

/**
 * 64-bit signed integer vectors:
 *   int8x8_t  - int8_t[8]
 *   int16x4_t - int16_t[4]
 *   int32x2_t - int32_t[2]
 *   int64x1_t - int64_t[1]
 */
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;

/**
 * 64-bit unsigned integer vectors; same lane layout as the signed
 * variants, with the element type changed from intN_t to uintN_t:
 *   uint8x8_t  - uint8_t[8]
 *   uint16x4_t - uint16_t[4]
 *   uint32x2_t - uint32_t[2]
 *   uint64x1_t - uint64_t[1]
 */
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;

/**
 * 64-bit floating-point vectors:
 *   float16x4_t - float16_t[4]; half precision, supported on some newer
 *                 CPUs, and not yet a basic data type in C/C++
 *   float32x2_t - float32_t[2]
 */
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;

/**
 * float64x1_t - float64_t[1]; only declared when __aarch64__ is defined.
 */
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
#endif
/**
 * 64-bit poly vectors (declared with neon_polyvector_type):
 *   poly8x8_t  - poly8_t[8]
 *   poly16x4_t - poly16_t[4]
 *   poly64x1_t - poly64_t[1]; only declared when __aarch64__ is defined
 * The poly8 and poly16 types are rarely needed in common algorithms.
 */
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
#ifdef __aarch64__
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
#endif

128bit数据类型，映射至寄存器即为Q0-Q15

/**
 * 128-bit signed integer vectors:
 *   int8x16_t - int8_t[16]
 *   int16x8_t - int16_t[8]
 *   int32x4_t - int32_t[4]
 *   int64x2_t - int64_t[2]
 */
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;

/**
 * 128-bit unsigned integer vectors; same lane layout as the signed
 * variants, with the element type changed from intN_t to uintN_t:
 *   uint8x16_t - uint8_t[16]
 *   uint16x8_t - uint16_t[8]
 *   uint32x4_t - uint32_t[4]
 *   uint64x2_t - uint64_t[2]
 */
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;

/**
 * 128-bit floating-point vectors:
 *   float16x8_t - float16_t[8]; half precision, supported on some newer
 *                 CPUs, and not yet a basic data type in C/C++
 *   float32x4_t - float32_t[4]
 */
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;

/**
 * float64x2_t - float64_t[2]; only declared when __aarch64__ is defined.
 */
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
/**
 * 128-bit poly vectors (declared with neon_polyvector_type):
 *   poly8x16_t - poly8_t[16]
 *   poly16x8_t - poly16_t[8]
 *   poly64x2_t - poly64_t[2]; only declared when __aarch64__ is defined
 * The poly8 and poly16 types are rarely needed in common algorithms.
 */
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
#ifdef __aarch64__
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
#endif

结构化数据类型

这些数据类型都是上面基本数据类型的组合的结构化数据类型，被映射到多个寄存器中。
typedef struct int8x8x2_t {
  int8x8_t val[2];
} int8x8x2_t;
......
#ifdef __aarch64__
typedef struct poly64x1x4_t {
  poly64x1_t val[4];
} poly64x1x4_t;
#endif
基本指令集
NEON指令按照操作数类型可以分为：
正常指令：生成大小相同且类型通常与操作数向量相同的结果向量
长指令：对双字向量操作数执行运算，生成四字向量的结果。所生成的元素一般是操作数元素宽度的两倍，
并属于同一类型
宽指令：一个双字向量操作数和一个四字向量操作数执行运算，生成四字向量结果。所生成的元素和第一个
操作数的元素是第二个操作数元素宽度的两倍
饱和指令：当超过数据类型指定的范围则自动限制在该范围内