fma

package
v0.0.0-...-3878f85 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 23, 2017 License: MIT Imports: 1 Imported by: 0

Documentation

Overview

THESE PACKAGES ARE FOR DEMONSTRATION PURPOSES ONLY!

THEY DO NOT CONTAIN WORKING INTRINSICS!

See https://github.com/klauspost/intrinsics

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func FmaddPd

func FmaddPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', add the intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMADD132PD, VFMADD213PD, VFMADD231PD'. Intrinsic: '_mm_fmadd_pd'. Requires FMA.

func FmaddPs

func FmaddPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', add the intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMADD132PS, VFMADD213PS, VFMADD231PS'. Intrinsic: '_mm_fmadd_ps'. Requires FMA.

func FmaddSd

func FmaddSd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmaddSd: Multiply the lower double-precision (64-bit) floating-point elements in 'a' and 'b', and add the intermediate result to the lower element in 'c'. Store the result in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := (a[63:0] * b[63:0]) + c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Instruction: 'VFMADD132SD, VFMADD213SD, VFMADD231SD'. Intrinsic: '_mm_fmadd_sd'. Requires FMA.

func FmaddSs

func FmaddSs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmaddSs: Multiply the lower single-precision (32-bit) floating-point elements in 'a' and 'b', and add the intermediate result to the lower element in 'c'. Store the result in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := (a[31:0] * b[31:0]) + c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

Instruction: 'VFMADD132SS, VFMADD213SS, VFMADD231SS'. Intrinsic: '_mm_fmadd_ss'. Requires FMA.

func FmaddsubPd

func FmaddsubPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmaddsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', alternately add and subtract packed elements in 'c' to/from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	IF (j is even)
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
	ELSE
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMADDSUB132PD, VFMADDSUB213PD, VFMADDSUB231PD'. Intrinsic: '_mm_fmaddsub_pd'. Requires FMA.

func FmaddsubPs

func FmaddsubPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmaddsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', alternately add and subtract packed elements in 'c' to/from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF (j is even)
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
	ELSE
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
	FI
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMADDSUB132PS, VFMADDSUB213PS, VFMADDSUB231PS'. Intrinsic: '_mm_fmaddsub_ps'. Requires FMA.

func FmsubPd

func FmsubPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMSUB132PD, VFMSUB213PD, VFMSUB231PD'. Intrinsic: '_mm_fmsub_pd'. Requires FMA.

func FmsubPs

func FmsubPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMSUB132PS, VFMSUB213PS, VFMSUB231PS'. Intrinsic: '_mm_fmsub_ps'. Requires FMA.

func FmsubSd

func FmsubSd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmsubSd: Multiply the lower double-precision (64-bit) floating-point elements in 'a' and 'b', and subtract the lower element in 'c' from the intermediate result. Store the result in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := (a[63:0] * b[63:0]) - c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Instruction: 'VFMSUB132SD, VFMSUB213SD, VFMSUB231SD'. Intrinsic: '_mm_fmsub_sd'. Requires FMA.

func FmsubSs

func FmsubSs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmsubSs: Multiply the lower single-precision (32-bit) floating-point elements in 'a' and 'b', and subtract the lower element in 'c' from the intermediate result. Store the result in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := (a[31:0] * b[31:0]) - c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

Instruction: 'VFMSUB132SS, VFMSUB213SS, VFMSUB231SS'. Intrinsic: '_mm_fmsub_ss'. Requires FMA.

func FmsubaddPd

func FmsubaddPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FmsubaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', alternately subtract and add packed elements in 'c' from/to the intermediate result, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	IF (j is even)
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
	ELSE
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
	FI
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMSUBADD132PD, VFMSUBADD213PD, VFMSUBADD231PD'. Intrinsic: '_mm_fmsubadd_pd'. Requires FMA.

func FmsubaddPs

func FmsubaddPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FmsubaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', alternately subtract and add packed elements in 'c' from/to the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF (j is even)
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
	ELSE
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
	FI
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFMSUBADD132PS, VFMSUBADD213PS, VFMSUBADD231PS'. Intrinsic: '_mm_fmsubadd_ps'. Requires FMA.

func FnmaddPd

func FnmaddPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FnmaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', add the negated intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFNMADD132PD, VFNMADD213PD, VFNMADD231PD'. Intrinsic: '_mm_fnmadd_pd'. Requires FMA.

func FnmaddPs

func FnmaddPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FnmaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', add the negated intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFNMADD132PS, VFNMADD213PS, VFNMADD231PS'. Intrinsic: '_mm_fnmadd_ps'. Requires FMA.

func FnmaddSd

func FnmaddSd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FnmaddSd: Multiply the lower double-precision (64-bit) floating-point elements in 'a' and 'b', and add the negated intermediate result to the lower element in 'c'. Store the result in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := -(a[63:0] * b[63:0]) + c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Instruction: 'VFNMADD132SD, VFNMADD213SD, VFNMADD231SD'. Intrinsic: '_mm_fnmadd_sd'. Requires FMA.

func FnmaddSs

func FnmaddSs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FnmaddSs: Multiply the lower single-precision (32-bit) floating-point elements in 'a' and 'b', and add the negated intermediate result to the lower element in 'c'. Store the result in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := -(a[31:0] * b[31:0]) + c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

Instruction: 'VFNMADD132SS, VFNMADD213SS, VFNMADD231SS'. Intrinsic: '_mm_fnmadd_ss'. Requires FMA.

func FnmsubPd

func FnmsubPd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FnmsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the negated intermediate result, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFNMSUB132PD, VFNMSUB213PD, VFNMSUB231PD'. Intrinsic: '_mm_fnmsub_pd'. Requires FMA.

func FnmsubPs

func FnmsubPs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FnmsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the negated intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
ENDFOR
dst[MAX:128] := 0

Instruction: 'VFNMSUB132PS, VFNMSUB213PS, VFNMSUB231PS'. Intrinsic: '_mm_fnmsub_ps'. Requires FMA.

func FnmsubSd

func FnmsubSd(a x86.M128d, b x86.M128d, c x86.M128d) (dst x86.M128d)

FnmsubSd: Multiply the lower double-precision (64-bit) floating-point elements in 'a' and 'b', and subtract the lower element in 'c' from the negated intermediate result. Store the result in the lower element of 'dst', and copy the upper element from 'a' to the upper element of 'dst'.

dst[63:0] := -(a[63:0] * b[63:0]) - c[63:0]
dst[127:64] := a[127:64]
dst[MAX:128] := 0

Instruction: 'VFNMSUB132SD, VFNMSUB213SD, VFNMSUB231SD'. Intrinsic: '_mm_fnmsub_sd'. Requires FMA.

func FnmsubSs

func FnmsubSs(a x86.M128, b x86.M128, c x86.M128) (dst x86.M128)

FnmsubSs: Multiply the lower single-precision (32-bit) floating-point elements in 'a' and 'b', and subtract the lower element in 'c' from the negated intermediate result. Store the result in the lower element of 'dst', and copy the upper 3 packed elements from 'a' to the upper elements of 'dst'.

dst[31:0] := -(a[31:0] * b[31:0]) - c[31:0]
dst[127:32] := a[127:32]
dst[MAX:128] := 0

Instruction: 'VFNMSUB132SS, VFNMSUB213SS, VFNMSUB231SS'. Intrinsic: '_mm_fnmsub_ss'. Requires FMA.

func M256FmaddPd

func M256FmaddPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FmaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', add the intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMADD132PD, VFMADD213PD, VFMADD231PD'. Intrinsic: '_mm256_fmadd_pd'. Requires FMA.

func M256FmaddPs

func M256FmaddPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FmaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', add the intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMADD132PS, VFMADD213PS, VFMADD231PS'. Intrinsic: '_mm256_fmadd_ps'. Requires FMA.

func M256FmaddsubPd

func M256FmaddsubPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FmaddsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', alternately add and subtract packed elements in 'c' to/from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	IF (j is even)
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
	ELSE
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMADDSUB132PD, VFMADDSUB213PD, VFMADDSUB231PD'. Intrinsic: '_mm256_fmaddsub_pd'. Requires FMA.

func M256FmaddsubPs

func M256FmaddsubPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FmaddsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', alternately add and subtract packed elements in 'c' to/from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	IF (j is even)
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
	ELSE
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
	FI
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMADDSUB132PS, VFMADDSUB213PS, VFMADDSUB231PS'. Intrinsic: '_mm256_fmaddsub_ps'. Requires FMA.

func M256FmsubPd

func M256FmsubPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FmsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMSUB132PD, VFMSUB213PD, VFMSUB231PD'. Intrinsic: '_mm256_fmsub_pd'. Requires FMA.

func M256FmsubPs

func M256FmsubPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FmsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the intermediate result, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMSUB132PS, VFMSUB213PS, VFMSUB231PS'. Intrinsic: '_mm256_fmsub_ps'. Requires FMA.

func M256FmsubaddPd

func M256FmsubaddPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FmsubaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', alternately subtract and add packed elements in 'c' from/to the intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	IF (j is even)
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) + c[i+63:i]
	ELSE
		dst[i+63:i] := (a[i+63:i] * b[i+63:i]) - c[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMSUBADD132PD, VFMSUBADD213PD, VFMSUBADD231PD'. Intrinsic: '_mm256_fmsubadd_pd'. Requires FMA.

func M256FmsubaddPs

func M256FmsubaddPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FmsubaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', alternately subtract and add packed elements in 'c' from/to the intermediate result, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	IF (j is even)
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) + c[i+31:i]
	ELSE
		dst[i+31:i] := (a[i+31:i] * b[i+31:i]) - c[i+31:i]
	FI
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFMSUBADD132PS, VFMSUBADD213PS, VFMSUBADD231PS'. Intrinsic: '_mm256_fmsubadd_ps'. Requires FMA.

func M256FnmaddPd

func M256FnmaddPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FnmaddPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', add the negated intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) + c[i+63:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFNMADD132PD, VFNMADD213PD, VFNMADD231PD'. Intrinsic: '_mm256_fnmadd_pd'. Requires FMA.

func M256FnmaddPs

func M256FnmaddPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FnmaddPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', add the negated intermediate result to packed elements in 'c', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) + c[i+31:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFNMADD132PS, VFNMADD213PS, VFNMADD231PS'. Intrinsic: '_mm256_fnmadd_ps'. Requires FMA.

func M256FnmsubPd

func M256FnmsubPd(a x86.M256d, b x86.M256d, c x86.M256d) (dst x86.M256d)

M256FnmsubPd: Multiply packed double-precision (64-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the negated intermediate result, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*64
	dst[i+63:i] := -(a[i+63:i] * b[i+63:i]) - c[i+63:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFNMSUB132PD, VFNMSUB213PD, VFNMSUB231PD'. Intrinsic: '_mm256_fnmsub_pd'. Requires FMA.

func M256FnmsubPs

func M256FnmsubPs(a x86.M256, b x86.M256, c x86.M256) (dst x86.M256)

M256FnmsubPs: Multiply packed single-precision (32-bit) floating-point elements in 'a' and 'b', subtract packed elements in 'c' from the negated intermediate result, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*32
	dst[i+31:i] := -(a[i+31:i] * b[i+31:i]) - c[i+31:i]
ENDFOR
dst[MAX:256] := 0

Instruction: 'VFNMSUB132PS, VFNMSUB213PS, VFNMSUB231PS'. Intrinsic: '_mm256_fnmsub_ps'. Requires FMA.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL