sse3

package

v0.0.0-...-3878f85 Latest Latest Go to latest Published: Jul 23, 2017 License: MIT Imports: 1 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/klauspost/intrinsics

Links

Open Source Insights

Documentation ¶

Overview ¶

THESE PACKAGES ARE FOR DEMONSTRATION PURPOSES ONLY!

THEY DO NOT CONTAIN WORKING INTRINSICS!

See https://github.com/klauspost/intrinsics

Index ¶

func AddsubPd(a x86.M128d, b x86.M128d) (dst x86.M128d)
func AddsubPs(a x86.M128, b x86.M128) (dst x86.M128)
func HaddPd(a x86.M128d, b x86.M128d) (dst x86.M128d)
func HaddPs(a x86.M128, b x86.M128) (dst x86.M128)
func HsubPd(a x86.M128d, b x86.M128d) (dst x86.M128d)
func HsubPs(a x86.M128, b x86.M128) (dst x86.M128)
func LddquSi128(mem_addr *x86.M128iConst) (dst x86.M128i)
func MovedupPd(a x86.M128d) (dst x86.M128d)
func MovehdupPs(a x86.M128) (dst x86.M128)
func MoveldupPs(a x86.M128) (dst x86.M128)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func AddsubPd ¶

func AddsubPd(a x86.M128d, b x86.M128d) (dst x86.M128d)

AddsubPd: Alternately subtract and add packed double-precision (64-bit) floating-point elements in 'b' from/to packed elements in 'a', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*64
	IF (j is even)
		dst[i+63:i] := a[i+63:i] - b[i+63:i]
	ELSE
		dst[i+63:i] := a[i+63:i] + b[i+63:i]
	FI
ENDFOR

Instruction: 'ADDSUBPD'. Intrinsic: '_mm_addsub_pd'. Requires SSE3.

func AddsubPs ¶

func AddsubPs(a x86.M128, b x86.M128) (dst x86.M128)

AddsubPs: Alternately subtract and add packed single-precision (32-bit) floating-point elements in 'b' from/to packed elements in 'a', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*32
	IF (j is even)
		dst[i+31:i] := a[i+31:i] - b[i+31:i]
	ELSE
		dst[i+31:i] := a[i+31:i] + b[i+31:i]
	FI
ENDFOR

Instruction: 'ADDSUBPS'. Intrinsic: '_mm_addsub_ps'. Requires SSE3.

func HaddPd ¶

func HaddPd(a x86.M128d, b x86.M128d) (dst x86.M128d)

HaddPd: Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in 'a' and 'b', and pack the results in 'dst'.

dst[63:0] := a[127:64] + a[63:0]
dst[127:64] := b[127:64] + b[63:0]

Instruction: 'HADDPD'. Intrinsic: '_mm_hadd_pd'. Requires SSE3.

func HaddPs ¶

func HaddPs(a x86.M128, b x86.M128) (dst x86.M128)

HaddPs: Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in 'a' and 'b', and pack the results in 'dst'.

dst[31:0] := a[63:32] + a[31:0]
dst[63:32] := a[127:96] + a[95:64]
dst[95:64] := b[63:32] + b[31:0]
dst[127:96] := b[127:96] + b[95:64]

Instruction: 'HADDPS'. Intrinsic: '_mm_hadd_ps'. Requires SSE3.

func HsubPd ¶

func HsubPd(a x86.M128d, b x86.M128d) (dst x86.M128d)

HsubPd: Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in 'a' and 'b', and pack the results in 'dst'.

dst[63:0] := a[63:0] - a[127:64]
dst[127:64] := b[63:0] - b[127:64]

Instruction: 'HSUBPD'. Intrinsic: '_mm_hsub_pd'. Requires SSE3.

func HsubPs ¶

func HsubPs(a x86.M128, b x86.M128) (dst x86.M128)

HsubPs: Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in 'a' and 'b', and pack the results in 'dst'.

dst[31:0] := a[31:0] - a[63:32]
dst[63:32] := a[95:64] - a[127:96]
dst[95:64] := b[31:0] - b[63:32]
dst[127:96] := b[95:64] - b[127:96]

Instruction: 'HSUBPS'. Intrinsic: '_mm_hsub_ps'. Requires SSE3.

func LddquSi128 ¶

func LddquSi128(mem_addr *x86.M128iConst) (dst x86.M128i)

LddquSi128: Load 128 bits of integer data from unaligned memory into 'dst'. This intrinsic may perform better than '_mm_loadu_si128' when the data crosses a cache line boundary.

dst[127:0] := MEM[mem_addr+127:mem_addr]

Instruction: 'LDDQU'. Intrinsic: '_mm_lddqu_si128'. Requires SSE3.

FIXME: Will likely need to be reworked (has pointer parameter).

func MovedupPd ¶

func MovedupPd(a x86.M128d) (dst x86.M128d)

MovedupPd: Duplicate the low double-precision (64-bit) floating-point element from 'a', and store the results in 'dst'.

dst[63:0] := a[63:0]
dst[127:64] := a[63:0]

Instruction: 'MOVDDUP'. Intrinsic: '_mm_movedup_pd'. Requires SSE3.

func MovehdupPs ¶

func MovehdupPs(a x86.M128) (dst x86.M128)

MovehdupPs: Duplicate odd-indexed single-precision (32-bit) floating-point elements from 'a', and store the results in 'dst'.

dst[31:0] := a[63:32]
dst[63:32] := a[63:32]
dst[95:64] := a[127:96]
dst[127:96] := a[127:96]

Instruction: 'MOVSHDUP'. Intrinsic: '_mm_movehdup_ps'. Requires SSE3.

func MoveldupPs ¶

func MoveldupPs(a x86.M128) (dst x86.M128)

MoveldupPs: Duplicate even-indexed single-precision (32-bit) floating-point elements from 'a', and store the results in 'dst'.

dst[31:0] := a[31:0]
dst[63:32] := a[31:0]
dst[95:64] := a[95:64]
dst[127:96] := a[95:64]

Instruction: 'MOVSLDUP'. Intrinsic: '_mm_moveldup_ps'. Requires SSE3.

Types ¶

This section is empty.

Source Files ¶

View all Source files

sse3.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL