ssse3

package

v0.0.0-...-3878f85 Latest Latest Go to latest Published: Jul 23, 2017 License: MIT Imports: 1 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/klauspost/intrinsics

Links

Open Source Insights

Documentation ¶

Overview ¶

THESE PACKAGES ARE FOR DEMONSTRATION PURPOSES ONLY!

THEY DO NOT CONTAIN WORKING INTRINSICS!

See https://github.com/klauspost/intrinsics

Index ¶

func AbsEpi16(a x86.M128i) (dst x86.M128i)
func AbsEpi32(a x86.M128i) (dst x86.M128i)
func AbsEpi8(a x86.M128i) (dst x86.M128i)
func AbsPi16(a x86.M64) (dst x86.M64)
func AbsPi32(a x86.M64) (dst x86.M64)
func AbsPi8(a x86.M64) (dst x86.M64)
func AlignrEpi8(a x86.M128i, b x86.M128i, count int) (dst x86.M128i)
func AlignrPi8(a x86.M64, b x86.M64, count int) (dst x86.M64)
func HaddEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HaddEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HaddPi16(a x86.M64, b x86.M64) (dst x86.M64)
func HaddPi32(a x86.M64, b x86.M64) (dst x86.M64)
func HaddsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HaddsPi16(a x86.M64, b x86.M64) (dst x86.M64)
func HsubEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HsubEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HsubPi16(a x86.M64, b x86.M64) (dst x86.M64)
func HsubPi32(a x86.M64, b x86.M64) (dst x86.M64)
func HsubsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func HsubsPi16(a x86.M64, b x86.M64) (dst x86.M64)
func MaddubsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MaddubsPi16(a x86.M64, b x86.M64) (dst x86.M64)
func MulhrsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func MulhrsPi16(a x86.M64, b x86.M64) (dst x86.M64)
func ShuffleEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)
func ShufflePi8(a x86.M64, b x86.M64) (dst x86.M64)
func SignEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)
func SignEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)
func SignEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)
func SignPi16(a x86.M64, b x86.M64) (dst x86.M64)
func SignPi32(a x86.M64, b x86.M64) (dst x86.M64)
func SignPi8(a x86.M64, b x86.M64) (dst x86.M64)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func AbsEpi16 ¶

func AbsEpi16(a x86.M128i) (dst x86.M128i)

AbsEpi16: Compute the absolute value of packed 16-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := ABS(a[i+15:i])
ENDFOR

Instruction: 'PABSW'. Intrinsic: '_mm_abs_epi16'. Requires SSSE3.

func AbsEpi32 ¶

func AbsEpi32(a x86.M128i) (dst x86.M128i)

AbsEpi32: Compute the absolute value of packed 32-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 3
	i := j*32
	dst[i+31:i] := ABS(a[i+31:i])
ENDFOR

Instruction: 'PABSD'. Intrinsic: '_mm_abs_epi32'. Requires SSSE3.

func AbsEpi8 ¶

func AbsEpi8(a x86.M128i) (dst x86.M128i)

AbsEpi8: Compute the absolute value of packed 8-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 15
	i := j*8
	dst[i+7:i] := ABS(a[i+7:i])
ENDFOR

Instruction: 'PABSB'. Intrinsic: '_mm_abs_epi8'. Requires SSSE3.

func AbsPi16 ¶

func AbsPi16(a x86.M64) (dst x86.M64)

AbsPi16: Compute the absolute value of packed 16-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := ABS(a[i+15:i])
ENDFOR

Instruction: 'PABSW'. Intrinsic: '_mm_abs_pi16'. Requires SSSE3.

func AbsPi32 ¶

func AbsPi32(a x86.M64) (dst x86.M64)

AbsPi32: Compute the absolute value of packed 32-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := ABS(a[i+31:i])
ENDFOR

Instruction: 'PABSD'. Intrinsic: '_mm_abs_pi32'. Requires SSSE3.

func AbsPi8 ¶

func AbsPi8(a x86.M64) (dst x86.M64)

AbsPi8: Compute the absolute value of packed 8-bit integers in 'a', and store the unsigned results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := ABS(a[i+7:i])
ENDFOR

Instruction: 'PABSB'. Intrinsic: '_mm_abs_pi8'. Requires SSSE3.

func AlignrEpi8 ¶

func AlignrEpi8(a x86.M128i, b x86.M128i, count int) (dst x86.M128i)

AlignrEpi8: Concatenate 16-byte blocks in 'a' and 'b' into a 32-byte temporary result, shift the result right by 'count' bytes, and store the low 16 bytes in 'dst'.

tmp[255:0] := ((a[127:0] << 128) OR b[127:0]) >> (count[7:0]*8)
dst[127:0] := tmp[127:0]

Instruction: 'PALIGNR'. Intrinsic: '_mm_alignr_epi8'. Requires SSSE3.

func AlignrPi8 ¶

func AlignrPi8(a x86.M64, b x86.M64, count int) (dst x86.M64)

AlignrPi8: Concatenate 8-byte blocks in 'a' and 'b' into a 16-byte temporary result, shift the result right by 'count' bytes, and store the low 8 bytes in 'dst'.

tmp[127:0] := ((a[63:0] << 64) OR b[63:0]) >> (count[7:0]*8)
dst[63:0] := tmp[63:0]

Instruction: 'PALIGNR'. Intrinsic: '_mm_alignr_pi8'. Requires SSSE3.

func HaddEpi16 ¶

func HaddEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

HaddEpi16: Horizontally add adjacent pairs of 16-bit integers in 'a' and 'b', and pack the signed 16-bit results in 'dst'.

dst[15:0] := a[31:16] + a[15:0]
dst[31:16] := a[63:48] + a[47:32]
dst[47:32] := a[95:80] + a[79:64]
dst[63:48] := a[127:112] + a[111:96]
dst[79:64] := b[31:16] + b[15:0]
dst[95:80] := b[63:48] + b[47:32]
dst[111:96] := b[95:80] + b[79:64]
dst[127:112] := b[127:112] + b[111:96]

Instruction: 'PHADDW'. Intrinsic: '_mm_hadd_epi16'. Requires SSSE3.

func HaddEpi32 ¶

func HaddEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

HaddEpi32: Horizontally add adjacent pairs of 32-bit integers in 'a' and 'b', and pack the signed 32-bit results in 'dst'.

dst[31:0] := a[63:32] + a[31:0]
dst[63:32] := a[127:96] + a[95:64]
dst[95:64] := b[63:32] + b[31:0]
dst[127:96] := b[127:96] + b[95:64]

Instruction: 'PHADDD'. Intrinsic: '_mm_hadd_epi32'. Requires SSSE3.

func HaddPi16 ¶

func HaddPi16(a x86.M64, b x86.M64) (dst x86.M64)

HaddPi16: Horizontally add adjacent pairs of 16-bit integers in 'a' and 'b', and pack the signed 16-bit results in 'dst'.

dst[15:0] := a[31:16] + a[15:0]
dst[31:16] := a[63:48] + a[47:32]
dst[47:32] := b[31:16] + b[15:0]
dst[63:48] := b[63:48] + b[47:32]

Instruction: 'PHADDW'. Intrinsic: '_mm_hadd_pi16'. Requires SSSE3.

func HaddPi32 ¶

func HaddPi32(a x86.M64, b x86.M64) (dst x86.M64)

HaddPi32: Horizontally add adjacent pairs of 32-bit integers in 'a' and 'b', and pack the signed 32-bit results in 'dst'.

dst[31:0] := a[63:32] + a[31:0]
dst[63:32] := b[63:32] + b[31:0]

Instruction: 'PHADDD'. Intrinsic: '_mm_hadd_pi32'. Requires SSSE3.

func HaddsEpi16 ¶

func HaddsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

HaddsEpi16: Horizontally add adjacent pairs of 16-bit integers in 'a' and 'b' using saturation, and pack the signed 16-bit results in 'dst'.

dst[15:0] = Saturate_To_Int16(a[31:16] + a[15:0])
dst[31:16] = Saturate_To_Int16(a[63:48] + a[47:32])
dst[47:32] = Saturate_To_Int16(a[95:80] + a[79:64])
dst[63:48] = Saturate_To_Int16(a[127:112] + a[111:96])
dst[79:64] = Saturate_To_Int16(b[31:16] + b[15:0])
dst[95:80] = Saturate_To_Int16(b[63:48] + b[47:32])
dst[111:96] = Saturate_To_Int16(b[95:80] + b[79:64])
dst[127:112] = Saturate_To_Int16(b[127:112] + b[111:96])

Instruction: 'PHADDSW'. Intrinsic: '_mm_hadds_epi16'. Requires SSSE3.

func HaddsPi16 ¶

func HaddsPi16(a x86.M64, b x86.M64) (dst x86.M64)

HaddsPi16: Horizontally add adjacent pairs of 16-bit integers in 'a' and 'b' using saturation, and pack the signed 16-bit results in 'dst'.

dst[15:0] = Saturate_To_Int16(a[31:16] + a[15:0])
dst[31:16] = Saturate_To_Int16(a[63:48] + a[47:32])
dst[47:32] = Saturate_To_Int16(b[31:16] + b[15:0])
dst[63:48] = Saturate_To_Int16(b[63:48] + b[47:32])

Instruction: 'PHADDSW'. Intrinsic: '_mm_hadds_pi16'. Requires SSSE3.

func HsubEpi16 ¶

func HsubEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

HsubEpi16: Horizontally subtract adjacent pairs of 16-bit integers in 'a' and 'b', and pack the signed 16-bit results in 'dst'.

dst[15:0] := a[15:0] - a[31:16]
dst[31:16] := a[47:32] - a[63:48]
dst[47:32] := a[79:64] - a[95:80]
dst[63:48] := a[111:96] - a[127:112]
dst[79:64] := b[15:0] - b[31:16]
dst[95:80] := b[47:32] - b[63:48]
dst[111:96] := b[79:64] - b[95:80]
dst[127:112] := b[111:96] - b[127:112]

Instruction: 'PHSUBW'. Intrinsic: '_mm_hsub_epi16'. Requires SSSE3.

func HsubEpi32 ¶

func HsubEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

HsubEpi32: Horizontally subtract adjacent pairs of 32-bit integers in 'a' and 'b', and pack the signed 32-bit results in 'dst'.

dst[31:0] := a[31:0] - a[63:32]
dst[63:32] := a[95:64] - a[127:96]
dst[95:64] := b[31:0] - b[63:32]
dst[127:96] := b[95:64] - b[127:96]

Instruction: 'PHSUBD'. Intrinsic: '_mm_hsub_epi32'. Requires SSSE3.

func HsubPi16 ¶

func HsubPi16(a x86.M64, b x86.M64) (dst x86.M64)

HsubPi16: Horizontally subtract adjacent pairs of 16-bit integers in 'a' and 'b', and pack the signed 16-bit results in 'dst'.

dst[15:0] := a[15:0] - a[31:16]
dst[31:16] := a[47:32] - a[63:48]
dst[47:32] := b[15:0] - b[31:16]
dst[63:48] := b[47:32] - b[63:48]

Instruction: 'PHSUBW'. Intrinsic: '_mm_hsub_pi16'. Requires SSSE3.

func HsubPi32 ¶

func HsubPi32(a x86.M64, b x86.M64) (dst x86.M64)

HsubPi32: Horizontally subtract adjacent pairs of 32-bit integers in 'a' and 'b', and pack the signed 32-bit results in 'dst'.

dst[31:0] := a[31:0] - a[63:32]
dst[63:32] := b[31:0] - b[63:32]

Instruction: 'PHSUBD'. Intrinsic: '_mm_hsub_pi32'. Requires SSSE3.

func HsubsEpi16 ¶

func HsubsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

HsubsEpi16: Horizontally subtract adjacent pairs of 16-bit integers in 'a' and 'b' using saturation, and pack the signed 16-bit results in 'dst'.

dst[15:0] = Saturate_To_Int16(a[15:0] - a[31:16])
dst[31:16] = Saturate_To_Int16(a[47:32] - a[63:48])
dst[47:32] = Saturate_To_Int16(a[79:64] - a[95:80])
dst[63:48] = Saturate_To_Int16(a[111:96] - a[127:112])
dst[79:64] = Saturate_To_Int16(b[15:0] - b[31:16])
dst[95:80] = Saturate_To_Int16(b[47:32] - b[63:48])
dst[111:96] = Saturate_To_Int16(b[79:64] - b[95:80])
dst[127:112] = Saturate_To_Int16(b[111:96] - b[127:112])

Instruction: 'PHSUBSW'. Intrinsic: '_mm_hsubs_epi16'. Requires SSSE3.

func HsubsPi16 ¶

func HsubsPi16(a x86.M64, b x86.M64) (dst x86.M64)

HsubsPi16: Horizontally subtract adjacent pairs of 16-bit integers in 'a' and 'b' using saturation, and pack the signed 16-bit results in 'dst'.

dst[15:0] = Saturate_To_Int16(a[15:0] - a[31:16])
dst[31:16] = Saturate_To_Int16(a[47:32] - a[63:48])
dst[47:32] = Saturate_To_Int16(b[15:0] - b[31:16])
dst[63:48] = Saturate_To_Int16(b[47:32] - b[63:48])

Instruction: 'PHSUBSW'. Intrinsic: '_mm_hsubs_pi16'. Requires SSSE3.

func MaddubsEpi16 ¶

func MaddubsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

MaddubsEpi16: Vertically multiply each unsigned 8-bit integer from 'a' with the corresponding signed 8-bit integer from 'b', producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in 'dst'.

FOR j := 0 to 7
	i := j*16
	dst[i+15:i] := Saturate_To_Int16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
ENDFOR

Instruction: 'PMADDUBSW'. Intrinsic: '_mm_maddubs_epi16'. Requires SSSE3.

func MaddubsPi16 ¶

func MaddubsPi16(a x86.M64, b x86.M64) (dst x86.M64)

MaddubsPi16: Vertically multiply each unsigned 8-bit integer from 'a' with the corresponding signed 8-bit integer from 'b', producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_Int16( a[i+15:i+8]*b[i+15:i+8] + a[i+7:i]*b[i+7:i] )
ENDFOR

Instruction: 'PMADDUBSW'. Intrinsic: '_mm_maddubs_pi16'. Requires SSSE3.

func MulhrsEpi16 ¶

func MulhrsEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

MulhrsEpi16: Multiply packed 16-bit integers in 'a' and 'b', producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to 'dst'.

FOR j := 0 to 7
	i := j*16
	tmp[31:0] := ((a[i+15:i] * b[i+15:i]) >> 14) + 1
	dst[i+15:i] := tmp[16:1]
ENDFOR

Instruction: 'PMULHRSW'. Intrinsic: '_mm_mulhrs_epi16'. Requires SSSE3.

func MulhrsPi16 ¶

func MulhrsPi16(a x86.M64, b x86.M64) (dst x86.M64)

MulhrsPi16: Multiply packed 16-bit integers in 'a' and 'b', producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to 'dst'.

FOR j := 0 to 3
	i := j*16
	tmp[31:0] := ((a[i+15:i] * b[i+15:i]) >> 14) + 1
	dst[i+15:i] := tmp[16:1]
ENDFOR

Instruction: 'PMULHRSW'. Intrinsic: '_mm_mulhrs_pi16'. Requires SSSE3.

func ShuffleEpi8 ¶

func ShuffleEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)

ShuffleEpi8: Shuffle packed 8-bit integers in 'a' according to shuffle control mask in the corresponding 8-bit element of 'b', and store the results in 'dst'.

FOR j := 0 to 15
	i := j*8
	IF b[i+7] == 1
		dst[i+7:i] := 0
	ELSE
		index[3:0] := b[i+3:i]
		dst[i+7:i] := a[index*8+7:index*8]
	FI
ENDFOR

Instruction: 'PSHUFB'. Intrinsic: '_mm_shuffle_epi8'. Requires SSSE3.

func ShufflePi8 ¶

func ShufflePi8(a x86.M64, b x86.M64) (dst x86.M64)

ShufflePi8: Shuffle packed 8-bit integers in 'a' according to shuffle control mask in the corresponding 8-bit element of 'b', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	IF b[i+7] == 1
		dst[i+7:i] := 0
	ELSE
		index[2:0] := b[i+2:i]
		dst[i+7:i] := a[index*8+7:index*8]
	FI
ENDFOR

Instruction: 'PSHUFB'. Intrinsic: '_mm_shuffle_pi8'. Requires SSSE3.

func SignEpi16 ¶

func SignEpi16(a x86.M128i, b x86.M128i) (dst x86.M128i)

SignEpi16: Negate packed 16-bit integers in 'a' when the corresponding signed 16-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 7
	i := j*16
	IF b[i+15:i] < 0
		dst[i+15:i] := NEG(a[i+15:i])
	ELSE IF b[i+15:i] = 0
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR

Instruction: 'PSIGNW'. Intrinsic: '_mm_sign_epi16'. Requires SSSE3.

func SignEpi32 ¶

func SignEpi32(a x86.M128i, b x86.M128i) (dst x86.M128i)

SignEpi32: Negate packed 32-bit integers in 'a' when the corresponding signed 32-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 3
	i := j*32
	IF b[i+31:i] < 0
		dst[i+31:i] := NEG(a[i+31:i])
	ELSE IF b[i+31:i] = 0
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR

Instruction: 'PSIGND'. Intrinsic: '_mm_sign_epi32'. Requires SSSE3.

func SignEpi8 ¶

func SignEpi8(a x86.M128i, b x86.M128i) (dst x86.M128i)

SignEpi8: Negate packed 8-bit integers in 'a' when the corresponding signed 8-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 15
	i := j*8
	IF b[i+7:i] < 0
		dst[i+7:i] := NEG(a[i+7:i])
	ELSE IF b[i+7:i] = 0
		dst[i+7:i] := 0
	ELSE
		dst[i+7:i] := a[i+7:i]
	FI
ENDFOR

Instruction: 'PSIGNB'. Intrinsic: '_mm_sign_epi8'. Requires SSSE3.

func SignPi16 ¶

func SignPi16(a x86.M64, b x86.M64) (dst x86.M64)

SignPi16: Negate packed 16-bit integers in 'a' when the corresponding signed 16-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 3
	i := j*16
	IF b[i+15:i] < 0
		dst[i+15:i] := NEG(a[i+15:i])
	ELSE IF b[i+15:i] = 0
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := a[i+15:i]
	FI
ENDFOR

Instruction: 'PSIGNW'. Intrinsic: '_mm_sign_pi16'. Requires SSSE3.

func SignPi32 ¶

func SignPi32(a x86.M64, b x86.M64) (dst x86.M64)

SignPi32: Negate packed 32-bit integers in 'a' when the corresponding signed 32-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 1
	i := j*32
	IF b[i+31:i] < 0
		dst[i+31:i] := NEG(a[i+31:i])
	ELSE IF b[i+31:i] = 0
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := a[i+31:i]
	FI
ENDFOR

Instruction: 'PSIGND'. Intrinsic: '_mm_sign_pi32'. Requires SSSE3.

func SignPi8 ¶

func SignPi8(a x86.M64, b x86.M64) (dst x86.M64)

SignPi8: Negate packed 8-bit integers in 'a' when the corresponding signed 8-bit integer in 'b' is negative, and store the results in 'dst'. Elements in 'dst' are zeroed out when the corresponding element in 'b' is zero.

FOR j := 0 to 7
	i := j*8
	IF b[i+7:i] < 0
		dst[i+7:i] := NEG(a[i+7:i])
	ELSE IF b[i+7:i] = 0
		dst[i+7:i] := 0
	ELSE
		dst[i+7:i] := a[i+7:i]
	FI
ENDFOR

Instruction: 'PSIGNB'. Intrinsic: '_mm_sign_pi8'. Requires SSSE3.

Types ¶

This section is empty.

Source Files ¶

View all Source files

ssse3.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL