mmx

package
Version: v0.0.0-...-3878f85 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 23, 2017 License: MIT Imports: 1 Imported by: 0

Documentation

Overview

THESE PACKAGES ARE FOR DEMONSTRATION PURPOSES ONLY!

THEY DO NOT CONTAIN WORKING INTRINSICS!

See https://github.com/klauspost/intrinsics

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AddPi16

func AddPi16(a x86.M64, b x86.M64) (dst x86.M64)

AddPi16: Add packed 16-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[i+15:i] + b[i+15:i]
ENDFOR

Instruction: 'PADDW'. Intrinsic: '_mm_add_pi16'. Requires MMX.

func AddPi32

func AddPi32(a x86.M64, b x86.M64) (dst x86.M64)

AddPi32: Add packed 32-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i] + b[i+31:i]
ENDFOR

Instruction: 'PADDD'. Intrinsic: '_mm_add_pi32'. Requires MMX.

func AddPi8

func AddPi8(a x86.M64, b x86.M64) (dst x86.M64)

AddPi8: Add packed 8-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := a[i+7:i] + b[i+7:i]
ENDFOR

Instruction: 'PADDB'. Intrinsic: '_mm_add_pi8'. Requires MMX.

func AddsPi16

func AddsPi16(a x86.M64, b x86.M64) (dst x86.M64)

AddsPi16: Add packed 16-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_Int16( a[i+15:i] + b[i+15:i] )
ENDFOR

Instruction: 'PADDSW'. Intrinsic: '_mm_adds_pi16'. Requires MMX.

func AddsPi8

func AddsPi8(a x86.M64, b x86.M64) (dst x86.M64)

AddsPi8: Add packed 8-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_Int8( a[i+7:i] + b[i+7:i] )
ENDFOR

Instruction: 'PADDSB'. Intrinsic: '_mm_adds_pi8'. Requires MMX.

func AddsPu16

func AddsPu16(a x86.M64, b x86.M64) (dst x86.M64)

AddsPu16: Add packed unsigned 16-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_UnsignedInt16( a[i+15:i] + b[i+15:i] )
ENDFOR

Instruction: 'PADDUSW'. Intrinsic: '_mm_adds_pu16'. Requires MMX.

func AddsPu8

func AddsPu8(a x86.M64, b x86.M64) (dst x86.M64)

AddsPu8: Add packed unsigned 8-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_UnsignedInt8( a[i+7:i] + b[i+7:i] )
ENDFOR

Instruction: 'PADDUSB'. Intrinsic: '_mm_adds_pu8'. Requires MMX.

func AndSi64

func AndSi64(a x86.M64, b x86.M64) (dst x86.M64)

AndSi64: Compute the bitwise AND of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] AND b[63:0])

Instruction: 'PAND'. Intrinsic: '_mm_and_si64'. Requires MMX.

func AndnotSi64

func AndnotSi64(a x86.M64, b x86.M64) (dst x86.M64)

AndnotSi64: Compute the bitwise AND NOT of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := ((NOT a[63:0]) AND b[63:0])

Instruction: 'PANDN'. Intrinsic: '_mm_andnot_si64'. Requires MMX.

func CmpeqPi16

func CmpeqPi16(a x86.M64, b x86.M64) (dst x86.M64)

CmpeqPi16: Compare packed 16-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
ENDFOR

Instruction: 'PCMPEQW'. Intrinsic: '_mm_cmpeq_pi16'. Requires MMX.

func CmpeqPi32

func CmpeqPi32(a x86.M64, b x86.M64) (dst x86.M64)

CmpeqPi32: Compare packed 32-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPEQD'. Intrinsic: '_mm_cmpeq_pi32'. Requires MMX.

func CmpeqPi8

func CmpeqPi8(a x86.M64, b x86.M64) (dst x86.M64)

CmpeqPi8: Compare packed 8-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
ENDFOR

Instruction: 'PCMPEQB'. Intrinsic: '_mm_cmpeq_pi8'. Requires MMX.

func CmpgtPi16

func CmpgtPi16(a x86.M64, b x86.M64) (dst x86.M64)

CmpgtPi16: Compare packed 16-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0
ENDFOR

Instruction: 'PCMPGTW'. Intrinsic: '_mm_cmpgt_pi16'. Requires MMX.

func CmpgtPi32

func CmpgtPi32(a x86.M64, b x86.M64) (dst x86.M64)

CmpgtPi32: Compare packed 32-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPGTD'. Intrinsic: '_mm_cmpgt_pi32'. Requires MMX.

func CmpgtPi8

func CmpgtPi8(a x86.M64, b x86.M64) (dst x86.M64)

CmpgtPi8: Compare packed 8-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0
ENDFOR

Instruction: 'PCMPGTB'. Intrinsic: '_mm_cmpgt_pi8'. Requires MMX.

func Cvtm64Si64

func Cvtm64Si64(a x86.M64) int64

Cvtm64Si64: Copy 64-bit integer 'a' to 'dst'.

dst[63:0] := a[63:0]

Instruction: 'MOVQ'. Intrinsic: '_mm_cvtm64_si64'. Requires MMX.

func Cvtsi32Si64

func Cvtsi32Si64(a int) (dst x86.M64)

Cvtsi32Si64: Copy 32-bit integer 'a' to the lower elements of 'dst', and zero the upper element of 'dst'.

dst[31:0] := a[31:0]
dst[63:32] := 0

Instruction: 'MOVD'. Intrinsic: '_mm_cvtsi32_si64'. Requires MMX.

func Cvtsi64M64

func Cvtsi64M64(a int64) (dst x86.M64)

Cvtsi64M64: Copy 64-bit integer 'a' to 'dst'.

dst[63:0] := a[63:0]

Instruction: 'MOVQ'. Intrinsic: '_mm_cvtsi64_m64'. Requires MMX.

func Cvtsi64Si32

func Cvtsi64Si32(a x86.M64) int

Cvtsi64Si32: Copy the lower 32-bit integer in 'a' to 'dst'.

dst[31:0] := a[31:0]

Instruction: 'MOVD'. Intrinsic: '_mm_cvtsi64_si32'. Requires MMX.

func Empty

func Empty()

Empty: Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures.

Instruction: 'EMMS'. Intrinsic: '_m_empty'. Requires MMX.

func Empty2

func Empty2()

Empty2: Empty the MMX state, which marks the x87 FPU registers as available for use by x87 instructions. This instruction must be used at the end of all MMX technology procedures.

Instruction: 'EMMS'. Intrinsic: '_mm_empty'. Requires MMX.

func FromInt

func FromInt(a int) (dst x86.M64)

FromInt: Copy 32-bit integer 'a' to the lower elements of 'dst', and zero the upper element of 'dst'.

dst[31:0] := a[31:0]
dst[63:32] := 0

Instruction: 'MOVD'. Intrinsic: '_m_from_int'. Requires MMX.

func FromInt64

func FromInt64(a int64) (dst x86.M64)

FromInt64: Copy 64-bit integer 'a' to 'dst'.

dst[63:0] := a[63:0]

Instruction: 'MOVQ'. Intrinsic: '_m_from_int64'. Requires MMX.

func MaddPi16

func MaddPi16(a x86.M64, b x86.M64) (dst x86.M64)

MaddPi16: Multiply packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i+16]*b[i+31:i+16] + a[i+15:i]*b[i+15:i]
ENDFOR

Instruction: 'PMADDWD'. Intrinsic: '_mm_madd_pi16'. Requires MMX.

func MulhiPi16

func MulhiPi16(a x86.M64, b x86.M64) (dst x86.M64)

MulhiPi16: Multiply the packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in 'dst'.

FOR j := 0 to 3
	i := j*16
	tmp[31:0] := a[i+15:i] * b[i+15:i]
	dst[i+15:i] := tmp[31:16]
ENDFOR

Instruction: 'PMULHW'. Intrinsic: '_mm_mulhi_pi16'. Requires MMX.

func MulloPi16

func MulloPi16(a x86.M64, b x86.M64) (dst x86.M64)

MulloPi16: Multiply the packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in 'dst'.

FOR j := 0 to 3
	i := j*16
	tmp[31:0] := a[i+15:i] * b[i+15:i]
	dst[i+15:i] := tmp[15:0]
ENDFOR

Instruction: 'PMULLW'. Intrinsic: '_mm_mullo_pi16'. Requires MMX.

func OrSi64

func OrSi64(a x86.M64, b x86.M64) (dst x86.M64)

OrSi64: Compute the bitwise OR of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] OR b[63:0])

Instruction: 'POR'. Intrinsic: '_mm_or_si64'. Requires MMX.

func PacksPi16

func PacksPi16(a x86.M64, b x86.M64) (dst x86.M64)

PacksPi16: Convert packed 16-bit integers from 'a' and 'b' to packed 8-bit integers using signed saturation, and store the results in 'dst'.

dst[7:0] := Saturate_Int16_To_Int8 (a[15:0])
dst[15:8] := Saturate_Int16_To_Int8 (a[31:16])
dst[23:16] := Saturate_Int16_To_Int8 (a[47:32])
dst[31:24] := Saturate_Int16_To_Int8 (a[63:48])
dst[39:32] := Saturate_Int16_To_Int8 (b[15:0])
dst[47:40] := Saturate_Int16_To_Int8 (b[31:16])
dst[55:48] := Saturate_Int16_To_Int8 (b[47:32])
dst[63:56] := Saturate_Int16_To_Int8 (b[63:48])

Instruction: 'PACKSSWB'. Intrinsic: '_mm_packs_pi16'. Requires MMX.

func PacksPi32

func PacksPi32(a x86.M64, b x86.M64) (dst x86.M64)

PacksPi32: Convert packed 32-bit integers from 'a' and 'b' to packed 16-bit integers using signed saturation, and store the results in 'dst'.

dst[15:0] := Saturate_Int32_To_Int16 (a[31:0])
dst[31:16] := Saturate_Int32_To_Int16 (a[63:32])
dst[47:32] := Saturate_Int32_To_Int16 (b[31:0])
dst[63:48] := Saturate_Int32_To_Int16 (b[63:32])

Instruction: 'PACKSSDW'. Intrinsic: '_mm_packs_pi32'. Requires MMX.

func PacksPu16

func PacksPu16(a x86.M64, b x86.M64) (dst x86.M64)

PacksPu16: Convert packed 16-bit integers from 'a' and 'b' to packed 8-bit integers using unsigned saturation, and store the results in 'dst'.

dst[7:0] := Saturate_Int16_To_UnsignedInt8 (a[15:0])
dst[15:8] := Saturate_Int16_To_UnsignedInt8 (a[31:16])
dst[23:16] := Saturate_Int16_To_UnsignedInt8 (a[47:32])
dst[31:24] := Saturate_Int16_To_UnsignedInt8 (a[63:48])
dst[39:32] := Saturate_Int16_To_UnsignedInt8 (b[15:0])
dst[47:40] := Saturate_Int16_To_UnsignedInt8 (b[31:16])
dst[55:48] := Saturate_Int16_To_UnsignedInt8 (b[47:32])
dst[63:56] := Saturate_Int16_To_UnsignedInt8 (b[63:48])

Instruction: 'PACKUSWB'. Intrinsic: '_mm_packs_pu16'. Requires MMX.

func Packssdw

func Packssdw(a x86.M64, b x86.M64) (dst x86.M64)

Packssdw: Convert packed 32-bit integers from 'a' and 'b' to packed 16-bit integers using signed saturation, and store the results in 'dst'.

dst[15:0] := Saturate_Int32_To_Int16 (a[31:0])
dst[31:16] := Saturate_Int32_To_Int16 (a[63:32])
dst[47:32] := Saturate_Int32_To_Int16 (b[31:0])
dst[63:48] := Saturate_Int32_To_Int16 (b[63:32])

Instruction: 'PACKSSDW'. Intrinsic: '_m_packssdw'. Requires MMX.

func Packsswb

func Packsswb(a x86.M64, b x86.M64) (dst x86.M64)

Packsswb: Convert packed 16-bit integers from 'a' and 'b' to packed 8-bit integers using signed saturation, and store the results in 'dst'.

dst[7:0] := Saturate_Int16_To_Int8 (a[15:0])
dst[15:8] := Saturate_Int16_To_Int8 (a[31:16])
dst[23:16] := Saturate_Int16_To_Int8 (a[47:32])
dst[31:24] := Saturate_Int16_To_Int8 (a[63:48])
dst[39:32] := Saturate_Int16_To_Int8 (b[15:0])
dst[47:40] := Saturate_Int16_To_Int8 (b[31:16])
dst[55:48] := Saturate_Int16_To_Int8 (b[47:32])
dst[63:56] := Saturate_Int16_To_Int8 (b[63:48])

Instruction: 'PACKSSWB'. Intrinsic: '_m_packsswb'. Requires MMX.

func Packuswb

func Packuswb(a x86.M64, b x86.M64) (dst x86.M64)

Packuswb: Convert packed 16-bit integers from 'a' and 'b' to packed 8-bit integers using unsigned saturation, and store the results in 'dst'.

dst[7:0] := Saturate_Int16_To_UnsignedInt8 (a[15:0])
dst[15:8] := Saturate_Int16_To_UnsignedInt8 (a[31:16])
dst[23:16] := Saturate_Int16_To_UnsignedInt8 (a[47:32])
dst[31:24] := Saturate_Int16_To_UnsignedInt8 (a[63:48])
dst[39:32] := Saturate_Int16_To_UnsignedInt8 (b[15:0])
dst[47:40] := Saturate_Int16_To_UnsignedInt8 (b[31:16])
dst[55:48] := Saturate_Int16_To_UnsignedInt8 (b[47:32])
dst[63:56] := Saturate_Int16_To_UnsignedInt8 (b[63:48])

Instruction: 'PACKUSWB'. Intrinsic: '_m_packuswb'. Requires MMX.

func Paddb

func Paddb(a x86.M64, b x86.M64) (dst x86.M64)

Paddb: Add packed 8-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := a[i+7:i] + b[i+7:i]
ENDFOR

Instruction: 'PADDB'. Intrinsic: '_m_paddb'. Requires MMX.

func Paddd

func Paddd(a x86.M64, b x86.M64) (dst x86.M64)

Paddd: Add packed 32-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i] + b[i+31:i]
ENDFOR

Instruction: 'PADDD'. Intrinsic: '_m_paddd'. Requires MMX.

func Paddsb

func Paddsb(a x86.M64, b x86.M64) (dst x86.M64)

Paddsb: Add packed 8-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_Int8( a[i+7:i] + b[i+7:i] )
ENDFOR

Instruction: 'PADDSB'. Intrinsic: '_m_paddsb'. Requires MMX.

func Paddsw

func Paddsw(a x86.M64, b x86.M64) (dst x86.M64)

Paddsw: Add packed 16-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_Int16( a[i+15:i] + b[i+15:i] )
ENDFOR

Instruction: 'PADDSW'. Intrinsic: '_m_paddsw'. Requires MMX.

func Paddusb

func Paddusb(a x86.M64, b x86.M64) (dst x86.M64)

Paddusb: Add packed unsigned 8-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_UnsignedInt8( a[i+7:i] + b[i+7:i] )
ENDFOR

Instruction: 'PADDUSB'. Intrinsic: '_m_paddusb'. Requires MMX.

func Paddusw

func Paddusw(a x86.M64, b x86.M64) (dst x86.M64)

Paddusw: Add packed unsigned 16-bit integers in 'a' and 'b' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_UnsignedInt16( a[i+15:i] + b[i+15:i] )
ENDFOR

Instruction: 'PADDUSW'. Intrinsic: '_m_paddusw'. Requires MMX.

func Paddw

func Paddw(a x86.M64, b x86.M64) (dst x86.M64)

Paddw: Add packed 16-bit integers in 'a' and 'b', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[i+15:i] + b[i+15:i]
ENDFOR

Instruction: 'PADDW'. Intrinsic: '_m_paddw'. Requires MMX.

func Pand

func Pand(a x86.M64, b x86.M64) (dst x86.M64)

Pand: Compute the bitwise AND of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] AND b[63:0])

Instruction: 'PAND'. Intrinsic: '_m_pand'. Requires MMX.

func Pandn

func Pandn(a x86.M64, b x86.M64) (dst x86.M64)

Pandn: Compute the bitwise AND NOT of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := ((NOT a[63:0]) AND b[63:0])

Instruction: 'PANDN'. Intrinsic: '_m_pandn'. Requires MMX.

func Pcmpeqb

func Pcmpeqb(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpeqb: Compare packed 8-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := ( a[i+7:i] == b[i+7:i] ) ? 0xFF : 0
ENDFOR

Instruction: 'PCMPEQB'. Intrinsic: '_m_pcmpeqb'. Requires MMX.

func Pcmpeqd

func Pcmpeqd(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpeqd: Compare packed 32-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := ( a[i+31:i] == b[i+31:i] ) ? 0xFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPEQD'. Intrinsic: '_m_pcmpeqd'. Requires MMX.

func Pcmpeqw

func Pcmpeqw(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpeqw: Compare packed 16-bit integers in 'a' and 'b' for equality, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := ( a[i+15:i] == b[i+15:i] ) ? 0xFFFF : 0
ENDFOR

Instruction: 'PCMPEQW'. Intrinsic: '_m_pcmpeqw'. Requires MMX.

func Pcmpgtb

func Pcmpgtb(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpgtb: Compare packed 8-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := ( a[i+7:i] > b[i+7:i] ) ? 0xFF : 0
ENDFOR

Instruction: 'PCMPGTB'. Intrinsic: '_m_pcmpgtb'. Requires MMX.

func Pcmpgtd

func Pcmpgtd(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpgtd: Compare packed 32-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := ( a[i+31:i] > b[i+31:i] ) ? 0xFFFFFFFF : 0
ENDFOR

Instruction: 'PCMPGTD'. Intrinsic: '_m_pcmpgtd'. Requires MMX.

func Pcmpgtw

func Pcmpgtw(a x86.M64, b x86.M64) (dst x86.M64)

Pcmpgtw: Compare packed 16-bit integers in 'a' and 'b' for greater-than, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := ( a[i+15:i] > b[i+15:i] ) ? 0xFFFF : 0
ENDFOR

Instruction: 'PCMPGTW'. Intrinsic: '_m_pcmpgtw'. Requires MMX.

func Pmaddwd

func Pmaddwd(a x86.M64, b x86.M64) (dst x86.M64)

Pmaddwd: Multiply packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i+16]*b[i+31:i+16] + a[i+15:i]*b[i+15:i]
ENDFOR

Instruction: 'PMADDWD'. Intrinsic: '_m_pmaddwd'. Requires MMX.

func Pmulhw

func Pmulhw(a x86.M64, b x86.M64) (dst x86.M64)

Pmulhw: Multiply the packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in 'dst'.

FOR j := 0 to 3
	i := j*16
	tmp[31:0] := a[i+15:i] * b[i+15:i]
	dst[i+15:i] := tmp[31:16]
ENDFOR

Instruction: 'PMULHW'. Intrinsic: '_m_pmulhw'. Requires MMX.

func Pmullw

func Pmullw(a x86.M64, b x86.M64) (dst x86.M64)

Pmullw: Multiply the packed 16-bit integers in 'a' and 'b', producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in 'dst'.

FOR j := 0 to 3
	i := j*16
	tmp[31:0] := a[i+15:i] * b[i+15:i]
	dst[i+15:i] := tmp[15:0]
ENDFOR

Instruction: 'PMULLW'. Intrinsic: '_m_pmullw'. Requires MMX.

func Por

func Por(a x86.M64, b x86.M64) (dst x86.M64)

Por: Compute the bitwise OR of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] OR b[63:0])

Instruction: 'POR'. Intrinsic: '_m_por'. Requires MMX.

func Pslld

func Pslld(a x86.M64, count x86.M64) (dst x86.M64)

Pslld: Shift packed 32-bit integers in 'a' left by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] << count[63:0])
	FI
ENDFOR

Instruction: 'PSLLD'. Intrinsic: '_m_pslld'. Requires MMX.

func Pslldi

func Pslldi(a x86.M64, imm8 byte) (dst x86.M64)

Pslldi: Shift packed 32-bit integers in 'a' left by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] << imm8[7:0])
	FI
ENDFOR

Instruction: 'PSLLD'. Intrinsic: '_m_pslldi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psllq

func Psllq(a x86.M64, count x86.M64) (dst x86.M64)

Psllq: Shift 64-bit integer 'a' left by 'count' while shifting in zeros, and store the result in 'dst'.

IF count[63:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] << count[63:0])
FI

Instruction: 'PSLLQ'. Intrinsic: '_m_psllq'. Requires MMX.

func Psllqi

func Psllqi(a x86.M64, imm8 byte) (dst x86.M64)

Psllqi: Shift 64-bit integer 'a' left by 'imm8' while shifting in zeros, and store the result in 'dst'.

IF imm8[7:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] << imm8[7:0])
FI

Instruction: 'PSLLQ'. Intrinsic: '_m_psllqi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psllw

func Psllw(a x86.M64, count x86.M64) (dst x86.M64)

Psllw: Shift packed 16-bit integers in 'a' left by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] << count[63:0])
	FI
ENDFOR

Instruction: 'PSLLW'. Intrinsic: '_m_psllw'. Requires MMX.

func Psllwi

func Psllwi(a x86.M64, imm8 byte) (dst x86.M64)

Psllwi: Shift packed 16-bit integers in 'a' left by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] << imm8[7:0])
	FI
ENDFOR

Instruction: 'PSLLW'. Intrinsic: '_m_psllwi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psrad

func Psrad(a x86.M64, count x86.M64) (dst x86.M64)

Psrad: Shift packed 32-bit integers in 'a' right by 'count' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := SignBit
	ELSE
		dst[i+31:i] := SignExtend(a[i+31:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRAD'. Intrinsic: '_m_psrad'. Requires MMX.

func Psradi

func Psradi(a x86.M64, imm8 byte) (dst x86.M64)

Psradi: Shift packed 32-bit integers in 'a' right by 'imm8' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := SignBit
	ELSE
		dst[i+31:i] := SignExtend(a[i+31:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRAD'. Intrinsic: '_m_psradi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psraw

func Psraw(a x86.M64, count x86.M64) (dst x86.M64)

Psraw: Shift packed 16-bit integers in 'a' right by 'count' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := SignBit
	ELSE
		dst[i+15:i] := SignExtend(a[i+15:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRAW'. Intrinsic: '_m_psraw'. Requires MMX.

func Psrawi

func Psrawi(a x86.M64, imm8 byte) (dst x86.M64)

Psrawi: Shift packed 16-bit integers in 'a' right by 'imm8' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := SignBit
	ELSE
		dst[i+15:i] := SignExtend(a[i+15:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRAW'. Intrinsic: '_m_psrawi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psrld

func Psrld(a x86.M64, count x86.M64) (dst x86.M64)

Psrld: Shift packed 32-bit integers in 'a' right by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRLD'. Intrinsic: '_m_psrld'. Requires MMX.

func Psrldi

func Psrldi(a x86.M64, imm8 byte) (dst x86.M64)

Psrldi: Shift packed 32-bit integers in 'a' right by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRLD'. Intrinsic: '_m_psrldi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psrlq

func Psrlq(a x86.M64, count x86.M64) (dst x86.M64)

Psrlq: Shift 64-bit integer 'a' right by 'count' while shifting in zeros, and store the result in 'dst'.

IF count[63:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] >> count[63:0])
FI

Instruction: 'PSRLQ'. Intrinsic: '_m_psrlq'. Requires MMX.

func Psrlqi

func Psrlqi(a x86.M64, imm8 byte) (dst x86.M64)

Psrlqi: Shift 64-bit integer 'a' right by 'imm8' while shifting in zeros, and store the result in 'dst'.

IF imm8[7:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] >> imm8[7:0])
FI

Instruction: 'PSRLQ'. Intrinsic: '_m_psrlqi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psrlw

func Psrlw(a x86.M64, count x86.M64) (dst x86.M64)

Psrlw: Shift packed 16-bit integers in 'a' right by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRLW'. Intrinsic: '_m_psrlw'. Requires MMX.

func Psrlwi

func Psrlwi(a x86.M64, imm8 byte) (dst x86.M64)

Psrlwi: Shift packed 16-bit integers in 'a' right by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRLW'. Intrinsic: '_m_psrlwi'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func Psubb

func Psubb(a x86.M64, b x86.M64) (dst x86.M64)

Psubb: Subtract packed 8-bit integers in 'b' from packed 8-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := a[i+7:i] - b[i+7:i]
ENDFOR

Instruction: 'PSUBB'. Intrinsic: '_m_psubb'. Requires MMX.

func Psubd

func Psubd(a x86.M64, b x86.M64) (dst x86.M64)

Psubd: Subtract packed 32-bit integers in 'b' from packed 32-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i] - b[i+31:i]
ENDFOR

Instruction: 'PSUBD'. Intrinsic: '_m_psubd'. Requires MMX.

func Psubsb

func Psubsb(a x86.M64, b x86.M64) (dst x86.M64)

Psubsb: Subtract packed 8-bit integers in 'b' from packed 8-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_Int8(a[i+7:i] - b[i+7:i])
ENDFOR

Instruction: 'PSUBSB'. Intrinsic: '_m_psubsb'. Requires MMX.

func Psubsw

func Psubsw(a x86.M64, b x86.M64) (dst x86.M64)

Psubsw: Subtract packed 16-bit integers in 'b' from packed 16-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_Int16(a[i+15:i] - b[i+15:i])
ENDFOR

Instruction: 'PSUBSW'. Intrinsic: '_m_psubsw'. Requires MMX.

func Psubusb

func Psubusb(a x86.M64, b x86.M64) (dst x86.M64)

Psubusb: Subtract packed unsigned 8-bit integers in 'b' from packed unsigned 8-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_UnsignedInt8(a[i+7:i] - b[i+7:i])
ENDFOR

Instruction: 'PSUBUSB'. Intrinsic: '_m_psubusb'. Requires MMX.

func Psubusw

func Psubusw(a x86.M64, b x86.M64) (dst x86.M64)

Psubusw: Subtract packed unsigned 16-bit integers in 'b' from packed unsigned 16-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_UnsignedInt16(a[i+15:i] - b[i+15:i])
ENDFOR

Instruction: 'PSUBUSW'. Intrinsic: '_m_psubusw'. Requires MMX.

func Psubw

func Psubw(a x86.M64, b x86.M64) (dst x86.M64)

Psubw: Subtract packed 16-bit integers in 'b' from packed 16-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[i+15:i] - b[i+15:i]
ENDFOR

Instruction: 'PSUBW'. Intrinsic: '_m_psubw'. Requires MMX.

func Punpckhbw

func Punpckhbw(a x86.M64, b x86.M64) (dst x86.M64)

Punpckhbw: Unpack and interleave 8-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]){
	dst[7:0] := src1[39:32]
	dst[15:8] := src2[39:32]
	dst[23:16] := src1[47:40]
	dst[31:24] := src2[47:40]
	dst[39:32] := src1[55:48]
	dst[47:40] := src2[55:48]
	dst[55:48] := src1[63:56]
	dst[63:56] := src2[63:56]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0])

Instruction: 'PUNPCKHBW'. Intrinsic: '_m_punpckhbw'. Requires MMX.

func Punpckhdq

func Punpckhdq(a x86.M64, b x86.M64) (dst x86.M64)

Punpckhdq: Unpack and interleave 32-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

dst[31:0] := a[63:32]
dst[63:32] := b[63:32]

Instruction: 'PUNPCKHDQ'. Intrinsic: '_m_punpckhdq'. Requires MMX.

func Punpckhwd

func Punpckhwd(a x86.M64, b x86.M64) (dst x86.M64)

Punpckhwd: Unpack and interleave 16-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]){
	dst[15:0] := src1[47:32]
	dst[31:16] := src2[47:32]
	dst[47:32] := src1[63:48]
	dst[63:48] := src2[63:48]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0])

Instruction: 'PUNPCKHWD'. Intrinsic: '_m_punpckhwd'. Requires MMX.

func Punpcklbw

func Punpcklbw(a x86.M64, b x86.M64) (dst x86.M64)

Punpcklbw: Unpack and interleave 8-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_BYTES(src1[63:0], src2[63:0]){
	dst[7:0] := src1[7:0]
	dst[15:8] := src2[7:0]
	dst[23:16] := src1[15:8]
	dst[31:24] := src2[15:8]
	dst[39:32] := src1[23:16]
	dst[47:40] := src2[23:16]
	dst[55:48] := src1[31:24]
	dst[63:56] := src2[31:24]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0])

Instruction: 'PUNPCKLBW'. Intrinsic: '_m_punpcklbw'. Requires MMX.

func Punpckldq

func Punpckldq(a x86.M64, b x86.M64) (dst x86.M64)

Punpckldq: Unpack and interleave 32-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

dst[31:0] := a[31:0]
dst[63:32] := b[31:0]

Instruction: 'PUNPCKLDQ'. Intrinsic: '_m_punpckldq'. Requires MMX.

func Punpcklwd

func Punpcklwd(a x86.M64, b x86.M64) (dst x86.M64)

Punpcklwd: Unpack and interleave 16-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_WORDS(src1[63:0], src2[63:0]){
	dst[15:0] := src1[15:0]
	dst[31:16] := src2[15:0]
	dst[47:32] := src1[31:16]
	dst[63:48] := src2[31:16]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0])

Instruction: 'PUNPCKLWD'. Intrinsic: '_m_punpcklwd'. Requires MMX.

func Pxor

func Pxor(a x86.M64, b x86.M64) (dst x86.M64)

Pxor: Compute the bitwise XOR of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] XOR b[63:0])

Instruction: 'PXOR'. Intrinsic: '_m_pxor'. Requires MMX.

func Set1Pi16

func Set1Pi16(a int16) (dst x86.M64)

Set1Pi16: Broadcast 16-bit integer 'a' to all elements of 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[15:0]
ENDFOR

Instruction: '...'. Intrinsic: '_mm_set1_pi16'. Requires MMX.

func Set1Pi32

func Set1Pi32(a int) (dst x86.M64)

Set1Pi32: Broadcast 32-bit integer 'a' to all elements of 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[31:0]
ENDFOR

Instruction: '...'. Intrinsic: '_mm_set1_pi32'. Requires MMX.

func Set1Pi8

func Set1Pi8(a byte) (dst x86.M64)

Set1Pi8: Broadcast 8-bit integer 'a' to all elements of 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := a[7:0]
ENDFOR

Instruction: '...'. Intrinsic: '_mm_set1_pi8'. Requires MMX.

func SetPi16

func SetPi16(e3 int16, e2 int16, e1 int16, e0 int16) (dst x86.M64)

SetPi16: Set packed 16-bit integers in 'dst' with the supplied values.

dst[15:0] := e0
dst[31:16] := e1
dst[47:32] := e2
dst[63:48] := e3

Instruction: '...'. Intrinsic: '_mm_set_pi16'. Requires MMX.

func SetPi32

func SetPi32(e1 int, e0 int) (dst x86.M64)

SetPi32: Set packed 32-bit integers in 'dst' with the supplied values.

dst[31:0] := e0
dst[63:32] := e1

Instruction: '...'. Intrinsic: '_mm_set_pi32'. Requires MMX.

func SetPi8

func SetPi8(e7 byte, e6 byte, e5 byte, e4 byte, e3 byte, e2 byte, e1 byte, e0 byte) (dst x86.M64)

SetPi8: Set packed 8-bit integers in 'dst' with the supplied values.

dst[7:0] := e0
dst[15:8] := e1
dst[23:16] := e2
dst[31:24] := e3
dst[39:32] := e4
dst[47:40] := e5
dst[55:48] := e6
dst[63:56] := e7

Instruction: '...'. Intrinsic: '_mm_set_pi8'. Requires MMX.

func SetrPi16

func SetrPi16(e3 int16, e2 int16, e1 int16, e0 int16) (dst x86.M64)

SetrPi16: Set packed 16-bit integers in 'dst' with the supplied values in reverse order.

dst[15:0] := e3
dst[31:16] := e2
dst[47:32] := e1
dst[63:48] := e0

Instruction: '...'. Intrinsic: '_mm_setr_pi16'. Requires MMX.

func SetrPi32

func SetrPi32(e1 int, e0 int) (dst x86.M64)

SetrPi32: Set packed 32-bit integers in 'dst' with the supplied values in reverse order.

dst[31:0] := e1
dst[63:32] := e0

Instruction: '...'. Intrinsic: '_mm_setr_pi32'. Requires MMX.

func SetrPi8

func SetrPi8(e7 byte, e6 byte, e5 byte, e4 byte, e3 byte, e2 byte, e1 byte, e0 byte) (dst x86.M64)

SetrPi8: Set packed 8-bit integers in 'dst' with the supplied values in reverse order.

dst[7:0] := e7
dst[15:8] := e6
dst[23:16] := e5
dst[31:24] := e4
dst[39:32] := e3
dst[47:40] := e2
dst[55:48] := e1
dst[63:56] := e0

Instruction: '...'. Intrinsic: '_mm_setr_pi8'. Requires MMX.

func SetzeroSi64

func SetzeroSi64() (dst x86.M64)

SetzeroSi64: Return vector of type __m64 with all elements set to zero.

dst[MAX:0] := 0

Instruction: 'PXOR'. Intrinsic: '_mm_setzero_si64'. Requires MMX.

func SllPi16

func SllPi16(a x86.M64, count x86.M64) (dst x86.M64)

SllPi16: Shift packed 16-bit integers in 'a' left by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] << count[63:0])
	FI
ENDFOR

Instruction: 'PSLLW'. Intrinsic: '_mm_sll_pi16'. Requires MMX.

func SllPi32

func SllPi32(a x86.M64, count x86.M64) (dst x86.M64)

SllPi32: Shift packed 32-bit integers in 'a' left by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] << count[63:0])
	FI
ENDFOR

Instruction: 'PSLLD'. Intrinsic: '_mm_sll_pi32'. Requires MMX.

func SllSi64

func SllSi64(a x86.M64, count x86.M64) (dst x86.M64)

SllSi64: Shift 64-bit integer 'a' left by 'count' while shifting in zeros, and store the result in 'dst'.

IF count[63:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] << count[63:0])
FI

Instruction: 'PSLLQ'. Intrinsic: '_mm_sll_si64'. Requires MMX.

func SlliPi16

func SlliPi16(a x86.M64, imm8 byte) (dst x86.M64)

SlliPi16: Shift packed 16-bit integers in 'a' left by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] << imm8[7:0])
	FI
ENDFOR

Instruction: 'PSLLW'. Intrinsic: '_mm_slli_pi16'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SlliPi32

func SlliPi32(a x86.M64, imm8 byte) (dst x86.M64)

SlliPi32: Shift packed 32-bit integers in 'a' left by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] << imm8[7:0])
	FI
ENDFOR

Instruction: 'PSLLD'. Intrinsic: '_mm_slli_pi32'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SlliSi64

func SlliSi64(a x86.M64, imm8 byte) (dst x86.M64)

SlliSi64: Shift 64-bit integer 'a' left by 'imm8' while shifting in zeros, and store the result in 'dst'.

IF imm8[7:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] << imm8[7:0])
FI

Instruction: 'PSLLQ'. Intrinsic: '_mm_slli_si64'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SraPi16

func SraPi16(a x86.M64, count x86.M64) (dst x86.M64)

SraPi16: Shift packed 16-bit integers in 'a' right by 'count' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
	ELSE
		dst[i+15:i] := SignExtend(a[i+15:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRAW'. Intrinsic: '_mm_sra_pi16'. Requires MMX.

func SraPi32

func SraPi32(a x86.M64, count x86.M64) (dst x86.M64)

SraPi32: Shift packed 32-bit integers in 'a' right by 'count' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
	ELSE
		dst[i+31:i] := SignExtend(a[i+31:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRAD'. Intrinsic: '_mm_sra_pi32'. Requires MMX.

func SraiPi16

func SraiPi16(a x86.M64, imm8 byte) (dst x86.M64)

SraiPi16: Shift packed 16-bit integers in 'a' right by 'imm8' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := (a[i+15] ? 0xFFFF : 0)
	ELSE
		dst[i+15:i] := SignExtend(a[i+15:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRAW'. Intrinsic: '_mm_srai_pi16'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SraiPi32

func SraiPi32(a x86.M64, imm8 byte) (dst x86.M64)

SraiPi32: Shift packed 32-bit integers in 'a' right by 'imm8' while shifting in sign bits, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0)
	ELSE
		dst[i+31:i] := SignExtend(a[i+31:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRAD'. Intrinsic: '_mm_srai_pi32'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SrlPi16

func SrlPi16(a x86.M64, count x86.M64) (dst x86.M64)

SrlPi16: Shift packed 16-bit integers in 'a' right by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF count[63:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRLW'. Intrinsic: '_mm_srl_pi16'. Requires MMX.

func SrlPi32

func SrlPi32(a x86.M64, count x86.M64) (dst x86.M64)

SrlPi32: Shift packed 32-bit integers in 'a' right by 'count' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF count[63:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] >> count[63:0])
	FI
ENDFOR

Instruction: 'PSRLD'. Intrinsic: '_mm_srl_pi32'. Requires MMX.

func SrlSi64

func SrlSi64(a x86.M64, count x86.M64) (dst x86.M64)

SrlSi64: Shift 64-bit integer 'a' right by 'count' while shifting in zeros, and store the result in 'dst'.

IF count[63:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] >> count[63:0])
FI

Instruction: 'PSRLQ'. Intrinsic: '_mm_srl_si64'. Requires MMX.

func SrliPi16

func SrliPi16(a x86.M64, imm8 byte) (dst x86.M64)

SrliPi16: Shift packed 16-bit integers in 'a' right by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	IF imm8[7:0] > 15
		dst[i+15:i] := 0
	ELSE
		dst[i+15:i] := ZeroExtend(a[i+15:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRLW'. Intrinsic: '_mm_srli_pi16'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SrliPi32

func SrliPi32(a x86.M64, imm8 byte) (dst x86.M64)

SrliPi32: Shift packed 32-bit integers in 'a' right by 'imm8' while shifting in zeros, and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	IF imm8[7:0] > 31
		dst[i+31:i] := 0
	ELSE
		dst[i+31:i] := ZeroExtend(a[i+31:i] >> imm8[7:0])
	FI
ENDFOR

Instruction: 'PSRLD'. Intrinsic: '_mm_srli_pi32'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SrliSi64

func SrliSi64(a x86.M64, imm8 byte) (dst x86.M64)

SrliSi64: Shift 64-bit integer 'a' right by 'imm8' while shifting in zeros, and store the result in 'dst'.

IF imm8[7:0] > 63
	dst[63:0] := 0
ELSE
	dst[63:0] := ZeroExtend(a[63:0] >> imm8[7:0])
FI

Instruction: 'PSRLQ'. Intrinsic: '_mm_srli_si64'. Requires MMX.

FIXME: Requires compiler support (has immediate)

func SubPi16

func SubPi16(a x86.M64, b x86.M64) (dst x86.M64)

SubPi16: Subtract packed 16-bit integers in 'b' from packed 16-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := a[i+15:i] - b[i+15:i]
ENDFOR

Instruction: 'PSUBW'. Intrinsic: '_mm_sub_pi16'. Requires MMX.

func SubPi32

func SubPi32(a x86.M64, b x86.M64) (dst x86.M64)

SubPi32: Subtract packed 32-bit integers in 'b' from packed 32-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 1
	i := j*32
	dst[i+31:i] := a[i+31:i] - b[i+31:i]
ENDFOR

Instruction: 'PSUBD'. Intrinsic: '_mm_sub_pi32'. Requires MMX.

func SubPi8

func SubPi8(a x86.M64, b x86.M64) (dst x86.M64)

SubPi8: Subtract packed 8-bit integers in 'b' from packed 8-bit integers in 'a', and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := a[i+7:i] - b[i+7:i]
ENDFOR

Instruction: 'PSUBB'. Intrinsic: '_mm_sub_pi8'. Requires MMX.

func SubsPi16

func SubsPi16(a x86.M64, b x86.M64) (dst x86.M64)

SubsPi16: Subtract packed 16-bit integers in 'b' from packed 16-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_Int16(a[i+15:i] - b[i+15:i])
ENDFOR

Instruction: 'PSUBSW'. Intrinsic: '_mm_subs_pi16'. Requires MMX.

func SubsPi8

func SubsPi8(a x86.M64, b x86.M64) (dst x86.M64)

SubsPi8: Subtract packed 8-bit integers in 'b' from packed 8-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_Int8(a[i+7:i] - b[i+7:i])
ENDFOR

Instruction: 'PSUBSB'. Intrinsic: '_mm_subs_pi8'. Requires MMX.

func SubsPu16

func SubsPu16(a x86.M64, b x86.M64) (dst x86.M64)

SubsPu16: Subtract packed unsigned 16-bit integers in 'b' from packed unsigned 16-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 3
	i := j*16
	dst[i+15:i] := Saturate_To_UnsignedInt16(a[i+15:i] - b[i+15:i])
ENDFOR

Instruction: 'PSUBUSW'. Intrinsic: '_mm_subs_pu16'. Requires MMX.

func SubsPu8

func SubsPu8(a x86.M64, b x86.M64) (dst x86.M64)

SubsPu8: Subtract packed unsigned 8-bit integers in 'b' from packed unsigned 8-bit integers in 'a' using saturation, and store the results in 'dst'.

FOR j := 0 to 7
	i := j*8
	dst[i+7:i] := Saturate_To_UnsignedInt8(a[i+7:i] - b[i+7:i])
ENDFOR

Instruction: 'PSUBUSB'. Intrinsic: '_mm_subs_pu8'. Requires MMX.

func ToInt

func ToInt(a x86.M64) int

ToInt: Copy the lower 32-bit integer in 'a' to 'dst'.

dst[31:0] := a[31:0]

Instruction: 'MOVD'. Intrinsic: '_m_to_int'. Requires MMX.

func ToInt64

func ToInt64(a x86.M64) int64

ToInt64: Copy 64-bit integer 'a' to 'dst'.

dst[63:0] := a[63:0]

Instruction: 'MOVQ'. Intrinsic: '_m_to_int64'. Requires MMX.

func UnpackhiPi16

func UnpackhiPi16(a x86.M64, b x86.M64) (dst x86.M64)

UnpackhiPi16: Unpack and interleave 16-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_HIGH_WORDS(src1[63:0], src2[63:0]){
	dst[15:0] := src1[47:32]
	dst[31:16] := src2[47:32]
	dst[47:32] := src1[63:48]
	dst[63:48] := src2[63:48]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_HIGH_WORDS(a[63:0], b[63:0])

Instruction: 'PUNPCKHWD'. Intrinsic: '_mm_unpackhi_pi16'. Requires MMX.

func UnpackhiPi32

func UnpackhiPi32(a x86.M64, b x86.M64) (dst x86.M64)

UnpackhiPi32: Unpack and interleave 32-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

dst[31:0] := a[63:32]
dst[63:32] := b[63:32]

Instruction: 'PUNPCKHDQ'. Intrinsic: '_mm_unpackhi_pi32'. Requires MMX.

func UnpackhiPi8

func UnpackhiPi8(a x86.M64, b x86.M64) (dst x86.M64)

UnpackhiPi8: Unpack and interleave 8-bit integers from the high half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_HIGH_BYTES(src1[63:0], src2[63:0]){
	dst[7:0] := src1[39:32]
	dst[15:8] := src2[39:32]
	dst[23:16] := src1[47:40]
	dst[31:24] := src2[47:40]
	dst[39:32] := src1[55:48]
	dst[47:40] := src2[55:48]
	dst[55:48] := src1[63:56]
	dst[63:56] := src2[63:56]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_HIGH_BYTES(a[63:0], b[63:0])

Instruction: 'PUNPCKHBW'. Intrinsic: '_mm_unpackhi_pi8'. Requires MMX.

func UnpackloPi16

func UnpackloPi16(a x86.M64, b x86.M64) (dst x86.M64)

UnpackloPi16: Unpack and interleave 16-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_WORDS(src1[63:0], src2[63:0]){
	dst[15:0] := src1[15:0]
	dst[31:16] := src2[15:0]
	dst[47:32] := src1[31:16]
	dst[63:48] := src2[31:16]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_WORDS(a[63:0], b[63:0])

Instruction: 'PUNPCKLWD'. Intrinsic: '_mm_unpacklo_pi16'. Requires MMX.

func UnpackloPi32

func UnpackloPi32(a x86.M64, b x86.M64) (dst x86.M64)

UnpackloPi32: Unpack and interleave 32-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

dst[31:0] := a[31:0]
dst[63:32] := b[31:0]

Instruction: 'PUNPCKLDQ'. Intrinsic: '_mm_unpacklo_pi32'. Requires MMX.

func UnpackloPi8

func UnpackloPi8(a x86.M64, b x86.M64) (dst x86.M64)

UnpackloPi8: Unpack and interleave 8-bit integers from the low half of 'a' and 'b', and store the results in 'dst'.

INTERLEAVE_BYTES(src1[63:0], src2[63:0]){
	dst[7:0] := src1[7:0]
	dst[15:8] := src2[7:0]
	dst[23:16] := src1[15:8]
	dst[31:24] := src2[15:8]
	dst[39:32] := src1[23:16]
	dst[47:40] := src2[23:16]
	dst[55:48] := src1[31:24]
	dst[63:56] := src2[31:24]
	RETURN dst[63:0]
}

dst[63:0] := INTERLEAVE_BYTES(a[63:0], b[63:0])

Instruction: 'PUNPCKLBW'. Intrinsic: '_mm_unpacklo_pi8'. Requires MMX.

func XorSi64

func XorSi64(a x86.M64, b x86.M64) (dst x86.M64)

XorSi64: Compute the bitwise XOR of 64 bits (representing integer data) in 'a' and 'b', and store the result in 'dst'.

dst[63:0] := (a[63:0] XOR b[63:0])

Instruction: 'PXOR'. Intrinsic: '_mm_xor_si64'. Requires MMX.

Types

This section is empty.

Source Files

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
t or T : Toggle theme light dark auto
y or Y : Canonical URL