matrixprofile

package
v0.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 16, 2019 License: MIT Imports: 9 Imported by: 0

Documentation

Overview

Package matrixprofile computes the matrix profile and matrix profile index of a time series

Example
sig := []float64{0, 0.99, 1, 0, 0, 0.98, 1, 0, 0, 0.96, 1, 0}

mp, err := New(sig, nil, 4)
if err != nil {
	panic(err)
}

if err = mp.Stomp(1); err != nil {
	panic(err)
}

fmt.Printf("Signal:         %.3f\n", sig)
fmt.Printf("Matrix Profile: %.3f\n", mp.MP)
fmt.Printf("Profile Index:  %5d\n", mp.Idx)
Output:

Signal:         [0.000 0.990 1.000 0.000 0.000 0.980 1.000 0.000 0.000 0.960 1.000 0.000]
Matrix Profile: [0.014 0.014 0.029 0.029 0.014 0.014 0.029 0.029 0.029]
Profile Index:  [    4     5     6     7     0     1     2     3     4]
Example (CaseStudy)
sin := siggen.Sin(1, 5, 0, 0, 100, 2)
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 0.25)
saw := siggen.Sawtooth(0.5, 7, 0, 0, 100, 1)
line := siggen.Line(0, 0, len(sin2)*4)
sig := siggen.Append(sin, sin2, sin, line, sin2, line, sin2, line, saw)

noise := siggen.Noise(0.1, len(sig))
sig = siggen.Add(sig, noise)

var m, k int
var r float64
m = 32
k = 6
r = 3
mp, err := New(sig, nil, m)
if err != nil {
	panic(err)
}

if err = mp.Stomp(2); err != nil {
	panic(err)
}

_, _, cac := mp.Segment()

motifs, err := mp.TopKMotifs(k, r)
if err != nil {
	panic(err)
}

discords, err := mp.TopKDiscords(3, mp.M/2)
if err != nil {
	panic(err)
}

sigPts := Points(sig, len(sig))
mpPts := Points(mp.MP, len(sig))
cacPts := Points(cac, len(sig))
motifPts := make([][]plotter.XYs, k)
discordPts := make([]plotter.XYs, k)
discordLabels := make([]string, k)

for i := 0; i < k; i++ {
	motifPts[i] = make([]plotter.XYs, len(motifs[i].Idx))
}

for i := 0; i < k; i++ {
	for j, idx := range motifs[i].Idx {
		motifPts[i][j] = Points(sig[idx:idx+m], m)
	}
}

for i, idx := range discords {
	discordPts[i] = Points(sig[idx:idx+m], m)
	discordLabels[i] = strconv.Itoa(idx)
}

if err = PlotMP(sigPts, mpPts, cacPts, motifPts, discordPts, discordLabels, "../mp_sine.png"); err != nil {
	panic(err)
}

fmt.Println("Saved png file result to mp_sine.png")
Output:

Saved png file result to mp_sine.png
Example (KDimensionalCaseStudy)
sin := siggen.Sin(1, 4, 0, 0, 100, 0.25)
saw := siggen.Sawtooth(1, 4, 0, 0, 100, 0.25)
square := siggen.Square(1, 4, 0, 0, 100, 0.25)
line := siggen.Line(0, 0, len(sin)*4)
line2 := siggen.Line(0, 0, len(sin)*3)
sig := make([][]float64, 3)
sig[0] = siggen.Append(line, line, line, saw, line2, saw, line2)
sig[1] = siggen.Append(line, sin, line2, sin, line2, sin, line2, sin, line2)
sig[2] = siggen.Append(line, square, line2, square, line2, square, line2, square, line2)

noise := siggen.Noise(0.1, len(sig[0]))
sig[0] = siggen.Add(sig[0], noise)

noise = siggen.Noise(0.1, len(sig[0]))
sig[1] = siggen.Add(sig[1], noise)

noise = siggen.Noise(0.1, len(sig[0]))
sig[2] = siggen.Add(sig[2], noise)

var m int
m = 25
mp, err := NewK(sig, m)
if err != nil {
	panic(err)
}

if err = mp.MStomp(); err != nil {
	panic(err)
}

sigPts := make([]plotter.XYs, 3)
sigPts[0] = Points(sig[0], len(sig[0]))
sigPts[1] = Points(sig[1], len(sig[0]))
sigPts[2] = Points(sig[2], len(sig[0]))

mpPts := make([]plotter.XYs, 3)
mpPts[0] = Points(mp.MP[0], len(sig[0]))
mpPts[1] = Points(mp.MP[1], len(sig[0]))
mpPts[2] = Points(mp.MP[2], len(sig[0]))

if err = PlotKMP(sigPts, mpPts, "../mp_kdim.png"); err != nil {
	panic(err)
}

fmt.Println("Saved png file result to mp_kdim.png")
Output:

Saved png file result to mp_kdim.png

Index

Examples

Constants

This section is empty.

Variables

View Source
var (
	// DefaultAV is the default annotation vector of all ones
	DefaultAV = "default"

	// ComplexityAV is the annotation vector that focuses on areas of high "complexity"
	ComplexityAV = "complexity"

	// MeanStdAV is the annotation vector focusing on areas where the signal is within a
	// standard deviation of the mean
	MeanStdAV = "meanstd"

	// ClippingAV is the annotation vector reducing the importance of areas showing clipping
	// effects on the positive and negative regime
	ClippingAV = "clipping"
)

Functions

func MakeClippingAV

func MakeClippingAV(d []float64, m int) []float64

MakeClippingAV creates an annotation vector by setting subsequences with more clipping on the positive or negative side of the signal to lower importance.

func MakeCompexityAV

func MakeCompexityAV(d []float64, m int) []float64

MakeCompexityAV creates an annotation vector that is based on the complexity estimation of the signal.

func MakeDefaultAV added in v0.2.0

func MakeDefaultAV(d []float64, m int) []float64

MakeDefaultAV creates a default annotation vector of all ones resulting in no change to the matrix profile when applied

func MakeMeanStdAV

func MakeMeanStdAV(d []float64, m int) []float64

MakeMeanStdAV creates an annotation vector by setting any values above the mean of the standard deviation vector to 0 and below to 1.

func ZNormalize added in v0.2.0

func ZNormalize(ts []float64) ([]float64, error)

ZNormalize computes a z-normalized version of a slice of floats. This is represented by y[i] = (x[i] - mean(x))/std(x)

Types

type KMatrixProfile

type KMatrixProfile struct {
	MP  [][]float64 // matrix profile
	Idx [][]int     // matrix profile index
	// contains filtered or unexported fields
}

KMatrixProfile is a struct that tracks the current k-dimensional matrix profile computation for a given slice of timeseries of length N and subsequence length of M. The profile and the profile index are stored here.

func NewK

func NewK(t [][]float64, m int) (*KMatrixProfile, error)

NewK creates a matrix profile struct specifically to be used with the k dimensional matrix profile computation. The number of rows represents the number of dimensions, and each row holds a series of points of equal length as each other.

func (*KMatrixProfile) MStomp

func (mp *KMatrixProfile) MStomp() error

MStomp computes the k dimensional matrix profile

type MatrixProfile

type MatrixProfile struct {
	A        []float64    // query time series
	B        []float64    // timeseries to perform full join with
	AMean    []float64    // sliding mean of a with a window of m each
	AStd     []float64    // sliding standard deviation of a with a window of m each
	BMean    []float64    // sliding mean of b with a window of m each
	BStd     []float64    // sliding standard deviation of b with a window of m each
	BF       []complex128 // holds an existing calculation of the FFT of b timeseries
	N        int          // length of the timeseries
	M        int          // length of a subsequence
	SelfJoin bool         // indicates whether a self join is performed with an exclusion zone
	MP       []float64    // matrix profile
	Idx      []int        // matrix profile index
	AV       string       // type of annotation vector which defaults to all ones
}

MatrixProfile is a struct that tracks the current matrix profile computation for a given timeseries of length N and subsequence length of M. The profile and the profile index are stored here.

func New

func New(a, b []float64, m int) (*MatrixProfile, error)

New creates a matrix profile struct with a given timeseries length n and subsequence length of m. The first slice, a, is used as the initial timeseries to join with the second, b. If b is nil, then the matrix profile assumes a self join on the first timeseries.

func (MatrixProfile) ApplyAV

func (mp MatrixProfile) ApplyAV(av []float64) ([]float64, error)

ApplyAV applies an annotation vector to the current matrix profile. Annotation vector values must be between 0 and 1.

func (MatrixProfile) GetAV added in v0.2.0

func (mp MatrixProfile) GetAV() ([]float64, error)

GetAV returns the annotation vector given the matrix profile configured AV field

func (MatrixProfile) Segment

func (mp MatrixProfile) Segment() (int, float64, []float64)

Segment finds the the index where there may be a potential timeseries change. Returns the index of the potential change, value of the corrected arc curve score and the histogram of all the crossings for each index in the matrix profile index. This approach is based on the UCR paper on segmentation of timeseries using matrix profiles which can be found https://www.cs.ucr.edu/%7Eeamonn/Segmentation_ICDM.pdf

Example
// generate a signal mainly composed of sine waves and switches
// frequencies, amplitude, and offset midway through

// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
// time of 2 seconds
sin := siggen.Sin(1, 5, 0, 0, 100, 2)

// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
// frequency of 100 Hz, time of 1 second
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
sig := siggen.Append(sin, sin2)

// noise with an amplitude of 0.1
noise := siggen.Noise(0.01, len(sig))
sig = siggen.Add(sig, noise)

// create a new MatrixProfile struct using the signal and a
// subsequence length of 32. The second subsequence is set to nil
// so we perform a self join.
mp, err := New(sig, nil, 32)
if err != nil {
	panic(err)
}

// run the STMP algorithm with self join. The matrix profile
// will be stored in mp.MP and the matrix profile index will
// be stored in mp.Idx
if err = mp.Stmp(); err != nil {
	panic(err)
}

// segment the timeseries using the number of arc crossings over
// each index in the matrix profile index
idx, cac, _ := mp.Segment()
fmt.Printf("Signal change foud at index: %d\n", idx)
fmt.Printf("Corrected Arc Curve (CAC) value: %.3f\n", cac)
Output:

Signal change foud at index: 194
Corrected Arc Curve (CAC) value: 0.000

func (*MatrixProfile) Stamp

func (mp *MatrixProfile) Stamp(sample float64, parallelism int) error

Stamp uses random ordering to compute the matrix profile. User can specify the sample to be anything between 0 and 1 so that the computation early terminates and provides the current computed matrix profile. 1 represents the exact matrix profile. This should compute far faster at the cost of an approximation of the matrix profile. Stores the matrix profile and matrix profile index in the struct.

Example
// generate a signal mainly composed of sine waves and switches
// frequencies, amplitude, and offset midway through

// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
// time of 2 seconds
sin := siggen.Sin(1, 5, 0, 0, 100, 2)

// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
// frequency of 100 Hz, time of 1 second
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
sig := siggen.Append(sin, sin2)

// noise with an amplitude of 0.1
noise := siggen.Noise(0.1, len(sig))
sig = siggen.Add(sig, noise)

// create a new MatrixProfile struct using the signal and a
// subsequence length of 32. The second subsequence is set to nil
// so we perform a self join.
mp, err := New(sig, nil, 32)
if err != nil {
	panic(err)
}

// run the STAMP algorithm with self join and a sample of 0.2 of
// all subsequences. The matrix profile will be stored in mp.MP
// and the matrix profile index will be stored in mp.Idx
if err = mp.Stamp(0.2, 2); err != nil {
	panic(err)
}
Output:

func (*MatrixProfile) StampUpdate

func (mp *MatrixProfile) StampUpdate(newValues []float64) error

StampUpdate updates a matrix profile and matrix profile index in place providing streaming like behavior.

func (*MatrixProfile) Stmp

func (mp *MatrixProfile) Stmp() error

Stmp computes the full matrix profile given two time series as inputs. If the second time series is set to nil then a self join on the first will be performed. Stores the matrix profile and matrix profile index in the struct.

Example
// generate a signal mainly composed of sine waves and switches
// frequencies, amplitude, and offset midway through

// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
// time of 2 seconds
sin := siggen.Sin(1, 5, 0, 0, 100, 2)

// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
// frequency of 100 Hz, time of 1 second
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
sig := siggen.Append(sin, sin2)

// noise with an amplitude of 0.1
noise := siggen.Noise(0.1, len(sig))
sig = siggen.Add(sig, noise)

// create a new MatrixProfile struct using the signal and a
// subsequence length of 32. The second subsequence is set to nil
// so we perform a self join.
mp, err := New(sig, nil, 32)
if err != nil {
	panic(err)
}

// run the STMP algorithm with self join. The matrix profile
// will be stored in mp.MP and the matrix profile index will
// be stored in mp.Idx
if err = mp.Stmp(); err != nil {
	panic(err)
}
Output:

func (*MatrixProfile) Stomp

func (mp *MatrixProfile) Stomp(parallelism int) error

Stomp is an optimization on the STAMP approach reducing the runtime from O(n^2logn) down to O(n^2). This is an ordered approach, since the sliding dot product or cross correlation can be easily updated for the next sliding window, if the previous window dot product is available. This should also greatly reduce the number of memory allocations needed to compute an arbitrary timeseries length.

Example
// generate a signal mainly composed of sine waves and switches
// frequencies, amplitude, and offset midway through

// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
// time of 2 seconds
sin := siggen.Sin(1, 5, 0, 0, 100, 2)

// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
// frequency of 100 Hz, time of 1 second
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
sig := siggen.Append(sin, sin2)

// noise with an amplitude of 0.1
noise := siggen.Noise(0.1, len(sig))
sig = siggen.Add(sig, noise)

// create a new MatrixProfile struct using the signal and a
// subsequence length of 32. The second subsequence is set to nil
// so we perform a self join.
mp, err := New(sig, nil, 32)
if err != nil {
	panic(err)
}

// run the STOMP algorithm with self join. The matrix profile
// will be stored in mp.MP and the matrix profile index will
// be stored in mp.Idx
if err = mp.Stomp(1); err != nil {
	panic(err)
}
Output:

func (MatrixProfile) TopKDiscords

func (mp MatrixProfile) TopKDiscords(k int, exclusionZone int) ([]int, error)

TopKDiscords finds the top k time series discords starting indexes from a computed matrix profile. Each discovery of a discord will apply an exclusion zone around the found index so that new discords can be discovered.

func (MatrixProfile) TopKMotifs

func (mp MatrixProfile) TopKMotifs(k int, r float64) ([]MotifGroup, error)

TopKMotifs will iteratively go through the matrix profile to find the top k motifs with a given radius. Only applies to self joins.

Example
// generate a signal mainly composed of sine waves and switches
// frequencies, amplitude, and offset midway through

// amplitude of 1, frequency of 5Hz, sampling frequency of 100 Hz,
// time of 2 seconds
sin := siggen.Sin(1, 5, 0, 0, 100, 2)

// amplitude of 0.25, frequency of 10Hz, offset of 0.75, sampling
// frequency of 100 Hz, time of 1 second
sin2 := siggen.Sin(0.25, 10, 0, 0.75, 100, 1)
sig := siggen.Append(sin, sin2)

// create a new MatrixProfile struct using the signal and a
// subsequence length of 32. The second subsequence is set to nil
// so we perform a self join.
mp, err := New(sig, nil, 32)
if err != nil {
	panic(err)
}

// run the STMP algorithm with self join. The matrix profile
// will be stored in mp.MP and the matrix profile index will
// be stored in mp.Idx
if err = mp.Stmp(); err != nil {
	panic(err)
}

// finds the top 3 motifs in the signal. Motif groups include
// all subsequences that are within 2 times the distance of the
// original motif pair
motifs, err := mp.TopKMotifs(2, 2)
if err != nil {
	panic(err)
}

for i, mg := range motifs {
	fmt.Printf("Motif Group %d\n", i)
	fmt.Printf("  %d motifs\n", len(mg.Idx))
}
Output:

Motif Group 0
  2 motifs
Motif Group 1
  2 motifs

type MotifGroup

type MotifGroup struct {
	Idx     []int
	MinDist float64
}

MotifGroup stores a list of indices representing a similar motif along with the minimum distance that this set of motif composes of.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL