cmd

package

v0.13.0 Latest Latest Go to latest Published: Oct 23, 2020 License: MIT Imports: 41 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/shenwei356/unikmer

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CalcSignatureSize(numElements uint64, numHashes int, falsePositiveRate float64) uint64
func Execute()
func MergeUnikIndex(opt *Options, prefix string, files []string, outFile string) error
func ParseByteSize(val string) (int, error)
type Options
type UnikFileInfo
- func (i UnikFileInfo) String() string
type UnikFileInfos
- func (l UnikFileInfos) Len() int
- func (l UnikFileInfos) Less(i int, j int) bool
- func (l UnikFileInfos) Swap(i int, j int)
type UnikIndex
- func NewUnixIndex(file string, useMmap bool) (*UnikIndex, error)
- func (idx *UnikIndex) Close() error
- func (idx *UnikIndex) Search(hashes [][]uint64, queryCov float64, targetCov float64) map[string][3]float64
- func (idx *UnikIndex) String() string
type UnikIndexDB
- func NewUnikIndexDB(path string, useMmap bool) (*UnikIndexDB, error)
- func (db *UnikIndexDB) Close() error
- func (db *UnikIndexDB) Search(kmers []uint64, threads int, queryCov float64, targetCov float64) map[string][3]float64
- func (db *UnikIndexDB) SearchMap(kmers map[uint64]interface{}, threads int, queryCov float64, targetCov float64) map[string][3]float64
- func (db *UnikIndexDB) String() string
type UnikIndexDBInfo
- func NewUnikIndexDBInfo(files []string) UnikIndexDBInfo
- func UnikIndexDBInfoFromFile(file string) (UnikIndexDBInfo, error)
- func (i UnikIndexDBInfo) Check() error
- func (i UnikIndexDBInfo) String() string
- func (i UnikIndexDBInfo) WriteTo(file string) error

Constants ¶

View Source

const PosPopCountBufSize = 128

PosPopCountBufSize defines the buffer size of byte slice feeding to pospopcount (github.com/clausecker/pospop).

Theoretically, a size >240 is better, but in this scenario we first need to transpose the signature matrix, which is the performance bottleneck. The column size of the matrix is fixed, so we must control the row size to balance the time spent on matrix transposition and pospopcount.

128 is the best value for my machine (AMD Ryzen 2700X).

View Source

const UnikIndexDBVersion uint8 = 2

Variables ¶

View Source

var BufferSize = 65536 //os.Getpagesize()

BufferSize is the size of the buffer.

View Source

var ErrVersionMismatch = errors.New("unikmer/index-db: version mismatch")

View Source

var RootCmd = &cobra.Command{
	Use:   "unikmer",
	Short: "Unique-Kmer Toolkit",
	Long: fmt.Sprintf(`unikmer - Unique-Kmer Toolkit

unikmer is a toolkit for nucleic acid k-mer analysis, providing functions
including set operation, indexing, and searching on k-mers optional with
TaxIDs but without count information.

K-mers are either encoded (k<=32) or hashed (arbitrary k) into 'uint64',
and serialized in binary file with extension '.unik'.

TaxIDs can be assigned when counting k-mers from genome sequences,
and LCA (Lowest Common Ancestor) is computed during set opertions
including computing union, intersecton, set difference, unique and
repeated k-mers.

Version: v%s

Author: Wei Shen <shenwei356@gmail.com>

Documents  : https://shenwei356.github.io/unikmer
Source code: https://github.com/shenwei356/unikmer

Dataset (optional):

  Manipulating k-mers with TaxIDs needs taxonomy file from e.g., 
  NCBI Taxonomy database, please extract "nodes.dmp", "names.dmp",
  "delnodes.dmp" and "merged.dmp" from link below into ~/.unikmer/ ,
  ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz , 
  or some other directory, and later you can refer to using flag
  --data-dir or environment variable UNIKMER_DB.

  For GTDB, use https://github.com/nick-youngblut/gtdb_to_taxdump 
  for taxonomy convertion.

  Note that TaxIDs are represented using uint32 and stored in 4 or
  less bytes, all TaxIDs should be in range of [1, %d]

`, VERSION, maxUint32),
}

RootCmd represents the base command when called without any subcommands

View Source

var VERSION = "0.12.1"

VERSION is the version

Functions ¶

func CalcSignatureSize ¶ added in v0.12.0

func CalcSignatureSize(numElements uint64, numHashes int, falsePositiveRate float64) uint64

From https://github.com/bingmann/cobs/blob/master/cobs/util/calc_signature_size.cpp

func Execute ¶

func Execute()

Execute adds all child commands to the root command and sets flags appropriately. This is called by main.main(). It only needs to happen once to the rootCmd.

func MergeUnikIndex ¶ added in v0.12.0

func MergeUnikIndex(opt *Options, prefix string, files []string, outFile string) error

func ParseByteSize ¶ added in v0.7.0

func ParseByteSize(val string) (int, error)

ParseByteSize parses byte size from string.

Types ¶

type Options ¶

type Options struct {
	NumCPUs          int
	Verbose          bool
	Compress         bool
	Compact          bool
	CompressionLevel int
	MaxTaxid         uint32
	IgnoreTaxid      bool
	DataDir          string
	NodesFile        string
	CacheLCA         bool

	NoCheckFile bool
}

Options contains the global flags

type UnikFileInfo ¶ added in v0.12.0

type UnikFileInfo struct {
	Path  string
	Name  string
	Kmers int64
}

func (UnikFileInfo) String ¶ added in v0.12.0

func (i UnikFileInfo) String() string

type UnikFileInfos ¶ added in v0.12.0

type UnikFileInfos []UnikFileInfo

func (UnikFileInfos) Len ¶ added in v0.12.0

func (l UnikFileInfos) Len() int

func (UnikFileInfos) Less ¶ added in v0.12.0

func (l UnikFileInfos) Less(i int, j int) bool

func (UnikFileInfos) Swap ¶ added in v0.12.0

func (l UnikFileInfos) Swap(i int, j int)

type UnikIndex ¶ added in v0.12.0

type UnikIndex struct {
	Path   string
	Header index.Header
	// contains filtered or unexported fields
}

func NewUnixIndex ¶ added in v0.12.0

func NewUnixIndex(file string, useMmap bool) (*UnikIndex, error)

func (*UnikIndex) Close ¶ added in v0.12.0

func (idx *UnikIndex) Close() error

func (*UnikIndex) Search ¶ added in v0.12.0

func (idx *UnikIndex) Search(hashes [][]uint64, queryCov float64, targetCov float64) map[string][3]float64

func (*UnikIndex) String ¶ added in v0.12.0

func (idx *UnikIndex) String() string

type UnikIndexDB ¶ added in v0.12.0

type UnikIndexDB struct {
	Info   UnikIndexDBInfo
	Header index.Header

	Indices []*UnikIndex
	// contains filtered or unexported fields
}

func NewUnikIndexDB ¶ added in v0.12.0

func NewUnikIndexDB(path string, useMmap bool) (*UnikIndexDB, error)

func (*UnikIndexDB) Close ¶ added in v0.12.0

func (db *UnikIndexDB) Close() error

func (*UnikIndexDB) Search ¶ added in v0.12.0

func (db *UnikIndexDB) Search(kmers []uint64, threads int, queryCov float64, targetCov float64) map[string][3]float64

func (*UnikIndexDB) SearchMap ¶ added in v0.12.0

func (db *UnikIndexDB) SearchMap(kmers map[uint64]interface{}, threads int, queryCov float64, targetCov float64) map[string][3]float64

func (*UnikIndexDB) String ¶ added in v0.12.0

func (db *UnikIndexDB) String() string

type UnikIndexDBInfo ¶ added in v0.12.0

type UnikIndexDBInfo struct {
	Version      uint8    `yaml:"version"`
	IndexVersion uint8    `yaml:"unikiVersion"`
	K            int      `yaml:"k"`
	Hashed       bool     `yaml:"hashed"`
	Canonical    bool     `yaml:"canonical"`
	NumHashes    int      `yaml:"hashes"`
	FPR          float64  `yaml:"fpr"`
	BlockSize    int      `yaml:"blocksize"`
	Kmers        int      `yaml:"totalKmers"`
	Files        []string `yaml:"files"`
	NumNames     int      `yaml:"numNames"`
	Names        []string `yaml:"names"`
	Sizes        []uint64 `yaml:"kmers"`
	// contains filtered or unexported fields
}

func NewUnikIndexDBInfo ¶ added in v0.12.0

func NewUnikIndexDBInfo(files []string) UnikIndexDBInfo

func UnikIndexDBInfoFromFile ¶ added in v0.12.0

func UnikIndexDBInfoFromFile(file string) (UnikIndexDBInfo, error)

func (UnikIndexDBInfo) Check ¶ added in v0.12.0

func (i UnikIndexDBInfo) Check() error

func (UnikIndexDBInfo) String ¶ added in v0.12.0

func (i UnikIndexDBInfo) String() string

func (UnikIndexDBInfo) WriteTo ¶ added in v0.12.0

func (i UnikIndexDBInfo) WriteTo(file string) error

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL