enhgrid
import "gitlab.com/Grouumf/enhlinktools/enhgrid"
Library that compiles the enhgrid executable
enhgrid performs enhlink on multiple processes for a range of hyperparameter values. enhgrid generates output files for each hyperparameter combination. The following parameters can accept multiple values:
-downsample <int>
-n_boot <int>
-depth <int>
-max_features <int>
-secondOrderMaxFeat <int>
-threshold <float>
-min_matsize <int>
-min_leafsize <int>
-merging_cutoff <int>
-neighborhood <int>
-maxFeatType <string/int/float>
-lambda1 <float>
-lambda2 <float>
-threads <int>
Multiple values can be passed as input using either comma or space: for example -depth 2,3,4 or -depth "2 3 4"
Enhgrid accepts exactly the same parameters as Enhlink, with additional functionalities:
## Parameters unique to enhgrid:
-randomNTargets <int> which allows picking, for each grid iteration, N targets at random from the index and processing them instead of the full list of targets
-repetition <int> Number of repetitions to be performed for each iteration (default: 1)
-processes <int> Number of Enhlink processes to be launched in parallel (default: 1)
-splitTargetList Split the list of genes through the n processes
<<<<<<<<<<<<<<<<<<<< WARNING >>>>>>>>>>>>>>>>>>>> As of March 20 2024, Enhlink v0.21.0, we changed some of Enhgrid's parameter names for clarity and consistency.
Below is the list of changes, given as (version < 0.21.0) -> (version >= 0.21.0): cluster -> clusters; promoter -> gtf; genes -> targets; gene -> target; isGeneExpr -> isExpr; rmPeaksInPromoter -> rmPeaksInTargets; splitGeneList -> splitTargetList; randomNGenes -> randomNTargets; onlyPositiveLink -> linkType <<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>
USAGE:
enhgrid -mat <file> -xgi <file> -ygi <file> -gtf <file> -out <path> -tag <string>
-mat2 <file> -xgi2 <file> -ygi2 <file> # IF PASSING A GENE MATRIX FILE
-target <string> # IF FOCUSING ON ONE TARGET
-targets <file> # IF FOCUSING ON A LIST OF TARGETS
-isExpr # IF THE GENE MATRIX IS AN EXPRESSION MATRIX
-covariates <file> -xgi_subset <file> -ygi_subset <file> -clusters <file> # OPTIONAL
-downsample <int> -threads <int> -n_boot <int> -depth <int> -max_features <int> # OPTIONAL
-threshold <float> -min_matsize <int> -min_leafsize <int> -merging_cutoff <int> # OPTIONAL
-format {coo, mtx, cellRanger} -keep_sparse -maxFeatType <string/int/float> # OPTIONAL
-rmPeaksInTargets -linkType {"all", "positive", "negative"} -secondOrder -ignoreEnhancerWeight # OPTIONAL
-neighborhood <int> -secondOrderMaxFeat <int> -uniformSampling # OPTIONAL
-randomNTargets <int> -repetition <int> -processes <int> -splitTargetList # OPTIONAL and specific to enhgrid
Please check enhgrid -h and the tutorial and introduction sections for a more precise description of the input parameters.
Index
- Variables
- func analyseOneGeneList(enhObj enhlinkobject.EnhlinkObject, processID int, bucket map[string]bool, waiting *sync.WaitGroup, guard chan bool)
- func getGeneBucketsFromGene(geneFile utils.Filename, processes int) ([]map[string]bool, int)
- func getGeneBucketsFromPromoter(plist *enhlinkobject.PromoterList, processes int) ([]map[string]bool, int)
- func launchOneIterThread(isOver bool, count int, attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, writer *io.WriteCloser, waiting *sync.WaitGroup, mutex *sync.Mutex, guard chan bool)
- func main()
- func mergeBucketResultsFile(outTag string, clusterList []string, nbBuckets int)
- func mergeOneSetOfBucketFiles(outTag, cluster, ext string, nbBuckets int)
- func processNGeneLists(attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, outTag string)
- func reduce(combinations [][][]int) (res [][]int)
- func splitGenesToBucket(geneMap map[string]uint, processes int) (geneBuckets []map[string]bool, nbGenes int)
- func stringToFloatArray(stringArr, option string) (outArr []float64)
- func stringToIntArray(stringArr, option string) (outArr []int)
- func stringToMaxFeatTypeArray(stringArr, option string) (outArr []enhlinkobject.MaxFeaturesType)
- func testIfRequiredFilesExist()
- type paramArrays
Variables
CLUSTERFILE cluster file
var CLUSTERFILE utils.Filename
DOWNSAMPLEARR Downsample the number of samples to use
var DOWNSAMPLEARR string
GENE gene
var GENE string
IGNOREENHANCERWEIGHT Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
var IGNOREENHANCERWEIGHT bool
INPUTFORMAT input matrix format
var INPUTFORMAT string
INPUTGENEMAT input matrix name for the gene matrix (input)
var INPUTGENEMAT utils.Filename
INPUTMAT input matrix name (input)
var INPUTMAT utils.Filename
ISGENEEXPR using gene expression for the gene mat
var ISGENEEXPR bool
KEEPSPARSE Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
var KEEPSPARSE bool
LAMBDA1ARR Lambda parameter of a poisson distribution, that controls the amount of dropouts of the simulated variables
var LAMBDA1ARR string
LAMBDA2ARR Lambda parameter of a poisson distribution, that controls the amount of false positives in the simulated variables
var LAMBDA2ARR string
LINKTYPE Which link to keep {"all", "positive", "negative"}
var LINKTYPE string
MAXFEATURESARR Maximum number of explanatory features per bootstrap model.
var MAXFEATURESARR string
MAXFEATURESTYPEARR max feature type
var MAXFEATURESTYPEARR string
MERGINGCUTOFF merging cutoff for closeby promoters
var MERGINGCUTOFF int
METADATA optional covariate matrix
var METADATA utils.Filename
MINLEAFSIZEARR Min size of leaf
var MINLEAFSIZEARR string
MINMATSIZEARR Min matrix size (int)
var MINMATSIZEARR string
NBBOOTARR Number of bootstraps
var NBBOOTARR string
NBPROCESSES Number of Enhlink processes to be launched in parallel
var NBPROCESSES int
NBSIMFEATURESARR Number of simulated features to use
var NBSIMFEATURESARR string
NBTHREADSARR number of internal threads for each enhlink computation
var NBTHREADSARR string
NEIGHBORHOODARR number of internal threads
var NEIGHBORHOODARR string
ONLYSIM only perform simulation
var ONLYSIM bool
OUTDIR output directory
var OUTDIR string
OUTTAG output files tag
var OUTTAG string
PROMOTERFILE promoter file
var PROMOTERFILE utils.Filename
RANDOMNBGENES random subset of genes to analyze
var RANDOMNBGENES int
REPETITION Number of repetitions to be performed for each iteration (default: 1)
var REPETITION int
RMPEAKSINPROMOTERS Remove peaks within promoter boundaries
var RMPEAKSINPROMOTERS bool
SECONDORDER compute second order links - covar correlation
var SECONDORDER bool
SECONDORDERMAXFEATURESARR Maximum number of explanatory features per bootstrap model for the second order model.
var SECONDORDERMAXFEATURESARR string
SHOWVERSION show version and quit
var SHOWVERSION bool
SPLITGENELIST Split the gene list through the processes
var SPLITGENELIST bool
THRESHOLDARR Significance level
var THRESHOLDARR string
TREEDEPTHARR Max tree level
var TREEDEPTHARR string
UNIFORMSAMPLING Randomly sample the cells to have a uniform covariate distribution for each bootstrap. Needs a covariate matrix
var UNIFORMSAMPLING bool
XGI row index for input mat
var XGI utils.Filename
XGIGENE row index for input gene mat
var XGIGENE utils.Filename
XGISUBSET row index subset for input mat
var XGISUBSET utils.Filename
YGI column index for input mat
var YGI utils.Filename
YGIGENE column index for input gene mat
var YGIGENE utils.Filename
YGIGENESUBSET column index subset for input gene mat
var YGIGENESUBSET utils.Filename
YGISUBSET column index subset for input mat
var YGISUBSET utils.Filename
func analyseOneGeneList(enhObj enhlinkobject.EnhlinkObject, processID int, bucket map[string]bool, waiting *sync.WaitGroup, guard chan bool)
func getGeneBucketsFromGene(geneFile utils.Filename, processes int) ([]map[string]bool, int)
func getGeneBucketsFromPromoter(plist *enhlinkobject.PromoterList, processes int) ([]map[string]bool, int)
func launchOneIterThread(isOver bool, count int, attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, writer *io.WriteCloser, waiting *sync.WaitGroup, mutex *sync.Mutex, guard chan bool)
func main()
func mergeBucketResultsFile(outTag string, clusterList []string, nbBuckets int)
func mergeOneSetOfBucketFiles(outTag, cluster, ext string, nbBuckets int)
func processNGeneLists(attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, outTag string)
func reduce(combinations [][][]int) (res [][]int)
func splitGenesToBucket(geneMap map[string]uint, processes int) (geneBuckets []map[string]bool, nbGenes int)
func stringToFloatArray(stringArr, option string) (outArr []float64)
func stringToIntArray(stringArr, option string) (outArr []int)
func stringToMaxFeatTypeArray(stringArr, option string) (outArr []enhlinkobject.MaxFeaturesType)
func testIfRequiredFilesExist()
type paramArrays struct {
downsample []int
nbBoot []int
depth []int
maxFeatures []int
secondOrderMaxFeatures []int
minMatsize []int
minLeafsize []int
nbThreads []int
neighborhood []int
nbSimFeatures []int
maxFeatType []enhlinkobject.MaxFeaturesType
threshold []float64
lambda1, lambda2 []float64
iterators map[string]int
nbSteps, currentStep int
keys []string
paramCombinations [][]int
}
func (pa *paramArrays) generateAllCombination()
func (*paramArrays) init
func (pa *paramArrays) init()
func (pa *paramArrays) initIterators()
func (*paramArrays) iter
func (pa *paramArrays) iter(attributes enhlinkobject.TreeAttributes, tStart time.Time) (newAttr enhlinkobject.TreeAttributes, isOver bool)
func (pa *paramArrays) returnLastThreadVal() int
Generated by gomarkdoc