dirscanner

package module
v0.0.0-...-7b842ff Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 6, 2019 License: Apache-2.0 Imports: 6 Imported by: 2

README

dirscanner

dirscanner is a recursive file lister for Go that delivers its results over channels.

Why?

When there are 1,000,000+ files spread across multiple directories, crawling can take minutes. With dirscanner's channels you can start parsing files more quickly, without waiting for the whole listing first.

Features

  • You can provide a filter function to the scanner that decides which files will be sent for processing. For example: accept only files that are between 1 and 10 MiB in size.

Example usage:

package main

import (
	"log"
	"time"
	"github.com/raspi/dirscanner"
	"runtime"
	"os"
	"sort"
)

// validateFile is an example custom file validator: it reports whether the
// given file is a regular file.
// NOTE(review): the documented FileValidatorFunction type takes a
// FileInformation rather than an os.FileInfo — confirm this signature still
// matches the library version in use.
func validateFile(info os.FileInfo) bool {
	mode := info.Mode()
	return mode.IsRegular()
}

// main scans a directory tree with dirscanner, logs progress once per second
// while the scan is running, and finally logs the found file paths grouped by
// file size in ascending size order.
func main() {
	workerCount := runtime.NumCPU()
	//workerCount := 1

	s := dirscanner.New()

	if err := s.Init(workerCount, validateFile); err != nil {
		panic(err)
	}
	defer s.Close()

	if err := s.ScanDirectory(`/home/raspi`); err != nil {
		panic(err)
	}

	lastDir := ``
	lastFile := ``
	var fileCount uint64

	// Ticker for stats
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	now := time.Now()

	// File paths grouped by file size; the map itself is unordered, the
	// sizes are sorted after the scan.
	pathsBySize := map[uint64][]string{}

	// NOTE(review): select picks among ready cases at random, so if Results
	// can still hold items when Finished fires they would be missed —
	// confirm the library's ordering guarantee.
scanloop:
	for {
		select {
		case <-s.Finished: // Finished getting file list
			log.Printf(`got all files`)
			break scanloop

		case e, ok := <-s.Errors: // Error happened, handle, discard or abort
			if ok {
				log.Printf(`got error: %v`, e)
				//s.Aborted <- true // Abort
			}

		case info, ok := <-s.Information: // Got information where worker is currently
			if ok {
				lastDir = info.Directory
			}

		case <-ticker.C: // Display some progress stats
			log.Printf(`%v Files scanned: %v Last file: %#v Dir: %#v`, time.Since(now).Truncate(time.Second), fileCount, lastFile, lastDir)

		case res, ok := <-s.Results:
			if ok {
				// Process file:
				lastFile = res.Path
				fileCount++
				pathsBySize[res.Size] = append(pathsBySize[res.Size], res.Path)
				//time.Sleep(time.Millisecond * 100)
			}
		}
	}

	// Collect the sizes; the final length is known, so allocate once.
	keys := make([]uint64, 0, len(pathsBySize))
	for k := range pathsBySize {
		keys = append(keys, k)
	}

	// Map iteration order is random, so sort the sizes for stable output.
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })

	// Print in size order
	for _, k := range keys {
		log.Printf(`S:%v C:%v %v`, k, len(pathsBySize[k]), pathsBySize[k])
	}

	log.Printf(`last: %v`, lastFile)
	log.Printf(`count: %v`, fileCount)
}

Installation

To install this package, simply go get it:

go get -u github.com/raspi/dirscanner

Dependencies

There are no 3rd party package dependencies.

Projects using this library

Documentation

Index

Constants

View Source
// DIRECTORY_QUEUE_SIZE is how many directories to keep in queue.
const DIRECTORY_QUEUE_SIZE = 65536

How many directories to keep in queue

Variables

This section is empty.

Functions

This section is empty.

Types

type DirectoryScanner

// DirectoryScanner exposes the channels through which a scan reports its
// results, progress information and errors, plus the file validator in use.
type DirectoryScanner struct {
	Results     chan FileInformation // Scan results (FileInformation values)
	Finished    chan bool            // Signals that the scanner has finished
	Aborted     chan bool            // Scanner has aborted? NOTE(review): presumably sending true aborts the scan — confirm
	Information chan workerInfo      // Information about scan progress
	Errors      chan error           // Errors that happened during scanning

	FileValidatorFunc FileValidatorFunction // Function for file validation
	// contains filtered or unexported fields
}

Always use New() to obtain a proper scanner.

func New

func New() DirectoryScanner

Create new directory scanner

func (*DirectoryScanner) Close

func (s *DirectoryScanner) Close() (err error)

Close channels

func (*DirectoryScanner) Init

func (s *DirectoryScanner) Init(workerCount int, fileValidatorFunc FileValidatorFunction) (err error)

Initialize workers

func (*DirectoryScanner) ScanDirectory

func (s *DirectoryScanner) ScanDirectory(dir string) (err error)

ScanDirectory scans the given directory and sends the results (file paths) to a channel.

type FileInformation

// FileInformation holds information about a single file.
type FileInformation struct {
	Path       string // Path to file
	Size       uint64 // File size (NOTE(review): presumably in bytes — confirm)
	Identifier uint64 // Identifier (inode)
	// File mode, as an os.FileMode
	Mode       os.FileMode
}

Information about a file

type FileValidatorFunction

// FileValidatorFunction is the signature of a file validator: it receives a
// file's FileInformation and returns a bool.
// NOTE(review): presumably true means the file is sent for processing — confirm.
type FileValidatorFunction func(info FileInformation) bool

File validator signature

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL