crawl

package module
v0.2.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 22, 2023 License: BSD-3-Clause Imports: 5 Imported by: 18

README

go-mapzen-whosonfirst-crawl

Go tools and libraries for crawling a directory of Who's On First data

Install

You will need to have both Go (version 1.12 or higher) and the `make` program installed on your computer. Assuming you do, just type:

make tools

All of this package's dependencies are bundled with the code in the vendor directory.

Example

crawl.Crawl
package main

import (
	"flag"
	"fmt"
	"github.com/whosonfirst/go-whosonfirst-crawl"
	"log"
	"os"
	"time"
	"sync/atomic"
)

// main crawls the directory named by the -root flag, counting the files and
// directories handed to the callback, and prints the totals together with the
// elapsed wall-clock time.
func main() {

	root := flag.String("root", "", "The root directory you want to crawl")

	flag.Parse()

	// Both counters are only ever touched through sync/atomic.
	// NOTE(review): presumably the crawler may invoke the callback from
	// multiple goroutines — confirm against the crawl package.
	var files int64
	var dirs int64

	// callback tallies each entry as either a directory or a file.
	callback := func(path string, info os.FileInfo) error {

		if info.IsDir() {
			atomic.AddInt64(&dirs, 1)
			return nil
		}

		atomic.AddInt64(&files, 1)
		return nil
	}

	t0 := time.Now()

	// Report the totals on normal return. This does not run when log.Fatal
	// below terminates the process, because log.Fatal exits without running
	// deferred functions.
	defer func() {
		elapsed := time.Since(t0).Seconds()
		fmt.Printf("walked %d files (and %d dirs) in %s in %.3f seconds\n", atomic.LoadInt64(&files), atomic.LoadInt64(&dirs), *root, elapsed)
	}()

	c := crawl.NewCrawler(*root)
	err := c.Crawl(callback)

	if err != nil {
		log.Fatal(err)
	}
}
crawl.CrawlWithContext

Please write me

crawl.CrawlWithChannels

Please write me

Tools

wof-count
./bin/wof-count /usr/local/data/sfomuseum-data-flights-2019-*
go build -o bin/wof-count cmd/wof-count/main.go
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-01
walked 98116 files (and 0 dirs) in 4.203 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-02
walked 87989 files (and 0 dirs) in 4.969 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-03
walked 102354 files (and 0 dirs) in 6.178 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-04
walked 107775 files (and 0 dirs) in 6.377 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-05
walked 143200 files (and 0 dirs) in 7.565 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-06
walked 124490 files (and 0 dirs) in 6.326 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-07
walked 63 files (and 0 dirs) in 0.007 seconds
count files and directories in  /usr/local/data/sfomuseum-data-flights-2019-08
walked 43 files (and 0 dirs) in 0.005 seconds

See also

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func IsCallbackError added in v0.2.0

func IsCallbackError(err error) bool

func IsCrawlError added in v0.2.0

func IsCrawlError(err error) bool

func IsDoneError added in v0.2.0

func IsDoneError(err error) bool

func IsWalkError added in v0.2.0

func IsWalkError(err error) bool

Types

type CallbackError added in v0.2.0

// CallbackError pairs a path with an underlying error.
// NOTE(review): presumably reported when a CrawlFunc callback returns an
// error for Path — confirm against the implementation.
type CallbackError struct {
	// Path is the path associated with the error.
	Path    string
	// Details is the underlying error.
	Details error
}

func NewCallbackError added in v0.2.0

func NewCallbackError(path string, details error) *CallbackError

func (*CallbackError) Error added in v0.2.0

func (e *CallbackError) Error() string

func (*CallbackError) String added in v0.2.0

func (e *CallbackError) String() string

type CrawlError added in v0.2.0

// CrawlError pairs a path with an underlying error.
// NOTE(review): presumably reported when crawling Path itself fails, as
// opposed to a callback failure — confirm against the implementation.
type CrawlError struct {
	// Path is the path associated with the error.
	Path    string
	// Details is the underlying error.
	Details error
}

func NewCrawlError added in v0.2.0

func NewCrawlError(path string, details error) *CrawlError

func (*CrawlError) Error added in v0.2.0

func (e *CrawlError) Error() string

func (*CrawlError) String added in v0.2.0

func (e *CrawlError) String() string

type CrawlFunc

// CrawlFunc is the callback signature accepted by Crawler.Crawl and its
// variants. It receives a path and that path's os.FileInfo and returns an
// error.
type CrawlFunc func(path string, info os.FileInfo) error

type Crawler

// Crawler holds the settings for a crawl.
type Crawler struct {
	// Root is presumably the directory the crawl starts from (NewCrawler
	// takes a path argument) — TODO confirm.
	Root             string
	// CrawlDirectories presumably controls whether directories, and not only
	// files, are passed to the callback — TODO confirm.
	CrawlDirectories bool
}

func NewCrawler

func NewCrawler(path string) *Crawler

func (Crawler) Crawl

func (c Crawler) Crawl(cb CrawlFunc) error

func (Crawler) CrawlWithChannels added in v0.2.0

func (c Crawler) CrawlWithChannels(ctx context.Context, cb CrawlFunc, processing_ch chan *ProcessingRequest, error_ch chan error, done_ch chan bool)

func (Crawler) CrawlWithContext added in v0.2.0

func (c Crawler) CrawlWithContext(ctx context.Context, cb CrawlFunc) error

func (Crawler) CrawlWithContextAndRequestHandler added in v0.2.0

func (c Crawler) CrawlWithContextAndRequestHandler(ctx context.Context, cb CrawlFunc, req_handler RequestHandlerFunc) error

type DoneError added in v0.2.0

// DoneError is an empty struct type; *DoneError has an Error method.
// NOTE(review): presumably signals that a crawl was stopped or finished
// rather than a real failure — confirm against the implementation.
type DoneError struct{}

func NewDoneError added in v0.2.0

func NewDoneError() *DoneError

func (*DoneError) Error added in v0.2.0

func (e *DoneError) Error() string

type ProcessingRequest added in v0.2.0

// ProcessingRequest is the value passed to a RequestHandlerFunc and sent on
// the processing channel of Crawler.CrawlWithChannels.
type ProcessingRequest struct {
	// Path is the path the request refers to.
	Path  string
	// Ready is presumably used to signal whether Path may be processed —
	// TODO confirm who sends and who receives.
	Ready chan bool
}

type RequestHandlerFunc added in v0.2.0

// RequestHandlerFunc is the handler signature accepted by
// Crawler.CrawlWithContextAndRequestHandler. It receives a *ProcessingRequest
// and returns a bool.
// NOTE(review): the meaning of the returned bool is not visible here —
// confirm against the implementation.
type RequestHandlerFunc func(req *ProcessingRequest) bool

type WalkError added in v0.2.0

// WalkError pairs a path with an underlying error.
// NOTE(review): presumably reported when the filesystem walk fails at Path —
// confirm against the implementation.
type WalkError struct {
	// Path is the path associated with the error.
	Path    string
	// Details is the underlying error.
	Details error
}

func NewWalkError added in v0.2.0

func NewWalkError(path string, details error) *WalkError

func (*WalkError) Error added in v0.2.0

func (e *WalkError) Error() string

func (*WalkError) String added in v0.2.0

func (e *WalkError) String() string

Directories

Path Synopsis
cmd
wof-count command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL