core

package v0.0.0-...-d9bb2be

Published: Dec 7, 2021 · License: MIT · Imports: 30 · Imported by: 0

Documentation

Index

Constants

const (
	CLIName = "gocrawler"
	AUTHOR  = "@zerokeeper"
	VERSION = "v1.0.0"
)
const SUBRE = `(?i)(([a-zA-Z0-9]{1}|[_a-zA-Z0-9]{1}[_a-zA-Z0-9-]{0,61}[a-zA-Z0-9]{1})[.]{1})+`
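
SUBRE matches one or more dot-terminated DNS labels, so appending an escaped root domain yields a matcher for that domain's subdomains. A minimal sketch of that usage (the import path is assumed; whether the package builds its own matchers this way is not documented):

package main

import (
	"fmt"
	"regexp"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Append an escaped root domain to the label-prefix pattern
	// to match subdomains of example.com.
	re := regexp.MustCompile(core.SUBRE + `example\.com`)
	text := "see app.example.com and cdn.static.example.com"
	fmt.Println(re.FindAllString(text, -1))
	// [app.example.com cdn.static.example.com]
}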

Variables

var AWSS3 = regexp.MustCompile(`(?i)[a-z0-9.-]+\.s3\.amazonaws\.com|[a-z0-9.-]+\.s3-[a-z0-9-]\.amazonaws\.com|[a-z0-9.-]+\.s3-website[.-](eu|ap|us|ca|sa|cn)|//s3\.amazonaws\.com/[a-z0-9._-]+|//s3-[a-z0-9-]+\.amazonaws\.com/[a-z0-9._-]+`)
var DefaultHTTPTransport = &http.Transport{
	DialContext: (&net.Dialer{
		Timeout: 10 * time.Second,

		KeepAlive: 30 * time.Second,
	}).DialContext,
	MaxIdleConns:    100,
	MaxConnsPerHost: 1000,
	IdleConnTimeout: 30 * time.Second,

	TLSClientConfig: &tls.Config{InsecureSkipVerify: true, Renegotiation: tls.RenegotiateOnceAsClient},
}
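
DefaultHTTPTransport disables TLS certificate verification (InsecureSkipVerify: true), which tolerates the broken certificates a crawler meets but is unsafe for anything security-sensitive. A minimal sketch wiring it into an http.Client (import path assumed):

package main

import (
	"fmt"
	"net/http"
	"time"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Build a client on the package transport; note that it
	// skips TLS certificate verification.
	client := &http.Client{
		Transport: core.DefaultHTTPTransport,
		Timeout:   15 * time.Second,
	}
	resp, err := client.Get("https://example.com")
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
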
var Logger *logrus.Logger

Functions

func BodyToString

func BodyToString(b io.Reader) string
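
Judging by the signature, BodyToString drains an io.Reader and returns its bytes as a string; a sketch under that assumption (import path assumed):

package main

import (
	"fmt"
	"strings"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Any io.Reader works; an HTTP response body is the usual input.
	r := strings.NewReader("<html>hello</html>")
	fmt.Println(core.BodyToString(r))
}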

func CleanSubdomain

func CleanSubdomain(s string) string

func DecodeChars

func DecodeChars(s string) string

func FilterNewLines

func FilterNewLines(s string) string

func FixUrl

func FixUrl(mainSite *url.URL, nextLoc string) string
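
FixUrl evidently resolves a link found on a page (nextLoc) against the site being crawled and returns it in absolute form. A hedged sketch (import path assumed):

package main

import (
	"fmt"
	"net/url"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	site, err := url.Parse("https://example.com/docs/")
	if err != nil {
		panic(err)
	}
	// Resolve a relative href discovered on the page; presumably
	// this prints https://example.com/assets/app.js.
	fmt.Println(core.FixUrl(site, "../assets/app.js"))
}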

func GetAWSS3

func GetAWSS3(source string) []string
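
GetAWSS3 presumably runs the AWSS3 pattern above over page source and returns the S3 bucket URLs it finds. Sketch (import path assumed):

package main

import (
	"fmt"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	src := `<script src="https://assets.example.s3.amazonaws.com/app.js"></script>`
	// Expect the S3 host(s) referenced in the source.
	for _, bucket := range core.GetAWSS3(src) {
		fmt.Println(bucket)
	}
}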

func GetDomain

func GetDomain(site *url.URL) string

func GetExtType

func GetExtType(rawUrl string) string

func GetRawCookie

func GetRawCookie(cookies []*http.Cookie) string

func GetSubdomains

func GetSubdomains(source, domain string) []string
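
GetSubdomains looks like the function-level counterpart of SUBRE: scan source text for subdomains of domain. Sketch (import path assumed):

package main

import (
	"fmt"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	src := "links: https://app.example.com/login and http://cdn.example.com/x.js"
	for _, sub := range core.GetSubdomains(src, "example.com") {
		fmt.Println(sub)
	}
}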

func InScope

func InScope(u *url.URL, regexps []*regexp.Regexp) bool
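
InScope apparently reports whether a URL matches any of the supplied scope patterns; exactly which part of the URL is tested (host or full string) is not documented, so the pattern below is kept unanchored. Sketch (import path assumed):

package main

import (
	"fmt"
	"net/url"
	"regexp"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Hypothetical scope rule: anything mentioning example.com.
	scope := []*regexp.Regexp{regexp.MustCompile(`example\.com`)}
	u, _ := url.Parse("https://app.example.com/admin")
	fmt.Println(core.InScope(u, scope))
}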

func LinkFinder

func LinkFinder(source string) ([]string, error)

func LoadCookies

func LoadCookies(rawCookie string) []*http.Cookie
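
LoadCookies and GetRawCookie appear to be inverses: one parses a raw Cookie header into http.Cookie values, the other serializes cookies back into header form. A round-trip sketch (import path assumed):

package main

import (
	"fmt"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Parse a raw Cookie header into structured cookies...
	cookies := core.LoadCookies("session=abc123; theme=dark")
	for _, c := range cookies {
		fmt.Println(c.Name, "=", c.Value)
	}
	// ...and serialize them back into header form.
	fmt.Println(core.GetRawCookie(cookies))
}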

func NormalizePath

func NormalizePath(path string) string

NormalizePath normalizes the given path.

func OtherSources

func OtherSources(domain string, includeSubs bool) []string

func ParseRobots

func ParseRobots(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)

func ParseSiteMap

func ParseSiteMap(site *url.URL, crawler *Crawler, c *colly.Collector, wg *sync.WaitGroup)

func ReadingLines

func ReadingLines(filename string) []string

ReadingLines reads a file and returns its content as a []string, one element per line.

func Unique

func Unique(intSlice []string) []string
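
Unique presumably de-duplicates a string slice (the parameter name intSlice reads like a leftover from an int version). Sketch (import path assumed):

package main

import (
	"fmt"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	urls := []string{"/a", "/b", "/a", "/c", "/b"}
	// Expect one entry per distinct string, e.g. [/a /b /c].
	fmt.Println(core.Unique(urls))
}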

Types

type Crawler

type Crawler struct {
	C                   *colly.Collector
	LinkFinderCollector *colly.Collector
	Output              *Output

	Input      string
	Quiet      bool
	JsonOutput bool
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(site *url.URL, cmd *cobra.Command) *Crawler

func (*Crawler) Start

func (crawler *Crawler) Start(linkfinder bool)
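
Tying the type together: NewCrawler takes the target URL plus the cobra.Command whose flags carry its configuration, and Start runs the crawl. The flags registered below are hypothetical; the real set is whatever NewCrawler actually reads (import path assumed):

package main

import (
	"net/url"

	"github.com/spf13/cobra"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Hypothetical flags; NewCrawler presumably reads its
	// configuration (depth, concurrency, ...) from the command.
	cmd := &cobra.Command{Use: "gocrawler"}
	cmd.Flags().Int("depth", 1, "max crawl depth")
	cmd.Flags().Int("concurrent", 5, "concurrent requests")

	site, err := url.Parse("https://example.com")
	if err != nil {
		panic(err)
	}
	crawler := core.NewCrawler(site, cmd)
	crawler.Start(true) // true: also run the link finder
}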

type Output

type Output struct {
	// contains filtered or unexported fields
}

func NewOutput

func NewOutput(folder, filename string) *Output

func (*Output) Close

func (o *Output) Close()

func (*Output) WriteToFile

func (o *Output) WriteToFile(msg string)
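
Output wraps the results file on disk: create it with NewOutput, append lines with WriteToFile, and Close when finished. Sketch (import path assumed; the folder/filename layout is a guess from the parameter names):

package main

import (
	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	// Presumably creates ./results/example.com (folder, then file).
	out := core.NewOutput("results", "example.com")
	defer out.Close()
	out.WriteToFile("https://example.com/admin")
	out.WriteToFile("https://example.com/login")
}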

type SpiderOutput

type SpiderOutput struct {
	//Input      string `json:"input"`
	Source string `json:"source"`
	//OutputType string `json:"type"`
	Url        string            `json:"output"`
	Method     string            `json:"method"`
	Header     map[string]string `json:"header"`
	Data       string            `json:"data"`
	StatusCode int               `json:"status"`
	Length     int               `json:"length"`
}
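
SpiderOutput is the per-finding record the crawler emits as JSON (the commented-out fields suggest a trimmed schema). Marshaling one shows the wire format (import path assumed):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/zerokeeper/gocrawler/core" // assumed import path
)

func main() {
	rec := core.SpiderOutput{
		Source:     "body",
		Url:        "https://example.com/app.js",
		Method:     "GET",
		Header:     map[string]string{"User-Agent": "gocrawler"},
		StatusCode: 200,
		Length:     1024,
	}
	b, err := json.Marshal(rec)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
	// {"source":"body","output":"https://example.com/app.js","method":"GET",...}
}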
