twitterscraper

package module
v0.0.0-...-ae86dca Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 29, 2022 License: MIT Imports: 16 Imported by: 0

README

Twitter Scraper

Go Reference

Twitter's API is annoying to work with, and has lots of limitations — luckily their frontend (JavaScript) has its own API, which I reverse-engineered. No API rate limits. No tokens needed. No restrictions. Extremely fast.

You can use this library to get the text of any user's Tweets trivially.

Installation

go get -u github.com/n0madic/twitter-scraper

Usage

Get user tweets
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main prints the text of the tweets received for the "Twitter" account,
// asking for at most 50 of them.
func main() {
    s := twitterscraper.New()
    results := s.GetTweets(context.Background(), "Twitter", 50)

    // Every item read from the channel is checked for an error before its
    // text is printed.
    for res := range results {
        if err := res.Error; err != nil {
            panic(err)
        }
        fmt.Println(res.Text)
    }
}

The example above asks for up to 50 tweets; the overall limit appears to be about 3200 tweets.

Get single tweet
package main

import (
    "fmt"

    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main fetches a single tweet by its ID and prints the tweet's text.
func main() {
    s := twitterscraper.New()

    tw, err := s.GetTweet("1328684389388185600")
    if err != nil {
        panic(err)
    }

    fmt.Println(tw.Text)
}
Search tweets by query standard operators

Tweets containing “twitter” and “scraper” and “data”, filtering out retweets:

package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main runs a tweet search for the query below, asking for at most 50
// results, and prints the text of each tweet received.
func main() {
    const query = "twitter scraper data -filter:retweets"

    s := twitterscraper.New()
    for res := range s.SearchTweets(context.Background(), query, 50) {
        if err := res.Error; err != nil {
            panic(err)
        }
        fmt.Println(res.Text)
    }
}

The search ends once 50 tweets have been returned.

See Rules and filtering for how to build standard queries.

Set search mode
scraper.SetSearchMode(twitterscraper.SearchLatest)

Options:

  • twitterscraper.SearchTop - default mode
  • twitterscraper.SearchLatest - live mode
  • twitterscraper.SearchPhotos - image mode
  • twitterscraper.SearchVideos - video mode
  • twitterscraper.SearchUsers - user mode
Get profile
package main

import (
    "fmt"
    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main loads the profile of the "Twitter" account and prints it with field
// names.
func main() {
    s := twitterscraper.New()

    p, err := s.GetProfile("Twitter")
    if err != nil {
        panic(err)
    }

    fmt.Printf("%+v\n", p)
}
Search profiles by query
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main searches for profiles matching "Twitter", asking for at most 50
// results, and prints the name of each profile received.
func main() {
    // The scraper is switched to user search mode before the profile search.
    s := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
    found := s.SearchProfiles(context.Background(), "Twitter", 50)

    for p := range found {
        if err := p.Error; err != nil {
            panic(err)
        }
        fmt.Println(p.Name)
    }
}
Get trends
package main

import (
    "fmt"
    twitterscraper "github.com/n0madic/twitter-scraper"
)

// main retrieves the list of trends and prints it.
func main() {
    s := twitterscraper.New()

    list, err := s.GetTrends()
    if err != nil {
        panic(err)
    }

    fmt.Println(list)
}

Some users' tweets are protected, so you must log in and follow those users to read them. Cookie and xCsrfToken are optional.

scraper.WithCookie("twitter cookie after login")
scraper.WithXCsrfToken("twitter X-Csrf-Token after login")
Use Proxy

Supports HTTP(S) and SOCKS5 proxies.

with HTTP
// Configure an HTTP proxy on the scraper. SetProxy returns an error, which
// this example treats as fatal.
err := scraper.SetProxy("http://localhost:3128")
if err != nil {
    panic(err)
}
with SOCKS5
// Configure a SOCKS5 proxy on the scraper. SetProxy returns an error, which
// this example treats as fatal.
err := scraper.SetProxy("socks5://localhost:1080")
if err != nil {
    panic(err)
}
Delay requests

Add delay between API requests (in seconds)

scraper.WithDelay(5)
Load timeline with tweet replies
scraper.WithReplies(true)

Documentation

Index

Constants

View Source
// DefaultClientTimeout is the default HTTP client timeout: 10 seconds.
const DefaultClientTimeout = 10 * time.Second

DefaultClientTimeout is the default HTTP client timeout.

Variables

This section is empty.

Functions

func GetTrends deprecated

func GetTrends() ([]string, error)

Deprecated: GetTrends wrapper for default Scraper

func GetTweets deprecated

func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult

Deprecated: GetTweets wrapper for default Scraper

func SearchProfiles deprecated

func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult

Deprecated: SearchProfiles wrapper for default Scraper

func SearchTweets deprecated

func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult

Deprecated: SearchTweets wrapper for default Scraper

func SetProxy deprecated

func SetProxy(proxy string) error

Deprecated: SetProxy wrapper for default Scraper

Types

type Media

// Media is an empty interface, so a Media value can hold a value of any type.
// NOTE(review): presumably it holds MediaPhoto or MediaVideo values — confirm.
type Media interface{}

Media type

type MediaPhoto

// MediaPhoto describes a photo using two string fields.
// NOTE(review): presumably Url is the image location and Alt its alternative
// text — confirm.
type MediaPhoto struct {
	Url string
	Alt string
}

MediaPhoto type

type MediaVideo

// MediaVideo describes a video using a boolean flag and three string fields.
// NOTE(review): presumably Preview is a preview image location, Url the video
// location and Alt its alternative text — confirm.
type MediaVideo struct {
	// IsAnimatedGif is a flag; judging by its name it marks animated GIFs.
	IsAnimatedGif bool
	Preview       string
	Url           string
	Alt           string
}

MediaVideo type

type Place

// Place describes a location. Every field carries a struct tag naming the
// JSON key it maps to.
type Place struct {
	ID          string `json:"id"`
	PlaceType   string `json:"place_type"`
	Name        string `json:"name"`
	FullName    string `json:"full_name"`
	CountryCode string `json:"country_code"`
	Country     string `json:"country"`
	// BoundingBox is an anonymous struct holding a type string and a
	// three-level nested slice of float64 coordinates.
	BoundingBox struct {
		Type        string        `json:"type"`
		Coordinates [][][]float64 `json:"coordinates"`
	} `json:"bounding_box"`
}

type Profile

// Profile of a Twitter user. GetProfile returns a Profile by value, and
// ProfileResult embeds it.
type Profile struct {
	Avatar         string
	Banner         string
	Biography      string
	Birthday       string
	FollowersCount int
	FollowingCount int
	FriendsCount   int
	IsFollowing    bool
	IsPrivate      bool
	IsVerified     bool
	// Joined is a pointer to a time.Time, so it can be nil.
	Joined         *time.Time
	LikesCount     int
	ListedCount    int
	Location       string
	Name           string
	// PinnedTweetIDs is a slice of strings.
	// NOTE(review): presumably the IDs of the user's pinned tweets — confirm.
	PinnedTweetIDs []string
	TweetsCount    int
	URL            string
	UserID         string
	Username       string
	Website        string
}

Profile of twitter user.

func GetProfile deprecated

func GetProfile(username string) (Profile, error)

Deprecated: GetProfile wrapper for default scraper

type ProfileResult

// ProfileResult is the element type of the channel returned by
// SearchProfiles. It embeds Profile, so profile fields such as Name are
// accessible directly on the result. The usage example checks Error before
// reading any profile field.
type ProfileResult struct {
	Profile
	Error error
}

ProfileResult of scraping.

type Scraper

// Scraper has no exported fields. New returns a *Scraper, and the methods
// listed on this page are called on that value.
type Scraper struct {
	// contains filtered or unexported fields
}

Scraper object

func New

func New() *Scraper

New creates a Scraper object

func SetSearchMode deprecated

func SetSearchMode(mode SearchMode) *Scraper

Deprecated: SetSearchMode wrapper for default Scraper

func WithDelay deprecated

func WithDelay(seconds int64) *Scraper

Deprecated: WithDelay wrapper for default Scraper

func WithReplies deprecated

func WithReplies(b bool) *Scraper

Deprecated: WithReplies wrapper for default Scraper

func (*Scraper) FetchHomeLatestTimeline

func (s *Scraper) FetchHomeLatestTimeline(_ string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchHomeLatestTimeline gets tweets from the home latest timeline.

func (*Scraper) FetchHomeTimeline

func (s *Scraper) FetchHomeTimeline(_ string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchHomeTimeline gets tweets from the home timeline.

func (*Scraper) FetchSearchProfiles

func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error)

FetchSearchProfiles gets users for a given search query, via the Twitter frontend API

func (*Scraper) FetchSearchTweets

func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API

func (*Scraper) FetchTweets

func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweets gets tweets for a given user, via the Twitter frontend API.

func (*Scraper) Follow

func (s *Scraper) Follow(user string) (*friendships, error)

func (*Scraper) GetGuestToken

func (s *Scraper) GetGuestToken() error

GetGuestToken from Twitter API

func (*Scraper) GetHomeLatestTimeline

func (s *Scraper) GetHomeLatestTimeline(ctx context.Context, maxTweetsNbr int) <-chan *TweetResult

GetHomeLatestTimeline returns channel with tweets from home latest timeline.

func (*Scraper) GetHomeTimeline

func (s *Scraper) GetHomeTimeline(ctx context.Context, maxTweetsNbr int) <-chan *TweetResult

GetHomeTimeline returns channel with tweets from home timeline.

func (*Scraper) GetProfile

func (s *Scraper) GetProfile(username string) (Profile, error)

GetProfile returns the parsed user profile.

func (*Scraper) GetTrends

func (s *Scraper) GetTrends() ([]string, error)

GetTrends returns a list of trends.

func (*Scraper) GetTweet

func (s *Scraper) GetTweet(id string) (*Tweet, error)

GetTweet gets a single tweet by ID.

func (*Scraper) GetTweets

func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult

GetTweets returns channel with tweets for a given user.

func (*Scraper) GetUserIDByScreenName

func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error)

GetUserIDByScreenName from API

func (*Scraper) IsGuestToken

func (s *Scraper) IsGuestToken() bool

IsGuestToken reports whether the guest token is not empty.

func (*Scraper) RequestAPI

func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error

RequestAPI gets JSON from the frontend API and decodes it.

func (*Scraper) SearchProfiles

func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult

SearchProfiles returns channel with profiles for a given search query

func (*Scraper) SearchTweets

func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult

SearchTweets returns channel with tweets for a given search query

func (*Scraper) SetProxy

func (s *Scraper) SetProxy(proxyAddr string) error

SetProxy sets an HTTP proxy in the format `http://HOST:PORT`, or a SOCKS5 proxy in the format `socks5://HOST:PORT`.

func (*Scraper) SetSearchMode

func (s *Scraper) SetSearchMode(mode SearchMode) *Scraper

SetSearchMode switcher

func (*Scraper) Unfollow

func (s *Scraper) Unfollow(user string) (*friendships, error)

func (*Scraper) WithClientTimeout

func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper

WithClientTimeout sets the client timeout.

func (*Scraper) WithCookie

func (s *Scraper) WithCookie(cookie string) *Scraper

WithCookie sets the cookie (the Twitter cookie obtained after login).

func (*Scraper) WithDelay

func (s *Scraper) WithDelay(seconds int64) *Scraper

WithDelay adds a delay between API requests (in seconds).

func (*Scraper) WithReplies

func (s *Scraper) WithReplies(b bool) *Scraper

WithReplies enables or disables loading the timeline with tweet replies.

func (*Scraper) WithXCsrfToken

func (s *Scraper) WithXCsrfToken(xcsrfToken string) *Scraper

WithXCsrfToken sets the X-Csrf-Token (the Twitter X-Csrf-Token obtained after login).

type SearchMode

// SearchMode is an int-based type whose values are passed to SetSearchMode.
type SearchMode int

SearchMode type

// Search modes accepted by SetSearchMode. The values are assigned with iota
// in declaration order, so SearchTop is 0 and SearchUsers is 4.
const (
	// SearchTop - default mode
	SearchTop SearchMode = iota
	// SearchLatest - live mode
	SearchLatest
	// SearchPhotos - image mode
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
	// SearchUsers - user mode
	SearchUsers
)

type Tweet

// Tweet holds the data of a single tweet. GetTweet returns a *Tweet, the
// Fetch* methods return []*Tweet, and TweetResult embeds Tweet.
type Tweet struct {
	Hashtags         []string
	HTML             string
	ID               string
	// InReplyToStatus is a pointer to another Tweet, so it can be nil.
	// NOTE(review): presumably set only when IsReply is true — confirm.
	InReplyToStatus  *Tweet
	IsQuoted         bool
	IsPin            bool
	IsReply          bool
	IsRetweet        bool
	IsRecommended    bool
	Likes            int
	Mentions         []string
	PermanentURL     string
	// Place is a pointer to a Place, so it can be nil.
	Place            *Place
	// QuotedStatus is a pointer to another Tweet, so it can be nil.
	// NOTE(review): presumably set only when IsQuoted is true — confirm.
	QuotedStatus     *Tweet
	Replies          int
	Retweets         int
	// RetweetedStatus is a pointer to another Tweet, so it can be nil.
	// NOTE(review): presumably set only when IsRetweet is true — confirm.
	RetweetedStatus  *Tweet
	Text             string
	// NOTE(review): TimeParsed and Timestamp presumably describe the same
	// instant, with Timestamp as Unix time — confirm.
	TimeParsed       time.Time
	Timestamp        int64
	URLs             []string
	UserID           string
	Username         string
	SensitiveContent bool
	// Medias is a slice of Media, which is an empty interface.
	// NOTE(review): elements presumably are MediaPhoto or MediaVideo — confirm.
	Medias           []Media
}

Tweet type.

func GetTweet deprecated

func GetTweet(id string) (*Tweet, error)

Deprecated: GetTweet wrapper for default Scraper

type TweetResult

// TweetResult is the element type of the channels returned by GetTweets,
// SearchTweets, GetHomeTimeline and GetHomeLatestTimeline. It embeds Tweet,
// so tweet fields such as Text are accessible directly on the result. The
// usage examples check Error before reading any tweet field.
type TweetResult struct {
	Tweet
	Error error
}

TweetResult of scraping.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL