twitterscraper

package module
v0.0.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2023 License: MIT Imports: 20 Imported by: 0

README

Twitter Scraper

Go Reference

Twitter's API is annoying to work with, and has lots of limitations — luckily their frontend (JavaScript) has its own API, which I reverse-engineered. No API rate limits. No tokens needed. No restrictions. Extremely fast.

You can use this library to get the text of any user's Tweets trivially.

Installation

go get -u github.com/SametAvcii/twitter-scraper

Usage

Authentication

Now all methods require authentication!

Login
err := scraper.Login("username", "password")

Use your username to log in, not your email! If your account requires email confirmation, pass the email address as an additional argument:

err := scraper.Login("username", "password", "email")

If you have two-factor authentication enabled, pass the code as the third argument:

err := scraper.Login("username", "password", "code")

Status of login can be checked with:

scraper.IsLoggedIn()

Logout (clear session):

scraper.Logout()

If you want to save the session between restarts, you can save the cookies with scraper.GetCookies() and restore them with scraper.SetCookies().

For example, save cookies:

cookies := scraper.GetCookies()
// serialize to JSON
js, _ := json.Marshal(cookies)
// save to file
f, _ = os.Create("cookies.json")
f.Write(js)

and load cookies:

f, _ := os.Open("cookies.json")
// deserialize from JSON
var cookies []*http.Cookie
json.NewDecoder(f).Decode(&cookies)
// load cookies
scraper.SetCookies(cookies)
// check login status
scraper.IsLoggedIn()
Open account

If you don't want to use your own account, you can try logging in as a Twitter app:

err := scraper.LoginOpenAccount()
Get user tweets
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    err := scraper.LoginOpenAccount()
    if err != nil {
        panic(err)
    }
    for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
        if tweet.Error != nil {
            panic(tweet.Error)
        }
        fmt.Println(tweet.Text)
    }
}

It appears you can ask for up to 50 tweets.

Get single tweet
package main

import (
    "fmt"

    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    err := scraper.Login(username, password)
    if err != nil {
        panic(err)
    }
    tweet, err := scraper.GetTweet("1328684389388185600")
    if err != nil {
        panic(err)
    }
    fmt.Println(tweet.Text)
}
Search tweets by query standard operators

Now the search only works for authenticated users!

Tweets containing “twitter” and “scraper” and “data”, filtering out retweets:

package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    err := scraper.Login(username, password)
    if err != nil {
        panic(err)
    }
    for tweet := range scraper.SearchTweets(context.Background(),
        "twitter scraper data -filter:retweets", 50) {
        if tweet.Error != nil {
            panic(tweet.Error)
        }
        fmt.Println(tweet.Text)
    }
}

The search ends if we have 50 tweets.

See Rules and filtering for how to build standard queries.

Set search mode
scraper.SetSearchMode(twitterscraper.SearchLatest)

Options:

  • twitterscraper.SearchTop - default mode
  • twitterscraper.SearchLatest - live mode
  • twitterscraper.SearchPhotos - image mode
  • twitterscraper.SearchVideos - video mode
  • twitterscraper.SearchUsers - user mode
Get profile
package main

import (
    "fmt"
    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    scraper.LoginOpenAccount()
    profile, err := scraper.GetProfile("Twitter")
    if err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", profile)
}
Search profiles by query
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
    err := scraper.Login(username, password)
    if err != nil {
        panic(err)
    }
    for profile := range scraper.SearchProfiles(context.Background(), "Twitter", 50) {
        if profile.Error != nil {
            panic(profile.Error)
        }
        fmt.Println(profile.Name)
    }
}
Get trends
package main

import (
    "fmt"
    twitterscraper "github.com/SametAvcii/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    trends, err := scraper.GetTrends()
    if err != nil {
        panic(err)
    }
    fmt.Println(trends)
}
Use Proxy

Supports HTTP(S) and SOCKS5 proxies.

with HTTP
err := scraper.SetProxy("http://localhost:3128")
if err != nil {
    panic(err)
}
with SOCKS5
err := scraper.SetProxy("socks5://localhost:1080")
if err != nil {
    panic(err)
}
Delay requests

Add delay between API requests (in seconds)

scraper.WithDelay(5)
Load timeline with tweet replies
scraper.WithReplies(true)

Documentation

Index

Constants

View Source
const DefaultClientTimeout = 10 * time.Second

DefaultClientTimeout is the default HTTP client timeout.

Variables

This section is empty.

Functions

This section is empty.

Types

type Entry

type Entry struct {
	EntryID   string       `json:"entryId"`
	SortIndex string       `json:"sortIndex"`
	Content   EntryContent `json:"content"`
}

type EntryContent

type EntryContent struct {
	ItemType    string      `json:"itemType"`
	ItemContent ItemContent `json:"itemContent"`
}

type FetchFailed

type FetchFailed struct {
	Data struct {
		ThreadedConversationWithInjectionsV2 ThreadedConversation `json:"threaded_conversation_with_injections_v2"`
	} `json:"data"`
}

type GIF

type GIF struct {
	ID      string
	Preview string
	URL     string
}

GIF type.

type ItemContent

type ItemContent struct {
	TweetResults        TweetResults `json:"tweet_results"`
	TweetDisplayType    string       `json:"tweetDisplayType"`
	HasModeratedReplies bool         `json:"hasModeratedReplies"`
}

type Mention

type Mention struct {
	ID       string
	Username string
	Name     string
}

Mention type.

type Photo

type Photo struct {
	ID  string
	URL string
}

Photo type.

type Place

type Place struct {
	ID          string `json:"id"`
	PlaceType   string `json:"place_type"`
	Name        string `json:"name"`
	FullName    string `json:"full_name"`
	CountryCode string `json:"country_code"`
	Country     string `json:"country"`
	BoundingBox struct {
		Type        string        `json:"type"`
		Coordinates [][][]float64 `json:"coordinates"`
	} `json:"bounding_box"`
}

type Profile

type Profile struct {
	Avatar         string
	Banner         string
	Biography      string
	Birthday       string
	FollowersCount int
	FollowingCount int
	FriendsCount   int
	IsPrivate      bool
	IsVerified     bool
	Joined         *time.Time
	LikesCount     int
	ListedCount    int
	Location       string
	Name           string
	PinnedTweetIDs []string
	TweetsCount    int
	URL            string
	UserID         string
	Username       string
	Website        string
}

Profile of twitter user.

type ProfileResult

type ProfileResult struct {
	Profile
	Error error
}

ProfileResult of scraping.

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
}

Scraper object

func New

func New() *Scraper

New creates a Scraper object

func (*Scraper) ClearCookies

func (s *Scraper) ClearCookies()

func (*Scraper) FetchSearchProfiles

func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error)

FetchSearchProfiles gets users for a given search query, via the Twitter frontend API

func (*Scraper) FetchSearchTweets

func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API

func (*Scraper) FetchTweetIDS

func (s *Scraper) FetchTweetIDS(userID string, maxTweetsNbr int, cursor string) ([]string, error)

func (*Scraper) FetchTweets

func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweets gets tweets for a given user, via the Twitter frontend API.

func (*Scraper) FetchTweetsByUserID

func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweetsByUserID gets tweets for a given userID, via the Twitter frontend GraphQL API.

func (*Scraper) FetchTweetsByUserIDLegacy

func (s *Scraper) FetchTweetsByUserIDLegacy(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweetsByUserIDLegacy gets tweets for a given userID, via the Twitter frontend legacy API.

func (*Scraper) GetCookies

func (s *Scraper) GetCookies() []*http.Cookie

func (*Scraper) GetGuestToken

func (s *Scraper) GetGuestToken() error

GetGuestToken from Twitter API

func (*Scraper) GetProfile

func (s *Scraper) GetProfile(username string) (Profile, error)

GetProfile returns the parsed user profile.

func (*Scraper) GetTrends

func (s *Scraper) GetTrends() ([]string, error)

GetTrends returns a list of trends.

func (*Scraper) GetTweet

func (s *Scraper) GetTweet(id string) (*Tweet, error)

GetTweet gets a single tweet by ID.

func (*Scraper) GetTweets

func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult

GetTweets returns channel with tweets for a given user.

func (*Scraper) GetUserIDByScreenName

func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error)

GetUserIDByScreenName from API

func (*Scraper) IsGuestToken

func (s *Scraper) IsGuestToken() bool

IsGuestToken checks whether the guest token is not empty.

func (*Scraper) IsLoggedIn

func (s *Scraper) IsLoggedIn() bool

IsLoggedIn checks whether the scraper is logged in.

func (*Scraper) Login

func (s *Scraper) Login(credentials ...string) error

Login logs in to Twitter. Use Login(username, password) for an ordinary login, Login(username, password, email) if your account requires email confirmation, or Login(username, password, code_for_2FA) if you have two-factor authentication enabled.

func (*Scraper) LoginOpenAccount

func (s *Scraper) LoginOpenAccount() error

LoginOpenAccount as Twitter app

func (*Scraper) Logout

func (s *Scraper) Logout() error

Logout resets the session.

func (*Scraper) RequestAPI

func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error

RequestAPI gets JSON from the frontend API and decodes it.

func (*Scraper) SearchProfiles

func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult

SearchProfiles returns channel with profiles for a given search query

func (*Scraper) SearchTweets

func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult

SearchTweets returns channel with tweets for a given search query

func (*Scraper) SetCookies

func (s *Scraper) SetCookies(cookies []*http.Cookie)

func (*Scraper) SetProxy

func (s *Scraper) SetProxy(proxyAddr string) error

SetProxy sets an HTTP proxy in the format `http://HOST:PORT` or a SOCKS5 proxy in the format `socks5://HOST:PORT`.

func (*Scraper) SetSearchMode

func (s *Scraper) SetSearchMode(mode SearchMode) *Scraper

SetSearchMode switcher

func (*Scraper) WithClientTimeout

func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper

WithClientTimeout sets the HTTP client timeout.

func (*Scraper) WithDelay

func (s *Scraper) WithDelay(seconds int64) *Scraper

WithDelay adds a delay between API requests (in seconds).

func (*Scraper) WithReplies

func (s *Scraper) WithReplies(b bool) *Scraper

WithReplies enables or disables loading the timeline with tweet replies.

type SearchMode

type SearchMode int

SearchMode type

const (
	// SearchTop - default mode
	SearchTop SearchMode = iota
	// SearchLatest - live mode
	SearchLatest
	// SearchPhotos - image mode
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
	// SearchUsers - user mode
	SearchUsers
)

type ThreadedConversation

type ThreadedConversation struct {
	Instructions []struct {
		Type    string  `json:"type"`
		Entries []Entry `json:"entries"`
	} `json:"instructions"`
}

type Tweet

type Tweet struct {
	ConversationID    string
	GIFs              []GIF
	Hashtags          []string
	HTML              string
	ID                string
	InReplyToStatus   *Tweet
	InReplyToStatusID string
	IsQuoted          bool
	IsPin             bool
	IsReply           bool
	IsRetweet         bool
	IsSelfThread      bool
	Likes             int
	Name              string
	Mentions          []Mention
	PermanentURL      string
	Photos            []Photo
	Place             *Place
	QuotedStatus      *Tweet
	QuotedStatusID    string
	Replies           int
	Retweets          int
	RetweetedStatus   *Tweet
	RetweetedStatusID string
	Text              string
	Thread            []*Tweet
	TimeParsed        time.Time
	Timestamp         int64
	URLs              []string
	UserID            string
	Username          string
	Videos            []Video
	Views             int
	SensitiveContent  bool
}

Tweet type.

func (*Tweet) FailMapping

func (t *Tweet) FailMapping(fail FetchFailed)

type TweetFail

type TweetFail struct {
	RestID            string `json:"rest_id"`
	HasBirdwatchNotes bool   `json:"has_birdwatch_notes"`
	Core              struct {
		UserResults struct {
			Result struct {
				IsBlueVerified bool       `json:"is_blue_verified"`
				Legacy         legacyUser `json:"legacy"`
			} `json:"result"`
		} `json:"user_results"`
	} `json:"core"`
	LegacyTweet legacyTweet `json:"legacy"`
	IDStr       string      `json:"id_str"`
}

type TweetResult

type TweetResult struct {
	Tweet
	Error error
}

TweetResult of scraping.

type TweetResults

type TweetResults struct {
	Result struct {
		Tweet struct {
			RestID            string `json:"rest_id"`
			HasBirdwatchNotes bool   `json:"has_birdwatch_notes"`
			IsTranslatable    bool   `json:"is_translatable"`
			Views             struct {
				State string `json:"state"`
			} `json:"views"`
			Source        string   `json:"source"`
			AwardEligible bool     `json:"award_eligible"`
			GrantedAwards struct{} `json:"granted_awards"`
			Legacy        struct {
				BookmarkCount       int    `json:"bookmark_count"`
				Bookmarked          bool   `json:"bookmarked"`
				CreatedAt           string `json:"created_at"`
				ConversationControl struct {
					Policy                   string `json:"policy"`
					ConversationOwnerResults struct {
						Result struct {
							Type   string `json:"__typename"`
							Legacy struct {
								ScreenName string `json:"screen_name"`
							} `json:"legacy"`
						} `json:"result"`
					} `json:"conversation_owner_results"`
				} `json:"conversation_control"`
				ConversationIDStr string `json:"conversation_id_str"`
				DisplayTextRange  []int  `json:"display_text_range"`
				Entities          struct {
					UserMentions []interface{} `json:"user_mentions"`
					Urls         []interface{} `json:"urls"`
					Hashtags     []interface{} `json:"hashtags"`
					Symbols      []interface{} `json:"symbols"`
				} `json:"entities"`
				FavoriteCount  int    `json:"favorite_count"`
				Favorited      bool   `json:"favorited"`
				FullText       string `json:"full_text"`
				IsQuoteStatus  bool   `json:"is_quote_status"`
				Lang           string `json:"lang"`
				LimitedActions string `json:"limited_actions"`
				QuoteCount     int    `json:"quote_count"`
				ReplyCount     int    `json:"reply_count"`
				RetweetCount   int    `json:"retweet_count"`
				Retweeted      bool   `json:"retweeted"`
				UserIDStr      string `json:"user_id_str"`
				IDStr          string `json:"id_str"`
			} `json:"legacy"`
			QuickPromoteEligibility struct {
				Eligibility string `json:"eligibility"`
			} `json:"quick_promote_eligibility"`
		} `json:"tweet"`
		LimitedActionResults struct {
			LimitedActions []struct {
				Action string `json:"action"`
				Prompt struct {
					Type     string `json:"__typename"`
					CtaType  string `json:"cta_type"`
					Headline struct {
						Text     string        `json:"text"`
						Entities []interface{} `json:"entities"`
					} `json:"headline"`
					Subtext struct {
						Text     string        `json:"text"`
						Entities []interface{} `json:"entities"`
					} `json:"subtext"`
				} `json:"prompt"`
			} `json:"limited_actions"`
		} `json:"limitedActionResults"`
	} `json:"result"`
}

Failed Tweet

type Video

type Video struct {
	ID      string
	Preview string
	URL     string
}

Video type.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL