Initial commit

This commit is contained in:
Manuel 2023-08-29 18:00:00 +02:00
commit eca46fcadd
Signed by: Manuel
GPG key ID: 4085037435E1F07A
21 changed files with 1666 additions and 0 deletions

28
cmd/config.go Normal file
View file

@ -0,0 +1,28 @@
package cmd
import (
"github.com/BurntSushi/toml"
"os"
)
// Config holds the application settings decoded from the TOML config file.
type Config struct {
Username string // scraper account login name
Password string // scraper account password
ProxyAddr string // optional proxy address for the scraper; empty disables proxying
Channels []string // twitter channels (handles) to watch
Filter []uint8 // per-channel filter bitmask; must be same length as Channels
Webhook string // Discord webhook URL new tweets are posted to
DbPath string // optional override for the sqlite database path
CookiePath string // optional override for the cookie file path
}
// ConfigFromFile reads the TOML file at filePath and decodes it into a
// freshly allocated Config. The decode error (if any) is returned alongside
// whatever was decoded so far.
func ConfigFromFile(filePath string) (conf *Config, err error) {
	raw, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return nil, readErr
	}
	_, err = toml.Decode(string(raw), &conf)
	return conf, err
}

174
cmd/database.go Normal file
View file

@ -0,0 +1,174 @@
package cmd
import (
"errors"
"fmt"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
ts "github.com/n0madic/twitter-scraper"
//"log"
"strconv"
)
const (
// SqliteSchema is executed on every startup; IF NOT EXISTS makes it
// idempotent.
// NOTE(review): SQLITE_UINT64_TYPE / SQLITE_INT64_TYPE look like
// placeholder type names. SQLite accepts arbitrary column type names, so
// this works, but confirm they are intentional and not an unexpanded
// template.
SqliteSchema = `
CREATE TABLE IF NOT EXISTS tweet (
tweet_id INTEGER PRIMARY KEY AUTOINCREMENT,
snowflake SQLITE_UINT64_TYPE NOT NULL UNIQUE,
channel VARCHAR(15) NOT NULL,
timestamp SQLITE_INT64_TYPE NOT NULL
);
`
KeepTweets int = 10 // How many tweets to keep in database before pruning
)
// Tweet is one row of the sqlite `tweet` table.
type Tweet struct {
TweetId int `db:"tweet_id"` // autoincrement primary key
Snowflake uint64 `db:"snowflake"` // numeric tweet ID, unique per tweet
Channel string `db:"channel"` // channel (handle) the tweet belongs to
Timestamp int64 `db:"timestamp"` // tweet timestamp, used for ordering and pruning
}
// Database wraps an sqlx connection with tweet-specific helper methods.
type Database struct {
*sqlx.DB
}
// NewDatabase opens a connection for the given driver and connect string
// and applies the schema. Only the "sqlite3" driver is supported.
func NewDatabase(driver string, connectString string) (*Database, error) {
	if driver != "sqlite3" {
		return nil, errors.New(fmt.Sprintf("Database driver %s not supported right now!", driver))
	}
	conn, err := sqlx.Connect(driver, "file:"+connectString+"?cache=shared")
	if err != nil {
		return nil, err
	}
	// A single open connection sidesteps sqlite write-locking issues.
	conn.SetMaxOpenConns(1)
	if _, err = conn.Exec(SqliteSchema); err != nil {
		return nil, err
	}
	return &Database{conn}, nil
}
// GetNewestTweet returns the most recently timestamped tweet stored for
// channel, or an error when none exists.
func (db *Database) GetNewestTweet(channel string) (*Tweet, error) {
	var newest Tweet
	if err := db.Get(&newest, "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC LIMIT 1", channel); err != nil {
		return nil, err
	}
	return &newest, nil
}
// GetTweets returns every tweet stored for channel, newest first.
// An empty (non-nil) slice is returned when the channel has no rows.
func (db *Database) GetTweets(channel string) ([]*Tweet, error) {
	result := []*Tweet{}
	if err := db.Select(&result, "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC", channel); err != nil {
		return nil, err
	}
	return result, nil
}
// ContainsTweet reports whether the given scraped tweet is already stored
// for channel, comparing snowflake IDs. Returns an error when the tweet ID
// is not numeric or the query fails.
func (db *Database) ContainsTweet(channel string, tweet *ts.Tweet) (bool, error) {
	snowflake, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return false, err
	}
	rows, err := db.Queryx("SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC", channel)
	if err != nil {
		return false, err
	}
	// Fix: rows were never closed, leaking the iterator (and, with
	// MaxOpenConns(1), potentially blocking later queries).
	defer rows.Close()
	t := Tweet{}
	for rows.Next() {
		if err := rows.StructScan(&t); err != nil {
			return false, err
		}
		if t.Snowflake == snowflake {
			return true, nil
		}
	}
	// Fix: surface iteration errors instead of silently reporting "absent".
	return false, rows.Err()
}
// InsertTweet stores the given scraped tweet for channel.
// Returns an error when the tweet ID is not numeric or the insert fails.
func (db *Database) InsertTweet(channel string, tweet *ts.Tweet) error {
	snowflake, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return err
	}
	_, dberr := db.NamedExec("INSERT INTO tweet (snowflake, channel, timestamp) VALUES (:snowflake, :channel, :timestamp)", &Tweet{0, snowflake, channel, tweet.Timestamp})
	if dberr != nil {
		// Fix: previously returned `err` (always nil here), silently
		// swallowing every database failure.
		return dberr
	}
	return nil
}
// PruneOldestTweets deletes the oldest tweets of channel so that at most
// KeepTweets rows remain. All deletions happen in one transaction.
func (db *Database) PruneOldestTweets(channel string) error {
	var count int
	if err := db.Get(&count, "SELECT COUNT(*) FROM tweet WHERE channel=$1", channel); err != nil {
		return err
	}
	if count <= KeepTweets {
		return nil
	}
	tx, err := db.Beginx()
	if err != nil {
		// Fix: previously called tx.Rollback() here, which panics because
		// tx is nil when Beginx fails.
		return err
	}
	rows, err := tx.Queryx("SELECT tweet_id from tweet WHERE channel=$1 ORDER by timestamp ASC LIMIT $2", channel, count-KeepTweets)
	if err != nil {
		tx.Rollback()
		return err
	}
	// Collect the victim IDs first: issuing DELETEs while the SELECT cursor
	// is still open is fragile on a pool limited to one connection, and the
	// rows handle was previously never closed.
	var ids []int
	for rows.Next() {
		var id int
		if err = rows.Scan(&id); err != nil {
			rows.Close()
			tx.Rollback()
			return err
		}
		ids = append(ids, id)
	}
	if err = rows.Err(); err != nil {
		rows.Close()
		tx.Rollback()
		return err
	}
	rows.Close()
	for _, id := range ids {
		if _, err = tx.Exec("DELETE FROM tweet WHERE tweet_id=$1", id); err != nil {
			tx.Rollback()
			return err
		}
	}
	// Fix: the Commit error was previously discarded.
	return tx.Commit()
}
// FromTweet converts a scraped tweet into its database representation for
// the given channel. Returns an error when the tweet ID is not numeric.
func FromTweet(channel string, tweet *ts.Tweet) (*Tweet, error) {
	id, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return nil, err
	}
	row := Tweet{
		TweetId:   0,
		Snowflake: id,
		Channel:   channel,
		Timestamp: tweet.Timestamp,
	}
	return &row, nil
}
// EqualsTweet reports whether t refers to the same tweet as the scraped
// tweet, by snowflake ID. A non-numeric tweet ID yields false.
func (t *Tweet) EqualsTweet(tweet *ts.Tweet) bool {
	id, err := strconv.ParseUint(tweet.ID, 10, 64)
	return err == nil && t.Snowflake == id
}
// Equals reports whether both stored tweets share the same snowflake ID.
func (t *Tweet) Equals(tweet *Tweet) bool {
	return tweet.Snowflake == t.Snowflake
}

54
cmd/database_test.go Normal file
View file

@ -0,0 +1,54 @@
package cmd
import (
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
"testing"
)
const (
// testDbPath is the sqlite file used by this test suite.
testDbPath = "../db/testdb.db"
)
var (
// connection is the shared handle opened by setupSuite and closed by its
// returned teardown function.
connection *sqlx.DB
)
// setupSuite opens the shared test database connection and returns a
// teardown function that closes it.
func setupSuite(t *testing.T) func(t *testing.T) {
	conn, err := sqlx.Connect("sqlite3", testDbPath)
	if err != nil {
		// Fix: previously t.Errorf("") — an empty message, and execution
		// continued with a nil connection. Fail fast with the real error.
		t.Fatalf("failed to open test database %s: %v", testDbPath, err)
	}
	connection = conn
	return func(t *testing.T) {
		conn.Close()
	}
}
// setupTest is the per-test fixture hook; currently a no-op placeholder
// returning an empty teardown function.
func setupTest(t *testing.T) func(t *testing.T) {
return func(t *testing.T) {
}
}
// TODO(review): the test functions below are unimplemented stubs.
func TestGetNewestTweet(t *testing.T) {
}
func TestGetTweets(t *testing.T) {
}
func TestContainsTweet(t *testing.T) {
}
func TestInsertTweet(t *testing.T) {
}
func TestPruneOldestTweets(t *testing.T) {
}

98
cmd/embed.go Normal file
View file

@ -0,0 +1,98 @@
package cmd
import (
"errors"
"strconv"
"strings"
)
// Embed is a Discord message embed as serialized into webhook payloads.
type Embed struct {
Author Author `json:"author"`
Title string `json:"title"`
URL string `json:"url"`
Description string `json:"description"`
Color int64 `json:"color"` // decimal color value parsed from a hex string
Fields []Field `json:"fields"`
Thumbnail Image `json:"thumbnail,omitempty"`
Image Image `json:"image,omitempty"`
Footer Footer `json:"footer"`
}
// Author is the author section of a Discord embed.
type Author struct {
Name string `json:"name"`
URL string `json:"url"`
IconURL string `json:"icon_url"`
}
// Field is one name/value pair rendered inside a Discord embed.
type Field struct {
Name string `json:"name"`
Value string `json:"value"`
Inline bool `json:"inline,omitempty"`
}
// Footer is the footer section of a Discord embed.
type Footer struct {
Text string `json:"text"`
IconURL string `json:"icon_url,omitempty"`
}
// Image wraps an image URL for embed thumbnails and images.
type Image struct {
URL string `json:"url"`
}
// NewEmbed appends a fresh Embed with the given title, description and URL
// to the webhook's embed list.
func (w *Webhook) NewEmbed(Title, Description, URL string) {
	w.Embeds = append(w.Embeds, &Embed{
		Title:       Title,
		Description: Description,
		URL:         URL,
	})
}
// SetAuthor sets the author section on the first embed, creating a new
// embed when none exists yet.
func (w *Webhook) SetAuthor(Name, URL, IconURL string) {
	author := Author{Name, URL, IconURL}
	if len(w.Embeds) > 0 {
		w.Embeds[0].Author = author
		return
	}
	w.Embeds = append(w.Embeds, &Embed{Author: author})
}
// SetColor parses a hex color string (e.g. "#1DA1F2", "0x1DA1F2", "1DA1F2")
// and stores it on the first embed. Returns an error when the hex code is
// invalid or when no embed has been added yet.
func (w *Webhook) SetColor(color string) error {
	// Fix: every sibling setter guards against an empty Embeds slice; this
	// one indexed w.Embeds[0] unguarded and panicked on an empty webhook.
	if len(w.Embeds) < 1 {
		return errors.New("Invalid Embed passed in, Webhook.Embeds must have at least one Embed element")
	}
	// Strip common hex prefixes before parsing.
	color = strings.NewReplacer("0x", "", "0X", "", "#", "").Replace(color)
	colorInt, err := strconv.ParseInt(color, 16, 64)
	if err != nil {
		return errors.New("Invalid hex code passed")
	}
	w.Embeds[0].Color = colorInt
	return nil
}
// SetThumbnail sets the thumbnail image URL on the first embed.
// Returns an error when no embed has been added yet.
func (w *Webhook) SetThumbnail(URL string) error {
	if len(w.Embeds) == 0 {
		return errors.New("Invalid Embed passed in, Webhook.Embeds must have at least one Embed element")
	}
	w.Embeds[0].Thumbnail = Image{URL: URL}
	return nil
}
// SetImage sets the main image URL on the first embed.
// Returns an error when no embed has been added yet.
func (w *Webhook) SetImage(URL string) error {
	if len(w.Embeds) == 0 {
		return errors.New("Invalid Embed passed in, Webhook.Embeds must have at least one Embed element")
	}
	w.Embeds[0].Image = Image{URL: URL}
	return nil
}
// SetFooter sets the footer text and icon on the first embed.
// Returns an error when no embed has been added yet.
func (w *Webhook) SetFooter(Text, IconURL string) error {
	if len(w.Embeds) == 0 {
		return errors.New("Invalid Embed passed in, Webhook.Embeds must have at least one Embed element")
	}
	w.Embeds[0].Footer = Footer{Text: Text, IconURL: IconURL}
	return nil
}
// AddField appends a name/value field (optionally inline) to the first
// embed. Returns an error when no embed has been added yet.
func (w *Webhook) AddField(Name, Value string, Inline bool) error {
	if len(w.Embeds) == 0 {
		return errors.New("Invalid Embed passed in, Webhook.Embeds must have at least one Embed element")
	}
	field := Field{Name: Name, Value: Value, Inline: Inline}
	w.Embeds[0].Fields = append(w.Embeds[0].Fields, field)
	return nil
}

300
cmd/tweeter.go Normal file
View file

@ -0,0 +1,300 @@
package cmd
import (
"context"
"encoding/json"
ts "github.com/n0madic/twitter-scraper"
"log"
"math/rand"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
// Default file locations; configPath can be overridden by the first
// command-line argument, dbPath/cookiePath by the config file.
var (
configPath = "./config.toml"
dbPath = "./db/tweets.db"
cookiePath = "./db/cookies.json"
)
const (
ScrapeInterval int = 3 // How often to check for new tweets (in minutes)
ScrapeDelay int64 = 0 // How long to wait between api requests (in seconds)
ScrapeStep int = 10 // How many tweets to get at a time
)
// App bundles the shared dependencies handed to the worker goroutines.
type App struct {
config *Config // parsed TOML configuration
db *Database // sqlite-backed tweet store
scraper *ts.Scraper // logged-in twitter scraper client
}
// Run is the application entry point: it loads the configuration, opens the
// database, logs the scraper in (reusing saved cookies when possible),
// starts one worker goroutine per channel, and blocks until SIGINT/SIGTERM.
func Run() {
	args := os.Args[1:]
	if len(args) > 1 {
		log.Fatalln("Too many arguments")
	}
	if len(args) == 1 {
		if args[0] == "" {
			log.Fatalln("No config path given")
		}
		configPath = args[0]
	}
	config, err := ConfigFromFile(configPath)
	if err != nil {
		log.Fatalf("There has been an error parsing config file '%s': %s\n", configPath, err.Error())
	}
	// Validate the parts of the config the workers depend on.
	if len(config.Channels) == 0 {
		log.Fatalln("List of channels cannot be empty")
	}
	if len(config.Channels) != len(config.Filter) {
		log.Fatalln("List of filters has to be same length as channel list")
	}
	if config.Webhook == "" {
		log.Fatalln("Webhook address cannot be empty")
	}
	if config.DbPath != "" {
		dbPath = config.DbPath
	}
	if config.CookiePath != "" {
		cookiePath = config.CookiePath
	}
	db, dberr := NewDatabase("sqlite3", dbPath)
	if dberr != nil {
		log.Fatalf("An error occurred while creating database connection: %s\n", dberr.Error())
	}
	defer db.Close()
	scraper := ts.New()
	if config.ProxyAddr != "" {
		err = scraper.SetProxy(config.ProxyAddr)
		if err != nil {
			log.Fatalf("An error occurred with proxy connection: %s\n", err.Error())
		}
	}
	// Try to restore a previous session from the cookie file.
	if f, openErr := os.Open(cookiePath); openErr != nil {
		log.Println("Cookie file does not yet exist")
	} else {
		var cookies []*http.Cookie
		// Fix: the decode error was previously ignored, so a corrupt cookie
		// file silently installed an empty cookie jar.
		if decErr := json.NewDecoder(f).Decode(&cookies); decErr != nil {
			log.Printf("Could not decode cookie file %s: %s\n", cookiePath, decErr.Error())
		} else {
			scraper.SetCookies(cookies)
		}
		// Fix: the file handle was previously never closed.
		f.Close()
	}
	if scraper.IsLoggedIn() {
		log.Println("We're already logged in, skipping login...")
		//defer scraper.Logout()
	} else {
		scraper.ClearCookies()
		err = scraper.Login(config.Username, config.Password)
		if err != nil {
			log.Printf("An error occurred during scraper login: %s\n", err.Error())
			log.Println("Trying open account login... ")
			err = scraper.LoginOpenAccount()
			if err != nil {
				log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
			}
			defer scraper.Logout()
		} else {
			log.Printf("New Login - Saving cookies to %s\n", cookiePath)
			js, jsonErr := json.Marshal(scraper.GetCookies())
			if jsonErr != nil {
				log.Fatalf("An error occurred during cookie serialization: %s\n", jsonErr.Error())
			}
			f, fErr := os.Create(cookiePath)
			if fErr != nil {
				log.Fatalf("Failed to create cookie file at %s with the following error: %s\n", cookiePath, fErr.Error())
			}
			// Fix: the write error was ignored and the file never closed.
			if _, wErr := f.Write(js); wErr != nil {
				log.Printf("Failed to write cookie file at %s: %s\n", cookiePath, wErr.Error())
			}
			f.Close()
			//defer scraper.Logout()
		}
	}
	scraper.WithDelay(ScrapeDelay)
	app := App{config, db, scraper}
	for i, c := range config.Channels {
		go app.queryX(i, c)
	}
	// Block until we receive SIGINT or SIGTERM.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	done := make(chan bool, 1)
	go func() {
		sig := <-sigs
		log.Println(sig)
		done <- true
	}()
	<-done
	log.Println("Exiting...")
}
// queryX is the per-channel worker loop. Every ScrapeInterval minutes
// (with +-30s jitter) it fetches recent tweets for channel, cuts the list
// at the first tweet already stored in the database, inserts and posts the
// new ones to the webhook (oldest first), then prunes old rows.
// id staggers worker start times and indexes this channel's filter bitmask.
func (app *App) queryX(id int, channel string) {
log.Printf("Starting worker %d for channel %s", id, channel)
// Sleep to stagger api queries of workers
time.Sleep(time.Duration(id) * time.Minute)
db := app.db
/*
db, dberr := NewDatabase("sqlite3", dbPath)
if dberr != nil {
log.Fatalf("An error occurred while creating database connection: %s\n", dberr.Error())
}
defer db.Close()
*/
// Filter bitmask for this channel; see filterTweet for the bit meanings.
filter := app.config.Filter[id]
/*
filterQuoted := filter & 1
filterPin := filter & 2
filterReply := filter & 4
filterRetweet := filter & 8
filterSelfThread := filter & 16
*/
init := true
ScrapeLoop:
for {
if !init {
// Sleep for set interval +-30 seconds
time.Sleep(time.Duration(ScrapeInterval)*time.Minute + time.Duration(rand.Intn(60)-30)*time.Second)
}
init = false
/*
newestTweet, err := db.GetNewestTweet(channel)
if err != nil {
log.Printf("No tweets in database yet for channel %s", channel)
}
*/
step := ScrapeStep
tweets := []*ts.Tweet{}
tweetsToParse := []*ts.Tweet{}
tweetsToPost := []*ts.Tweet{}
dbTweets, dbErr := db.GetTweets(channel)
if dbErr != nil {
log.Printf("Error while retrieving tweets from database for channel %s: %s", channel, dbErr.Error())
continue ScrapeLoop
}
// Fetch in growing batches (step) until a stored tweet is found or the
// 50-tweet cap is reached.
GetTweets:
for {
for tweet := range app.scraper.GetTweets(context.Background(), channel, step) {
if tweet.Error != nil {
log.Printf("Error while retrieving tweet for channel %s: %s", channel, tweet.Error.Error())
continue ScrapeLoop
}
// NOTE(review): tweets is not reset between GetTweets iterations, so a
// re-fetch with a larger step appends the earlier batch again and the
// slice then contains duplicates; tweets[:i] below may include entries
// that were already fetched — confirm whether this is intended.
tweets = append(tweets, &tweet.Tweet)
}
if len(tweets) == 0 {
break GetTweets
}
if len(dbTweets) > 0 {
// Cut the fetched list at the first tweet we already stored:
// everything before index i is new.
for _, dbTweet := range dbTweets {
for i, tweet := range tweets {
if dbTweet.EqualsTweet(tweet) {
tweetsToParse = tweets[:i]
break GetTweets
}
}
}
} else {
// Empty database: take only the newest tweet to avoid flooding.
tweetsToParse = append(tweetsToParse, tweets[0])
break GetTweets
}
if step >= 50 {
// Cap reached with no overlap found; fall back to the newest tweet.
tweetsToParse = append(tweetsToParse, tweets[0])
break GetTweets
} else if step+ScrapeStep > 50 {
step = 50
} else {
step += ScrapeStep
}
log.Printf("Fetching more tweets for %s...", channel)
time.Sleep(time.Duration(3) * time.Second) // Wait a few seconds for next api request
}
//slices.Reverse(tweetsToParse)
ParseTweets:
// We want to parse old to new
for i := len(tweetsToParse) - 1; i >= 0; i-- {
tweet := tweetsToParse[i]
if filterTweet(filter, tweet) {
// One of the filters applies as same bits are 1, so we skip this tweet
continue ParseTweets
}
err := db.InsertTweet(channel, tweet)
if err != nil {
log.Printf("Error while inserting tweet for channel %s into the database: %s", channel, err.Error())
continue ParseTweets
}
tweetsToPost = append(tweetsToPost, tweet)
/*
contains, dberr := db.ContainsTweet(channel, tweet)
if dberr != nil {
log.Printf("Error while checking tweet for channel %s: %s", channel, dberr.Error())
continue ParseTweets
}
if contains {
// Since we posted this tweet already, let's break the loop and post the tweets to Discord (if there are any)
break ParseTweets
} else {
// Tweet not yet in database so we store this one
err := db.InsertTweet(channel, tweet)
if err != nil {
log.Printf("Error while inserting tweet for channel %s into the database: %s", channel, err.Error())
}
tweetsToPost = append(tweetsToPost, tweet)
}
*/
}
// Post the new tweets (oldest first) and trim the table back down.
sendToWebhook(app.config.Webhook, tweetsToPost)
err := db.PruneOldestTweets(channel)
if err != nil {
log.Printf("Error while pruning old tweets for channel %s: %s", channel, err.Error())
}
}
}
// filterTweet reports whether tweet matches any bit set in filter.
// Bit layout (LSB to MSB): quoted, pin, reply, retweet, self-thread.
func filterTweet(filter uint8, tweet *ts.Tweet) bool {
	flags := []bool{tweet.IsSelfThread, tweet.IsRetweet, tweet.IsReply, tweet.IsPin, tweet.IsQuoted}
	var mask uint8
	for _, set := range flags {
		mask <<= 1
		if set {
			mask |= 1
		}
	}
	return filter&mask != 0
}

37
cmd/tweeter_test.go Normal file
View file

@ -0,0 +1,37 @@
package cmd
import (
"testing"
)
// TestFilter checks compareBytes against precomputed bitwise-AND results.
func TestFilter(t *testing.T) {
	cases := []struct{ a, b, want uint8 }{
		{0b10101, 0b10101, 0b10101},
		{0b11111, 0b11111, 0b11111},
		{0b00000, 0b00000, 0b00000},
		{0b00111, 0b00111, 0b00111},
		{0b00101, 0b00101, 0b00101},
		{0b00001, 0b00001, 0b00001},
		{0b10111, 0b01000, 0b00000},
		{0b10101, 0b01010, 0b00000},
		{0b01101, 0b01000, 0b01000},
		{0b10101, 0b11011, 0b10001},
	}
	for _, c := range cases {
		if got := compareBytes(c.a, c.b); got != c.want {
			t.Errorf("%b AND %b = %b; got %b\n", c.a, c.b, c.want, got)
		}
	}
}
// compareBytes rebuilds a bitmask from filterCheck bit by bit and returns
// its bitwise AND with filter. It mirrors the reconstruction loop used by
// filterTweet so the bit ordering can be verified in isolation.
func compareBytes(filter uint8, filterCheck uint8) uint8 {
	// Probe from the self-thread bit (16) down to the quoted bit (1),
	// shifting the result left each step — same order as filterTweet.
	probes := []uint8{16, 8, 4, 2, 1}
	var rebuilt uint8
	for _, bit := range probes {
		rebuilt <<= 1
		if filterCheck&bit != 0 {
			rebuilt |= 1
		}
	}
	return filter & rebuilt
}

48
cmd/webhook.go Normal file
View file

@ -0,0 +1,48 @@
package cmd
import (
"bytes"
"encoding/json"
ts "github.com/n0madic/twitter-scraper"
"log"
"net/http"
)
// Webhook is the JSON payload posted to a Discord webhook endpoint.
type Webhook struct {
Content string `json:"content"`
Name string `json:"username,omitempty"`
Avatar string `json:"avatar_url,omitempty"`
Mention *Mention `json:"allowed_mentions,omitempty"`
Embeds []*Embed `json:"embeds,omitempty"`
}
// Mention mirrors Discord's allowed_mentions object, controlling which
// mentions in the message content actually ping.
type Mention struct {
Parse []string `json:"parse,omitempty"`
Roles []string `json:"roles,omitempty"`
Users []string `json:"users,omitempty"`
RepliedUser bool `json:"replied_user,omitempty"`
}
// sendToWebhook posts each tweet's permanent URL to the webhook, one
// request per tweet. Failures are logged and skipped so a single bad tweet
// does not block the rest.
func sendToWebhook(webhookURL string, tweets []*ts.Tweet) {
	for _, tweet := range tweets {
		// Temporarily hardcoded
		data := Webhook{
			Content: tweet.PermanentURL,
			Mention: &Mention{Parse: []string{"roles"}},
			Embeds:  []*Embed{},
		}
		jsonData, err := json.Marshal(data)
		if err != nil {
			log.Printf("Error while generating JSON for tweet %s: %s", tweet.ID, err.Error())
			continue
		}
		resp, err := http.Post(webhookURL, "application/json", bytes.NewBuffer(jsonData))
		if err != nil {
			log.Printf("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
			continue
		}
		// Fix: close each body immediately — the previous defer inside the
		// loop kept every response body open until the function returned.
		resp.Body.Close()
	}
}