feat: Major refactor, implement web, caching, better tests and build files

* Update golang version to 1.24
* Update multiarch Dockerfile to be more ISA agnostic
* Refactor existing code and properly structure project into modules
* Get rid of global variables except where necessary (go:embed)
* Add default values to Config
* Add webserver with templates to finally correctly serve videos and gifs
* Add tiny caching library to decrease api load and improve latency
* Improve Webhook data preparation by filtering out redundant links
  from the tweet text and properly attaching videos and gifs in a separate
  webhook request by utilising the new webserver
* Improve tests for filter function
* Improve bake definition for easier CI integration
This commit is contained in:
Manuel 2025-03-18 19:22:00 +01:00
parent 7562b86894
commit 21d580d1a6
Signed by: Manuel
GPG key ID: 4085037435E1F07A
24 changed files with 752 additions and 209 deletions

View file

@ -1,28 +0,0 @@
package cmd
import (
"github.com/BurntSushi/toml"
"os"
)
// Config holds the settings that ConfigFromFile decodes from a TOML file.
type Config struct {
	// Username and Password are account credentials.
	// NOTE(review): presumably used for the scraper login — confirm against the caller.
	Username, Password string
	// ProxyAddr is a proxy address.
	// NOTE(review): its consumer is not visible in this file — confirm how it is applied.
	ProxyAddr string
	// Channels lists the channels to process.
	Channels []string
	// Filter holds byte-sized filter values.
	// NOTE(review): presumably one bitmask per entry in Channels — confirm.
	Filter []uint8
	// Webhook is the webhook address.
	Webhook string
	// DbPath and CookiePath are file system paths for the database and the
	// cookie file respectively.
	DbPath, CookiePath string
}
// ConfigFromFile reads the file at filePath and decodes its contents as TOML
// into a newly allocated Config.
//
// On any failure (reading the file or decoding it) the returned Config is nil
// and the underlying error is returned unchanged, so callers never observe a
// partially populated configuration.
func ConfigFromFile(filePath string) (conf *Config, err error) {
	tomlData, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}

	// Allocate the target explicitly instead of relying on the decoder to
	// allocate through a pointer-to-pointer.
	conf = &Config{}
	if _, err = toml.Decode(string(tomlData), conf); err != nil {
		return nil, err
	}
	return conf, nil
}

View file

@ -1,171 +0,0 @@
package cmd
import (
"errors"
"fmt"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
ts "github.com/imperatrona/twitter-scraper"
"strconv"
)
const (
	// KeepTweets is how many tweets to keep in the database before pruning.
	KeepTweets int = 10

	// SqliteSchema is the DDL executed when a sqlite3 database is opened; it
	// creates the tweet table if it does not exist yet.
	SqliteSchema = `
CREATE TABLE IF NOT EXISTS tweet (
tweet_id INTEGER PRIMARY KEY AUTOINCREMENT,
snowflake SQLITE_UINT64_TYPE NOT NULL UNIQUE,
channel VARCHAR(15) NOT NULL,
timestamp SQLITE_INT64_TYPE NOT NULL
);
`
)
// Tweet is one row of the tweet table.
type Tweet struct {
	// TweetId is the auto-incremented primary key of the row.
	TweetId int `db:"tweet_id"`
	// Snowflake is the tweet's ID parsed as an unsigned 64-bit integer; it is
	// unique across the table.
	Snowflake uint64 `db:"snowflake"`
	// Channel is the channel the tweet was recorded for.
	Channel string `db:"channel"`
	// Timestamp is the tweet's timestamp as stored alongside it.
	// NOTE(review): presumably Unix seconds — confirm against the scraper.
	Timestamp int64 `db:"timestamp"`
}
// Database wraps an sqlx connection handle. The handle is embedded, so all of
// its methods are available directly on Database next to the tweet helpers.
type Database struct {
	*sqlx.DB
}
// NewDatabase opens a database using the given driver and makes sure the
// schema exists.
//
// Only the "sqlite3" driver is supported; connectString is then the path of
// the database file. Any other driver yields an error. On failure the returned
// Database is nil and no connection is left open.
func NewDatabase(driver string, connectString string) (*Database, error) {
	switch driver {
	case "sqlite3":
		connection, err := sqlx.Connect(driver, "file:"+connectString+"?cache=shared")
		if err != nil {
			return nil, err
		}
		// Restrict the pool to a single open connection.
		connection.SetMaxOpenConns(1)
		if _, err = connection.Exec(SqliteSchema); err != nil {
			// The handle is not handed to the caller on this path, so release
			// it here; the schema error is the one worth reporting.
			_ = connection.Close()
			return nil, err
		}
		return &Database{connection}, nil
	default:
		return nil, errors.New(fmt.Sprintf("Database driver %s not supported right now!", driver))
	}
}
// GetNewestTweet returns the stored tweet of channel with the highest
// timestamp; ties are broken in favour of the larger snowflake.
//
// Any error from the query is returned with a nil Tweet.
// NOTE(review): presumably this includes the "no rows" case — confirm against sqlx's Get.
func (db *Database) GetNewestTweet(channel string) (*Tweet, error) {
	const query = "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC, snowflake DESC LIMIT 1"

	newest := new(Tweet)
	if err := db.Get(newest, query, channel); err != nil {
		return nil, err
	}
	return newest, nil
}
// GetTweets returns every stored tweet of channel, newest first (ordered by
// timestamp, then snowflake, both descending).
//
// On a query error the returned slice is nil.
func (db *Database) GetTweets(channel string) ([]*Tweet, error) {
	const query = "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC, snowflake DESC"

	// Start from an empty, non-nil slice as the destination for Select.
	stored := make([]*Tweet, 0)
	if err := db.Select(&stored, query, channel); err != nil {
		return nil, err
	}
	return stored, nil
}
// ContainsTweet reports whether a tweet with the same ID as tweet is already
// stored for channel.
//
// It returns an error if tweet.ID is not a valid unsigned 64-bit decimal
// number, if the query fails, if a row cannot be scanned, or if iterating the
// result set ends with an error.
func (db *Database) ContainsTweet(channel string, tweet *ts.Tweet) (bool, error) {
	snowflake, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return false, err
	}

	rows, err := db.Queryx("SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC, snowflake DESC", channel)
	if err != nil {
		return false, err
	}
	// Always release the result set: returning early without closing it would
	// keep the underlying connection checked out, and the pool is limited to a
	// single connection when opened through NewDatabase.
	defer rows.Close()

	var t Tweet
	for rows.Next() {
		if err := rows.StructScan(&t); err != nil {
			return false, err
		}
		if t.Snowflake == snowflake {
			return true, nil
		}
	}
	// Next returning false can mean "no more rows" or "iteration failed".
	return false, rows.Err()
}
// InsertTweet stores tweet for channel, recording its ID (as snowflake), the
// channel name and the tweet's timestamp.
//
// It returns an error if tweet.ID is not a valid unsigned 64-bit decimal
// number or if the INSERT statement fails.
func (db *Database) InsertTweet(channel string, tweet *ts.Tweet) error {
	snowflake, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return err
	}

	row := &Tweet{Snowflake: snowflake, Channel: channel, Timestamp: tweet.Timestamp}
	// Return the database error itself; the parse error is nil at this point.
	if _, err := db.NamedExec("INSERT INTO tweet (snowflake, channel, timestamp) VALUES (:snowflake, :channel, :timestamp)", row); err != nil {
		return err
	}
	return nil
}
// PruneOldestTweets deletes the oldest stored tweets of channel so that at
// most KeepTweets remain. Age is determined by timestamp, then snowflake, both
// ascending.
//
// The deletions run inside a single transaction: either all surplus rows are
// removed or, on any error (including a failed commit), none are.
func (db *Database) PruneOldestTweets(channel string) error {
	var count int
	if err := db.Get(&count, "SELECT COUNT(*) FROM tweet WHERE channel=$1", channel); err != nil {
		return err
	}

	surplus := count - KeepTweets
	if surplus <= 0 {
		return nil
	}

	tx, err := db.Beginx()
	if err != nil {
		// No transaction exists on this path, so there is nothing to roll back.
		return err
	}
	// Undo everything on any early return below. After a successful Commit the
	// rollback has nothing left to undo and its error is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	// Read all surplus IDs up front so the result set is fully consumed and
	// released before the DELETE statements run on the same transaction.
	var ids []int
	if err := tx.Select(&ids, "SELECT tweet_id from tweet WHERE channel=$1 ORDER by timestamp ASC, snowflake ASC LIMIT $2", channel, surplus); err != nil {
		return err
	}

	for _, id := range ids {
		if _, err := tx.Exec("DELETE FROM tweet WHERE tweet_id=$1", id); err != nil {
			return err
		}
	}
	return tx.Commit()
}
// FromTweet converts a scraped tweet into its database representation for
// channel. The row ID is left at zero.
//
// It fails when tweet.ID is not a valid unsigned 64-bit decimal number.
func FromTweet(channel string, tweet *ts.Tweet) (*Tweet, error) {
	id, parseErr := strconv.ParseUint(tweet.ID, 10, 64)
	if parseErr != nil {
		return nil, parseErr
	}

	converted := Tweet{
		Snowflake: id,
		Channel:   channel,
		Timestamp: tweet.Timestamp,
	}
	return &converted, nil
}
// EqualsTweet reports whether the scraped tweet has the same ID as t. A tweet
// whose ID cannot be parsed as an unsigned 64-bit decimal number never matches.
func (t *Tweet) EqualsTweet(tweet *ts.Tweet) bool {
	if id, err := strconv.ParseUint(tweet.ID, 10, 64); err == nil {
		return id == t.Snowflake
	}
	return false
}
// Equals reports whether t and tweet carry the same snowflake; no other field
// takes part in the comparison.
func (t *Tweet) Equals(tweet *Tweet) bool {
	mine, theirs := t.Snowflake, tweet.Snowflake
	return mine == theirs
}

View file

@ -1,54 +0,0 @@
package cmd
import (
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
"testing"
)
// testDbPath is the SQLite database file the test suite connects to.
const testDbPath = "../db/testdb.db"

// connection is the handle shared by the tests; setupSuite assigns it.
var connection *sqlx.DB
// setupSuite connects to the test database, stores the handle in the
// package-level connection variable and returns a teardown function that
// closes it again.
//
// A failed connection aborts the calling test immediately, so neither the
// shared variable nor the teardown ever sees a nil handle.
func setupSuite(t *testing.T) func(t *testing.T) {
	conn, err := sqlx.Connect("sqlite3", testDbPath)
	if err != nil {
		t.Fatalf("connecting to test database %q: %v", testDbPath, err)
	}
	connection = conn

	return func(t *testing.T) {
		if err := conn.Close(); err != nil {
			t.Errorf("closing test database: %v", err)
		}
	}
}
// setupTest is the per-test counterpart of setupSuite. It currently prepares
// nothing and hands back a teardown function that does nothing.
func setupTest(t *testing.T) func(t *testing.T) {
	teardown := func(t *testing.T) {}
	return teardown
}
// The tests below are placeholders. They are reported as skipped rather than
// silently passing, so the missing coverage stays visible in test output.

// TestGetNewestTweet is a placeholder for testing Database.GetNewestTweet.
func TestGetNewestTweet(t *testing.T) {
	t.Skip("not implemented yet")
}

// TestGetTweets is a placeholder for testing Database.GetTweets.
func TestGetTweets(t *testing.T) {
	t.Skip("not implemented yet")
}

// TestContainsTweet is a placeholder for testing Database.ContainsTweet.
func TestContainsTweet(t *testing.T) {
	t.Skip("not implemented yet")
}

// TestInsertTweet is a placeholder for testing Database.InsertTweet.
func TestInsertTweet(t *testing.T) {
	t.Skip("not implemented yet")
}

// TestPruneOldestTweets is a placeholder for testing Database.PruneOldestTweets.
func TestPruneOldestTweets(t *testing.T) {
	t.Skip("not implemented yet")
}

View file

@ -1,4 +1,4 @@
package cmd
package tweeter
import (
"errors"
@ -79,7 +79,7 @@ func (e *Embed) SetColor(color string) (*Embed, error) {
color = strings.Replace(color, "#", "", -1)
colorInt, err := strconv.ParseInt(color, 16, 64)
if err != nil {
return nil, errors.New("Invalid hex code passed")
return nil, errors.New("invalid hex code passed")
}
e.Color = colorInt
return e, nil

View file

@ -1,9 +1,8 @@
package cmd
package tweeter
import (
"context"
"encoding/json"
ts "github.com/imperatrona/twitter-scraper"
"log"
"math/rand"
"net/http"
@ -11,23 +10,23 @@ import (
"os/signal"
"syscall"
"time"
)
var (
configPath = "./config.toml"
dbPath = "./db/tweets.db"
cookiePath = "./db/cookies.json"
"git.snrd.eu/sunred/discord-tweeter/pkg/config"
"git.snrd.eu/sunred/discord-tweeter/pkg/db"
"git.snrd.eu/sunred/discord-tweeter/pkg/web"
ts "github.com/imperatrona/twitter-scraper"
)
const (
ScrapeInterval int = 3 // How often to check for new tweets (in minutes)
ScrapeDelay int64 = 0 // How long to wait between api requests (in seconds)
ScrapeStep int = 10 // How many tweets to get at a time
DefaultConfig = "./config.toml"
)
type App struct {
config *Config
db *Database
config *config.Config
db db.Database
scraper *ts.Scraper
}
@ -38,6 +37,7 @@ func Run() {
log.Fatalln("Too many arguments")
}
configPath := DefaultConfig
if len(args) == 1 {
if args[0] == "" {
log.Fatalln("No config path given")
@ -45,7 +45,7 @@ func Run() {
configPath = args[0]
}
config, err := ConfigFromFile(configPath)
config, err := config.ConfigFromFile(configPath)
if err != nil {
log.Fatalf("There has been an error parsing config file '%s': %s\n", configPath, err.Error())
}
@ -62,15 +62,11 @@ func Run() {
log.Fatalln("Webhook address cannot be empty")
}
if config.DbPath != "" {
dbPath = config.DbPath
if config.UseWebServer && config.HostURL == "" {
log.Fatalln("HostURL cannot be empty")
}
if config.CookiePath != "" {
cookiePath = config.CookiePath
}
db, dberr := NewDatabase("sqlite3", dbPath)
db, dberr := db.New("sqlite3", config.DbPath)
if dberr != nil {
log.Fatalf("An error occurred while creating database connection: %s\n", dberr.Error())
}
@ -86,7 +82,7 @@ func Run() {
}
{
f, err := os.Open(cookiePath)
f, err := os.Open(config.CookiePath)
if err != nil {
log.Println("Cookie file does not yet exist")
} else {
@ -105,14 +101,14 @@ func Run() {
if err != nil {
log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
} else {
log.Printf("New Login - Saving cookies to %s\n", cookiePath)
log.Printf("New Login - Saving cookies to %s\n", config.CookiePath)
js, jsonErr := json.Marshal(scraper.GetCookies())
if jsonErr != nil {
log.Fatalf("An error occurred during cookie serialization: %s\n", jsonErr.Error())
}
f, fErr := os.Create(cookiePath)
f, fErr := os.Create(config.CookiePath)
if fErr != nil {
log.Fatalf("Failed to create cookie file at %s with the following error: %s\n", cookiePath, fErr.Error())
log.Fatalf("Failed to create cookie file at %s with the following error: %s\n", config.CookiePath, fErr.Error())
}
f.Write(js)
writeErr := f.Close()
@ -132,10 +128,19 @@ func Run() {
scraper.WithDelay(ScrapeDelay)
app := App{config, db, scraper}
if config.UseWebServer {
log.Printf("Starting webserver on port %d", config.WebPort)
ws, err := web.New(config, scraper)
if err != nil {
log.Fatalf("An error occurred while starting webserver: %s\n", err.Error())
}
go ws.Server.ListenAndServe()
}
log.Printf("Starting main app with %d workers", len(config.Channels))
app := App{config, db, scraper}
for i, c := range config.Channels {
go app.queryX(i, c)
go app.queryLoop(i, c)
}
sigs := make(chan os.Signal, 1)
@ -152,7 +157,7 @@ func Run() {
log.Println("Exiting...")
}
func (app *App) queryX(id int, channel string) {
func (app App) queryLoop(id int, channel string) {
log.Printf("Starting worker %d for channel %s", id, channel)
// Sleep to stagger api queries of workers
time.Sleep(time.Duration(id) * time.Minute)
@ -237,7 +242,7 @@ ScrapeLoop:
tweetsToPost = append(tweetsToPost, tweet)
}
sendToWebhook(app.config.Webhook, tweetsToPost)
app.SendToWebhook(tweetsToPost)
err := db.PruneOldestTweets(channel)
if err != nil {
log.Printf("Error while pruning old tweets for channel %s: %s", channel, err.Error())
@ -246,6 +251,10 @@ ScrapeLoop:
}
// filterTweet reports whether filterByte yields a non-zero value for the given
// filter and tweet.
func filterTweet(filter uint8, tweet *ts.Tweet) bool {
	matched := filterByte(filter, tweet)
	return matched != 0
}
func filterByte(filter uint8, tweet *ts.Tweet) uint8 {
var tweetFilter uint8 = 0
filterMap := []bool{tweet.IsSelfThread, tweet.IsRetweet, tweet.IsReply, tweet.IsPin, tweet.IsQuoted}
for _, f := range filterMap {
@ -255,5 +264,5 @@ func filterTweet(filter uint8, tweet *ts.Tweet) bool {
}
}
return filter&tweetFilter > 0
return filter & tweetFilter
}

View file

@ -0,0 +1,43 @@
package tweeter
import (
"testing"
ts "github.com/imperatrona/twitter-scraper"
)
// TestFilter checks filterTweet and filterByte against a fixed table. Each
// case builds a tweet whose boolean flags are taken from the bits of flags
// and compares both results for filter against the expected values.
func TestFilter(t *testing.T) {
	cases := []struct {
		filter   uint8 // filter passed to the functions under test
		flags    uint8 // bit pattern used to set the tweet's flags
		wantByte uint8 // expected filterByte result
		wantBool bool  // expected filterTweet result
	}{
		{0b10101, 0b10101, 0b10101, true},
		{0b11111, 0b11111, 0b11111, true},
		{0b00000, 0b00000, 0b00000, false},
		{0b00111, 0b00111, 0b00111, true},
		{0b00101, 0b00101, 0b00101, true},
		{0b00001, 0b00001, 0b00001, true},
		{0b10111, 0b01000, 0b00000, false},
		{0b10101, 0b01010, 0b00000, false},
		{0b01101, 0b01000, 0b01000, true},
		{0b10101, 0b11011, 0b10001, true},
	}

	for _, tc := range cases {
		tweet := ts.Tweet{
			IsSelfThread: tc.flags&16 != 0, // fifth bit
			IsRetweet:    tc.flags&8 != 0,  // fourth bit
			IsReply:      tc.flags&4 != 0,  // third bit
			IsPin:        tc.flags&2 != 0,  // second bit
			IsQuoted:     tc.flags&1 != 0,  // first bit
		}

		if got := filterTweet(tc.filter, &tweet); got != tc.wantBool {
			t.Errorf("%b AND %b > 0 = %t; got %t\n", tc.filter, tc.flags, tc.wantBool, got)
		}
		if got := filterByte(tc.filter, &tweet); got != tc.wantByte {
			t.Errorf("%b AND %b = %b; got %b\n", tc.filter, tc.flags, tc.wantByte, got)
		}
	}
}

View file

@ -1,17 +1,17 @@
package cmd
package tweeter
import (
"bytes"
"encoding/json"
ts "github.com/imperatrona/twitter-scraper"
"log"
"net/http"
"strings"
ts "github.com/imperatrona/twitter-scraper"
//"strconv"
)
const (
BaseURL = "https://twitter.com/"
BaseIcon = "https://abs.twimg.com/icons/apple-touch-icon-192x192.png"
)
@ -30,62 +30,66 @@ type Mention struct {
RepliedUser bool `json:"replied_user,omitempty"`
}
func sendToWebhook(webhookURL string, tweets []*ts.Tweet) {
func (app App) SendToWebhook(tweets []*ts.Tweet) {
for _, tweet := range tweets {
webhooksToSend := []*Webhook{}
data := Webhook{
Content: "<" + tweet.PermanentURL + ">",
Mention: &Mention{Parse: []string{"roles"}},
}
webhooksToSend = append(webhooksToSend, &data)
urlsToAppend := []string{}
userUrl := BaseURL + tweet.Username
if len(tweet.Videos) > 0 {
webhooksToSend = append(webhooksToSend, &Webhook{
Content: "[\u2800](" + app.config.HostURL + "/video/" + tweet.ID + ")",
Mention: &Mention{Parse: []string{"roles"}},
})
}
mainEmbed := data.NewEmbedWithURL(tweet.PermanentURL)
mainEmbed.SetAuthor(tweet.Name+" (@"+tweet.Username+")", userUrl, "https://unavatar.io/twitter/"+tweet.Username)
mainEmbed.SetText(tweet.Text)
mainEmbed.SetAuthor(tweet.Name+" (@"+tweet.Username+")", app.config.HostURL+"/tweet/"+tweet.ID, app.config.HostURL+"/avatar/"+tweet.Username)
mainEmbed.SetColor("#26a7de")
mainEmbed.SetFooter("Twitter", BaseIcon)
mainEmbed.SetTimestamp(tweet.TimeParsed)
//mainEmbed.SetFooter("Twitter", BaseIcon)
//mainEmbed.SetFooter("Twitter • <t:" + strconv.FormatInt((tweet.Timestamp), 10) + ":R>", BaseIcon)
tweetText := tweet.Text
for i, photo := range tweet.Photos {
embed := mainEmbed
if i > 0 {
embed = data.NewEmbedWithURL(tweet.PermanentURL)
}
embed.SetImage(photo.URL)
tweetText = strings.ReplaceAll(tweetText, photo.URL, "")
}
for i, gif := range tweet.GIFs {
embed := mainEmbed
if i > 0 {
embed = data.NewEmbedWithURL(tweet.PermanentURL)
}
embed.SetImage(gif.Preview)
for _, gif := range tweet.GIFs {
//embed := mainEmbed
//if i > 0 {
// embed = data.NewEmbedWithURL(tweet.PermanentURL)
//}
//embed.SetImage(gif.Preview)
tweetText = strings.ReplaceAll(tweetText, gif.Preview, "")
tweetText = strings.ReplaceAll(tweetText, gif.URL, "")
}
for i, video := range tweet.Videos {
embed := mainEmbed
if i > 0 {
embed = data.NewEmbedWithURL(tweet.PermanentURL)
}
// Video embeds are not supported right now
embed.SetImage(video.Preview)
embed.SetVideo(video.URL) // This has sadly no effect
urlsToAppend = append(urlsToAppend, strings.Replace(tweet.PermanentURL, "twitter", "fxtwitter", 1))
for _, video := range tweet.Videos {
//embed := mainEmbed
//if i > 0 {
// embed = data.NewEmbedWithURL(tweet.PermanentURL)
//}
//embed.SetImage(video.Preview)
tweetText = strings.ReplaceAll(tweetText, video.Preview, "")
tweetText = strings.ReplaceAll(tweetText, video.URL, "")
}
err := sendRequest(webhookURL, &data)
if err != nil {
log.Println("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
continue
}
mainEmbed.SetText(strings.TrimSpace(tweetText))
for _, url := range urlsToAppend {
err := sendRequest(webhookURL, &Webhook{Content: url})
for _, data := range webhooksToSend {
err := sendRequest(app.config.Webhook, data)
if err != nil {
log.Println("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
log.Printf("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
continue
}
}
}
}

View file

@ -1,37 +0,0 @@
package cmd
import (
"testing"
)
// TestFilter checks compareBytes against a fixed table of filter values, flag
// patterns and the result expected for each pair.
func TestFilter(t *testing.T) {
	cases := []struct {
		filter uint8 // first argument to compareBytes
		flags  uint8 // second argument to compareBytes
		want   uint8 // expected result
	}{
		{0b10101, 0b10101, 0b10101},
		{0b11111, 0b11111, 0b11111},
		{0b00000, 0b00000, 0b00000},
		{0b00111, 0b00111, 0b00111},
		{0b00101, 0b00101, 0b00101},
		{0b00001, 0b00001, 0b00001},
		{0b10111, 0b01000, 0b00000},
		{0b10101, 0b01010, 0b00000},
		{0b01101, 0b01000, 0b01000},
		{0b10101, 0b11011, 0b10001},
	}

	for _, tc := range cases {
		if got := compareBytes(tc.filter, tc.flags); got != tc.want {
			t.Errorf("%b AND %b = %b; got %b\n", tc.filter, tc.flags, tc.want, got)
		}
	}
}
// compareBytes rebuilds a five-bit flag value from the low five bits of
// filterCheck, one bit at a time from the most significant (16) down to the
// least significant (1), and returns the bitwise AND of filter with that
// rebuilt value.
func compareBytes(filter uint8, filterCheck uint8) uint8 {
	// Masks ordered from the highest flag bit to the lowest, so that shifting
	// left before each step puts every flag back at its original position.
	masks := [...]uint8{16, 8, 4, 2, 1}

	var rebuilt uint8
	for _, mask := range masks {
		rebuilt <<= 1
		if filterCheck&mask != 0 {
			rebuilt |= 1
		}
	}
	return filter & rebuilt
}