feat: major refactor; add webserver, caching, better tests and build files

* Update Go version to 1.24
* Update multiarch Dockerfile to be more ISA-agnostic
* Refactor existing code and properly structure the project into modules
* Get rid of global variables except where necessary (go:embed)
* Add default values to Config
* Add webserver with templates to correctly serve videos and GIFs
* Add tiny caching library to decrease API load and improve latency
  (a rough sketch of the idea follows the commit details below)
* Improve webhook data preparation by filtering redundant links out of
  the tweet text and attaching videos and GIFs in a separate webhook
  request via the new webserver
* Improve tests for the filter function
* Improve bake definition for easier CI integration
commit 21d580d1a6 (parent 7562b86894)
Author: Manuel, 2025-03-18 19:22:00 +01:00
Signed by: Manuel (GPG key ID: 4085037435E1F07A)
24 changed files with 752 additions and 209 deletions
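The caching library itself (pkg/cache) is not among the hunks quoted below, so as a rough sketch of the idea only: a minimal TTL-map cache along the following lines would cover the "decrease API load" case. All names and signatures here are hypothetical, not the actual pkg/cache API.

package cache

import (
	"sync"
	"time"
)

// entry pairs a cached value with its expiry time. (hypothetical sketch)
type entry struct {
	value   any
	expires time.Time
}

// Cache is a tiny TTL-based in-memory cache, safe for concurrent use.
type Cache struct {
	mu  sync.RWMutex
	ttl time.Duration
	m   map[string]entry
}

// New creates a cache whose entries expire after ttl.
func New(ttl time.Duration) *Cache {
	return &Cache{ttl: ttl, m: make(map[string]entry)}
}

// Get returns the cached value if it is present and not yet expired.
func (c *Cache) Get(key string) (any, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	e, ok := c.m[key]
	if !ok || time.Now().After(e.expires) {
		return nil, false
	}
	return e.value, true
}

// Set stores value under key with the cache-wide TTL.
func (c *Cache) Set(key string, value any) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.m[key] = entry{value: value, expires: time.Now().Add(c.ttl)}
}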

cmd/tweeter/embed.go (new file)

@@ -0,0 +1,116 @@
package tweeter

import (
	"errors"
	"strconv"
	"strings"
	"time"
)

type Embed struct {
	Author      Author  `json:"author,omitempty"`
	Title       string  `json:"title,omitempty"`
	URL         string  `json:"url,omitempty"`
	Description string  `json:"description,omitempty"`
	Timestamp   string  `json:"timestamp,omitempty"`
	Color       int64   `json:"color,omitempty"`
	Fields      []Field `json:"fields,omitempty"`
	Thumbnail   Image   `json:"thumbnail,omitempty"`
	Image       Image   `json:"image,omitempty"`
	Video       Video   `json:"video,omitempty"`
	Footer      Footer  `json:"footer,omitempty"`
}

type Author struct {
	Name    string `json:"name"`
	URL     string `json:"url"`
	IconURL string `json:"icon_url"`
}

type Field struct {
	Name   string `json:"name"`
	Value  string `json:"value"`
	Inline bool   `json:"inline,omitempty"`
}

type Footer struct {
	Text    string `json:"text"`
	IconURL string `json:"icon_url,omitempty"`
}

type Image struct {
	URL string `json:"url"`
}

type Video struct {
	URL string `json:"url"`
}

func (w *Webhook) NewEmbedWithURL(URL string) *Embed {
	emb := Embed{URL: URL}
	w.Embeds = append(w.Embeds, &emb)
	return &emb
}

func (w *Webhook) NewEmbed() *Embed {
	emb := Embed{}
	w.Embeds = append(w.Embeds, &emb)
	return &emb
}

func (e *Embed) SetTitle(Title string) *Embed {
	e.Title = Title
	return e
}

func (e *Embed) SetText(Description string) *Embed {
	e.Description = Description
	return e
}

func (e *Embed) SetAuthor(Name, URL, IconURL string) *Embed {
	e.Author = Author{Name, URL, IconURL}
	return e
}

func (e *Embed) SetColor(color string) (*Embed, error) {
	color = strings.Replace(color, "0x", "", -1)
	color = strings.Replace(color, "0X", "", -1)
	color = strings.Replace(color, "#", "", -1)
	colorInt, err := strconv.ParseInt(color, 16, 64)
	if err != nil {
		return nil, errors.New("invalid hex code passed")
	}
	e.Color = colorInt
	return e, nil
}

func (e *Embed) SetThumbnail(URL string) *Embed {
	e.Thumbnail = Image{URL}
	return e
}

func (e *Embed) SetImage(URL string) *Embed {
	e.Image = Image{URL}
	return e
}

func (e *Embed) SetVideo(URL string) *Embed {
	e.Video = Video{URL}
	return e
}

func (e *Embed) SetFooter(Text, IconURL string) *Embed {
	e.Footer = Footer{Text, IconURL}
	return e
}

func (e *Embed) SetTimestamp(timestamp time.Time) *Embed {
	e.Timestamp = timestamp.Format(time.RFC3339)
	return e
}

func (e *Embed) AddField(Name, Value string, Inline bool) *Embed {
	e.Fields = append(e.Fields, Field{Name, Value, Inline})
	return e
}
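For illustration, a typical chained use of this builder API might look like the following (hypothetical snippet assuming package tweeter, not part of the commit; SetColor is called separately because it also returns an error):

func exampleEmbed() (*Webhook, error) {
	w := &Webhook{}
	emb := w.NewEmbedWithURL("https://twitter.com/user/status/1")
	emb.SetAuthor("User (@user)", "https://example.org/tweet/1", "https://example.org/avatar/user").
		SetText("Hello from the builder API").
		SetTimestamp(time.Now())
	// SetColor returns (*Embed, error), so it breaks the chain.
	if _, err := emb.SetColor("#26a7de"); err != nil {
		return nil, err
	}
	return w, nil
}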

cmd/tweeter/tweeter.go (new file)

@@ -0,0 +1,268 @@
package tweeter

import (
	"context"
	"encoding/json"
	"log"
	"math/rand"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"git.snrd.eu/sunred/discord-tweeter/pkg/config"
	"git.snrd.eu/sunred/discord-tweeter/pkg/db"
	"git.snrd.eu/sunred/discord-tweeter/pkg/web"

	ts "github.com/imperatrona/twitter-scraper"
)

const (
	ScrapeInterval int   = 3  // How often to check for new tweets (in minutes)
	ScrapeDelay    int64 = 0  // How long to wait between API requests (in seconds)
	ScrapeStep     int   = 10 // How many tweets to fetch at a time
	DefaultConfig        = "./config.toml"
)
type App struct {
	config  *config.Config
	db      db.Database
	scraper *ts.Scraper
}

func Run() {
	args := os.Args[1:]
	if len(args) > 1 {
		log.Fatalln("Too many arguments")
	}
	configPath := DefaultConfig
	if len(args) == 1 {
		if args[0] == "" {
			log.Fatalln("No config path given")
		}
		configPath = args[0]
	}
	config, err := config.ConfigFromFile(configPath)
	if err != nil {
		log.Fatalf("There has been an error parsing config file '%s': %s\n", configPath, err.Error())
	}
	if len(config.Channels) == 0 {
		log.Fatalln("List of channels cannot be empty")
	}
	if len(config.Channels) != len(config.Filter) {
		log.Fatalln("List of filters has to be the same length as the channel list")
	}
	if config.Webhook == "" {
		log.Fatalln("Webhook address cannot be empty")
	}
	if config.UseWebServer && config.HostURL == "" {
		log.Fatalln("HostURL cannot be empty")
	}
	db, dberr := db.New("sqlite3", config.DbPath)
	if dberr != nil {
		log.Fatalf("An error occurred while creating database connection: %s\n", dberr.Error())
	}
	defer db.Close()
	scraper := ts.New()
	if config.ProxyAddr != "" {
		err := scraper.SetProxy(config.ProxyAddr)
		if err != nil {
			log.Fatalf("An error occurred with proxy connection: %s\n", err.Error())
		}
	}
	{
		f, err := os.Open(config.CookiePath)
		if err != nil {
			log.Println("Cookie file does not yet exist")
		} else {
			var cookies []*http.Cookie
			if err := json.NewDecoder(f).Decode(&cookies); err != nil {
				log.Printf("Failed to decode cookie file: %s\n", err.Error())
			} else {
				scraper.SetCookies(cookies)
			}
			f.Close()
		}
	}
	if scraper.IsLoggedIn() {
		log.Println("We're already logged in, skipping login...")
	} else {
		scraper.ClearCookies()
		if len(config.Username) > 0 {
			err := scraper.Login(config.Username, config.Password)
			if err != nil {
				log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
			} else {
				log.Printf("New login - saving cookies to %s\n", config.CookiePath)
				js, jsonErr := json.Marshal(scraper.GetCookies())
				if jsonErr != nil {
					log.Fatalf("An error occurred during cookie serialization: %s\n", jsonErr.Error())
				}
				f, fErr := os.Create(config.CookiePath)
				if fErr != nil {
					log.Fatalf("Failed to create cookie file at %s: %s\n", config.CookiePath, fErr.Error())
				}
				if _, err := f.Write(js); err != nil {
					log.Fatalf("An error occurred while writing cookie file: %s\n", err.Error())
				}
				if err := f.Close(); err != nil {
					log.Fatalf("An error occurred on closing cookie file: %s\n", err.Error())
				}
			}
		} else {
			log.Println("Trying open account login...")
			_, err := scraper.LoginOpenAccount() // TODO: save open account token/secret
			if err != nil {
				log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
			}
			defer scraper.Logout()
		}
	}
	scraper.WithDelay(ScrapeDelay)
	if config.UseWebServer {
		log.Printf("Starting webserver on port %d", config.WebPort)
		ws, err := web.New(config, scraper)
		if err != nil {
			log.Fatalf("An error occurred while starting webserver: %s\n", err.Error())
		}
		go func() {
			if err := ws.Server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
				log.Printf("Webserver stopped: %s\n", err.Error())
			}
		}()
	}
	log.Printf("Starting main app with %d workers", len(config.Channels))
	app := App{config, db, scraper}
	for i, c := range config.Channels {
		go app.queryLoop(i, c)
	}
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	done := make(chan bool, 1)
	go func() {
		sig := <-sigs
		log.Println(sig)
		done <- true
	}()
	<-done
	log.Println("Exiting...")
}
func (app App) queryLoop(id int, channel string) {
	log.Printf("Starting worker %d for channel %s", id, channel)
	// Sleep to stagger API queries of workers
	time.Sleep(time.Duration(id) * time.Minute)
	db := app.db
	filter := app.config.Filter[id]
	init := true
ScrapeLoop:
	for {
		if !init {
			// Sleep for the set interval +-30 seconds
			time.Sleep(time.Duration(ScrapeInterval)*time.Minute + time.Duration(rand.Intn(60)-30)*time.Second)
		}
		init = false
		step := ScrapeStep
		tweets := []*ts.Tweet{}
		tweetsToParse := []*ts.Tweet{}
		tweetsToPost := []*ts.Tweet{}
		dbTweets, dbErr := db.GetTweets(channel)
		if dbErr != nil {
			log.Printf("Error while retrieving tweets from database for channel %s: %s", channel, dbErr.Error())
			continue ScrapeLoop
		}
	GetTweets:
		for {
			for tweet := range app.scraper.GetTweets(context.Background(), channel, step) {
				if tweet.Error != nil {
					log.Printf("Error while retrieving tweet for channel %s: %s", channel, tweet.Error.Error())
					continue ScrapeLoop
				}
				tweets = append(tweets, &tweet.Tweet)
			}
			if len(tweets) == 0 {
				break GetTweets
			}
			if len(dbTweets) > 0 {
				// Parse everything newer than a tweet we already know about
				for _, dbTweet := range dbTweets {
					for i, tweet := range tweets {
						if dbTweet.EqualsTweet(tweet) {
							tweetsToParse = tweets[:i]
							break GetTweets
						}
					}
				}
			} else {
				tweetsToParse = append(tweetsToParse, tweets[0])
				break GetTweets
			}
			if step >= 50 {
				tweetsToParse = append(tweetsToParse, tweets[0])
				break GetTweets
			} else if step+ScrapeStep > 50 {
				step = 50
			} else {
				step += ScrapeStep
			}
			log.Printf("Fetching more tweets for %s...", channel)
			time.Sleep(3 * time.Second) // Wait a few seconds before the next API request
		}
	ParseTweets:
		// We want to parse old to new
		for i := len(tweetsToParse) - 1; i >= 0; i-- {
			tweet := tweetsToParse[i]
			if filterTweet(filter, tweet) {
				// At least one filter bit matches the tweet, so skip it
				continue ParseTweets
			}
			err := db.InsertTweet(channel, tweet)
			if err != nil {
				log.Printf("Error while inserting tweet for channel %s into the database: %s", channel, err.Error())
				continue ParseTweets
			}
			tweetsToPost = append(tweetsToPost, tweet)
		}
		app.SendToWebhook(tweetsToPost)
		err := db.PruneOldestTweets(channel)
		if err != nil {
			log.Printf("Error while pruning old tweets for channel %s: %s", channel, err.Error())
		}
	}
}
// filterTweet reports whether any filter bit set in filter also applies to tweet.
func filterTweet(filter uint8, tweet *ts.Tweet) bool {
	return filterByte(filter, tweet) > 0
}

// filterByte builds a bitmask from the tweet's properties (bit 4: self-thread,
// bit 3: retweet, bit 2: reply, bit 1: pin, bit 0: quoted) and ANDs it with filter.
func filterByte(filter uint8, tweet *ts.Tweet) uint8 {
	var tweetFilter uint8 = 0
	filterMap := []bool{tweet.IsSelfThread, tweet.IsRetweet, tweet.IsReply, tweet.IsPin, tweet.IsQuoted}
	for _, f := range filterMap {
		tweetFilter <<= 1
		if f {
			tweetFilter |= 1
		}
	}
	return filter & tweetFilter
}
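To make the mask concrete, a small hypothetical example (bit 4 = self-thread, bit 3 = retweet, bit 2 = reply, bit 1 = pin, bit 0 = quoted):

filter := uint8(0b01010)          // drop retweets (bit 3) and pins (bit 1)
tweet := ts.Tweet{IsRetweet: true}
ok := filterTweet(filter, &tweet) // filterByte yields 0b01000; the AND is non-zero, so ok == true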

(new test file; name not shown in this view)

@@ -0,0 +1,43 @@
package tweeter

import (
	"testing"

	ts "github.com/imperatrona/twitter-scraper"
)

func TestFilter(t *testing.T) {
	filter1 := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b10111, 0b10101, 0b01101, 0b10101}
	filter2 := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b01000, 0b01010, 0b01000, 0b11011}
	filterR := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b00000, 0b00000, 0b01000, 0b10001}
	filterB := []bool{true, true, false, true, true, true, false, false, true, true}
	for i, filterBool := range filterB {
		filterResult := filterR[i]
		filterCheck := filter2[i]
		filterQuoted := (filterCheck & 1) != 0      // first bit
		filterPin := (filterCheck & 2) != 0         // second bit
		filterReply := (filterCheck & 4) != 0       // third bit
		filterRetweet := (filterCheck & 8) != 0     // fourth bit
		filterSelfThread := (filterCheck & 16) != 0 // fifth bit
		tweet := ts.Tweet{
			IsSelfThread: filterSelfThread,
			IsRetweet:    filterRetweet,
			IsReply:      filterReply,
			IsPin:        filterPin,
			IsQuoted:     filterQuoted,
		}
		resultBool := filterTweet(filter1[i], &tweet)
		if resultBool != filterBool {
			t.Errorf("%b AND %b > 0 = %t; got %t\n", filter1[i], filter2[i], filterBool, resultBool)
		}
		resultByte := filterByte(filter1[i], &tweet)
		if resultByte != filterResult {
			t.Errorf("%b AND %b = %b; got %b\n", filter1[i], filter2[i], filterResult, resultByte)
		}
	}
}

cmd/tweeter/webhook.go (new file)

@@ -0,0 +1,109 @@
package tweeter

import (
	"bytes"
	"encoding/json"
	"errors"
	"log"
	"net/http"
	"strings"

	ts "github.com/imperatrona/twitter-scraper"
	//"strconv"
)

const (
	BaseIcon = "https://abs.twimg.com/icons/apple-touch-icon-192x192.png"
)
type Webhook struct {
	Content string   `json:"content"`
	Name    string   `json:"username,omitempty"`
	Avatar  string   `json:"avatar_url,omitempty"`
	Mention *Mention `json:"allowed_mentions,omitempty"`
	Embeds  []*Embed `json:"embeds,omitempty"`
}

type Mention struct {
	Parse       []string `json:"parse,omitempty"`
	Roles       []string `json:"roles,omitempty"`
	Users       []string `json:"users,omitempty"`
	RepliedUser bool     `json:"replied_user,omitempty"`
}
func (app App) SendToWebhook(tweets []*ts.Tweet) {
	for _, tweet := range tweets {
		webhooksToSend := []*Webhook{}
		data := Webhook{
			Mention: &Mention{Parse: []string{"roles"}},
		}
		webhooksToSend = append(webhooksToSend, &data)
		if len(tweet.Videos) > 0 {
			// Send the video in a separate message: a masked link around an
			// invisible character (U+2800) makes Discord unfurl the video page
			// served by our webserver without showing any visible text.
			webhooksToSend = append(webhooksToSend, &Webhook{
				Content: "[\u2800](" + app.config.HostURL + "/video/" + tweet.ID + ")",
				Mention: &Mention{Parse: []string{"roles"}},
			})
		}
		mainEmbed := data.NewEmbedWithURL(tweet.PermanentURL)
		mainEmbed.SetAuthor(tweet.Name+" (@"+tweet.Username+")", app.config.HostURL+"/tweet/"+tweet.ID, app.config.HostURL+"/avatar/"+tweet.Username)
		mainEmbed.SetColor("#26a7de")
		mainEmbed.SetTimestamp(tweet.TimeParsed)
		//mainEmbed.SetFooter("Twitter", BaseIcon)
		//mainEmbed.SetFooter("Twitter • <t:" + strconv.FormatInt((tweet.Timestamp), 10) + ":R>", BaseIcon)
		tweetText := tweet.Text
		for i, photo := range tweet.Photos {
			embed := mainEmbed
			if i > 0 {
				embed = data.NewEmbedWithURL(tweet.PermanentURL)
			}
			embed.SetImage(photo.URL)
			tweetText = strings.ReplaceAll(tweetText, photo.URL, "")
		}
		// GIF and video previews are no longer attached as embed images;
		// only their redundant links are stripped from the tweet text.
		for _, gif := range tweet.GIFs {
			tweetText = strings.ReplaceAll(tweetText, gif.Preview, "")
			tweetText = strings.ReplaceAll(tweetText, gif.URL, "")
		}
		for _, video := range tweet.Videos {
			tweetText = strings.ReplaceAll(tweetText, video.Preview, "")
			tweetText = strings.ReplaceAll(tweetText, video.URL, "")
		}
		mainEmbed.SetText(strings.TrimSpace(tweetText))
		for _, data := range webhooksToSend {
			err := sendRequest(app.config.Webhook, data)
			if err != nil {
				log.Printf("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
				continue
			}
		}
	}
}
func sendRequest(url string, data *Webhook) error {
	jsonData, err := json.Marshal(data)
	if err != nil {
		return err
	}
	resp, err := http.Post(url, "application/json", bytes.NewBuffer(jsonData))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return errors.New("unexpected webhook response: " + resp.Status)
	}
	return nil
}
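For reference, the extra video message built in SendToWebhook serializes to a payload along these lines (illustrative values; the host and tweet ID stand in for whatever HostURL and tweet.ID hold):

{
  "content": "[\u2800](https://example.org/video/1234567890)",
  "allowed_mentions": {
    "parse": ["roles"]
  }
}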