feat: Major refactor, implement web, caching, better tests and build files

* Update golang version to 1.24
* Update multiarch Dockerfile to be more ISA agnostic
* Refactor existing code and properly structure project into modules
* Get rid of global variables except where necessary (go:embed)
* Add default values to Config
* Add webserver with templates to finally correctly serve videos and gifs
* Add tiny caching library to decrease api load and improve latency
* Improve Webhook data preparation by filtering out redundant links
  from the tweet text and properly attaching videos and gifs in separate
  webhook request by utilising new webserver
* Improve tests for filter function
* Improve bake definition for easier CI integration
This commit is contained in:
Manuel 2025-03-18 19:22:00 +01:00
parent 7562b86894
commit 21d580d1a6
Signed by: Manuel
GPG key ID: 4085037435E1F07A
24 changed files with 752 additions and 209 deletions

52
pkg/cache/cache.go vendored Normal file
View file

@ -0,0 +1,52 @@
package cache
import (
"sync"
"time"
)
// Cache is a small in-memory key/value store whose entries expire after a
// per-entry time-to-live. Every access to the underlying map happens under
// lock, so a *Cache may be shared between goroutines.
type Cache[K comparable, V any] struct {
	data map[K]entry[K, V]
	lock sync.RWMutex // guards data
}

// entry pairs a stored value with the instant after which it counts as stale.
type entry[K comparable, V any] struct {
	value      V
	expiration time.Time
}

// New returns an empty, ready-to-use Cache.
func New[K comparable, V any]() *Cache[K, V] {
	return &Cache[K, V]{
		data: make(map[K]entry[K, V]),
	}
}

// Set stores value under key, replacing any previous entry. The entry expires
// once ttl has elapsed, measured from the moment Set is called.
func (c *Cache[K, V]) Set(key K, value V, ttl time.Duration) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.data[key] = entry[K, V]{
		value:      value,
		expiration: time.Now().Add(ttl),
	}
}

// Get returns the value stored under key and true, or the zero value of V and
// false when the key is absent or its entry has expired. An expired entry is
// removed from the cache as a side effect.
func (c *Cache[K, V]) Get(key K) (V, bool) {
	var zero V
	now := time.Now()

	// Fast path: a shared lock is enough to read the map.
	c.lock.RLock()
	e, ok := c.data[key]
	c.lock.RUnlock()

	if !ok {
		return zero, false
	}
	if !now.After(e.expiration) {
		return e.value, true
	}

	// The entry looked expired. Evicting it mutates the map, which must not
	// happen under the read lock, so take the write lock and look again: a
	// concurrent Set may have refreshed the key in the meantime.
	c.lock.Lock()
	defer c.lock.Unlock()

	e, ok = c.data[key]
	if !ok {
		return zero, false
	}
	if now.After(e.expiration) {
		delete(c.data, key)
		return zero, false
	}
	return e.value, true
}

// Delete removes key from the cache. Deleting an absent key is a no-op.
func (c *Cache[K, V]) Delete(key K) {
	c.lock.Lock()
	defer c.lock.Unlock()
	delete(c.data, key)
}

145
pkg/cache/cache_test.go vendored Normal file
View file

@ -0,0 +1,145 @@
package cache
import (
"fmt"
"reflect"
"sync"
"testing"
"time"
)
// TestCacheBasicOperations covers a Set/Get round trip and a lookup miss.
func TestCacheBasicOperations(t *testing.T) {
	c := New[string, int]()

	// A stored value must come back unchanged.
	c.Set("key1", 42, 10*time.Second)
	got, found := c.Get("key1")
	if !found {
		t.Errorf("expected key to exist")
	}
	if got != 42 {
		t.Errorf("expected value 42, got %v", got)
	}

	// A key that was never stored yields the zero value and false.
	got, found = c.Get("nonexistent")
	if found {
		t.Errorf("expected key to not exist")
	}
	if got != 0 {
		t.Errorf("expected zero value, got %v", got)
	}
}
// TestCacheExpiration verifies that an entry is readable before its TTL has
// elapsed and reported as missing afterwards.
func TestCacheExpiration(t *testing.T) {
	const (
		ttl  = 100 * time.Millisecond
		wait = 150 * time.Millisecond
	)
	c := New[string, int]()
	c.Set("short-lived", 42, ttl)

	// Immediately after Set the entry is still fresh.
	got, found := c.Get("short-lived")
	if !found {
		t.Errorf("expected key to exist initially")
	}
	if got != 42 {
		t.Errorf("expected value 42, got %v", got)
	}

	// Sleep past the TTL so the entry becomes stale.
	time.Sleep(wait)

	got, found = c.Get("short-lived")
	if found {
		t.Errorf("expected key to have expired")
	}
	if got != 0 {
		t.Errorf("expected zero value, got %v", got)
	}
}
// TestCacheConcurrentAccess runs writers and readers side by side and then
// checks that every written value can be read back.
func TestCacheConcurrentAccess(t *testing.T) {
	const workers = 10
	c := New[string, int]()
	keyFor := func(n int) string { return fmt.Sprintf("key%d", n) }

	var pending sync.WaitGroup
	pending.Add(2 * workers)

	// Writers: one goroutine per key.
	for n := 0; n < workers; n++ {
		go func(n int) {
			defer pending.Done()
			c.Set(keyFor(n), n*2, 10*time.Second)
		}(n)
	}
	// Readers: results are ignored, only the concurrent access matters.
	for n := 0; n < workers; n++ {
		go func(n int) {
			defer pending.Done()
			c.Get(keyFor(n))
		}(n)
	}
	pending.Wait()

	// Once all goroutines are done every key must hold its written value.
	for n := 0; n < workers; n++ {
		got, found := c.Get(keyFor(n))
		if !found {
			t.Errorf("expected key%d to exist", n)
		}
		if got != (n * 2) {
			t.Errorf("expected value %d for key%d, got %d", n*2, n, got)
		}
	}
}
// TestCacheGenericTypes instantiates the cache with different key and value
// types: int keys with string values, and string keys with struct values.
func TestCacheGenericTypes(t *testing.T) {
	// int -> string
	byID := New[int, string]()
	byID.Set(1, "hello", 10*time.Second)
	text, found := byID.Get(1)
	if !found {
		t.Errorf("expected key to exist")
	}
	if text != "hello" {
		t.Errorf("expected value hello, got %v", text)
	}

	// string -> struct
	type Person struct {
		Name string
		Age  int
	}
	expected := Person{"John", 30}

	people := New[string, Person]()
	people.Set("john", Person{"John", 30}, 10*time.Second)
	person, found := people.Get("john")
	if !found {
		t.Errorf("expected key to exist")
	}
	if !reflect.DeepEqual(person, expected) {
		t.Errorf("expected %+v, got %+v", expected, person)
	}
}
// TestCacheDelete verifies that a stored key is no longer found after Delete.
func TestCacheDelete(t *testing.T) {
	c := New[string, int]()

	// Store a value and make sure it is visible first.
	c.Set("key1", 42, 10*time.Second)
	if _, found := c.Get("key1"); !found {
		t.Errorf("expected key to exist")
	}

	c.Delete("key1")

	// The key must be gone now.
	if _, found := c.Get("key1"); found {
		t.Errorf("expected key to not exist after deletion")
	}
}

44
pkg/config/config.go Normal file
View file

@ -0,0 +1,44 @@
package config
import (
"os"
"github.com/BurntSushi/toml"
)
// Config holds the application settings decoded from a TOML file by
// ConfigFromFile. Fields that ConfigFromFile pre-populates keep their default
// when the file does not set them.
type Config struct {
	Username     string   // account user name — presumably the scraper login; confirm against caller
	Password     string   // account password — presumably the scraper login; confirm against caller
	ProxyAddr    string   // proxy address — NOTE(review): usage not visible here
	Channels     []string // user names to follow; the web server only serves avatars and tweets of these users
	Filter       []uint8  // NOTE(review): meaning of the values is not visible here
	Webhook      string   // webhook target — presumably a Discord webhook URL; confirm
	DbPath       string   // database file path; default "./data/tweets.db"
	CookiePath   string   // cookie file path; default "./data/cookies.json"
	UseWebServer bool     // default true
	HostURL      string   // presumably the public base URL of the web server; confirm
	WebPort      uint16   // TCP port the web server listens on; default 8080
	UserAgents   []string // lowercase user-agent substrings that get rendered embeds; "*" matches every client
	NitterBase   string   // base URL other clients are redirected to; default "https://xcancel.com"
}
// ConfigFromFile reads the TOML file at filePath and decodes it on top of the
// built-in defaults, so settings missing from the file keep their default.
//
// It returns a nil *Config together with the error when the file cannot be
// read or decoded; a half-decoded configuration is never handed to the caller.
func ConfigFromFile(filePath string) (*Config, error) {
	tomlData, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}

	conf := &Config{
		// Default values
		DbPath:       "./data/tweets.db",
		CookiePath:   "./data/cookies.json",
		UseWebServer: true,
		WebPort:      8080,
		UserAgents:   []string{"discordbot", "curl", "httpie", "lwp-request", "wget", "python-requests", "openbsd ftp", "powershell"},
		NitterBase:   "https://xcancel.com",
	}

	if _, err := toml.Decode(string(tomlData), conf); err != nil {
		return nil, err
	}
	return conf, nil
}

171
pkg/db/database.go Normal file
View file

@ -0,0 +1,171 @@
package db
import (
"fmt"
"strconv"
ts "github.com/imperatrona/twitter-scraper"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
)
const (
	// SqliteSchema creates the tweet table if it does not exist yet. New
	// executes it every time a sqlite3 database is opened.
	SqliteSchema = `
CREATE TABLE IF NOT EXISTS tweet (
tweet_id INTEGER PRIMARY KEY AUTOINCREMENT,
snowflake SQLITE_UINT64_TYPE NOT NULL UNIQUE,
channel VARCHAR(15) NOT NULL,
timestamp SQLITE_INT64_TYPE NOT NULL
);
`
	// KeepTweets is the number of tweets PruneOldestTweets leaves per channel.
	KeepTweets int = 10 // How many tweets to keep in database before pruning
)
// Tweet is one row of the tweet table. The db tags map the fields to the
// column names declared in SqliteSchema.
type Tweet struct {
	TweetId   int    `db:"tweet_id"`  // auto-incremented primary key
	Snowflake uint64 `db:"snowflake"` // numeric tweet ID, unique per row
	Channel   string `db:"channel"`   // channel (user name) the tweet was stored for
	Timestamp int64  `db:"timestamp"` // tweet timestamp; used together with Snowflake for ordering
}
// Database wraps a sqlx connection and adds the tweet bookkeeping queries.
// The embedded *sqlx.DB keeps all sqlx methods available on Database.
type Database struct {
	*sqlx.DB
}
// New opens the database identified by driver and connectString and makes
// sure the schema exists. Only the "sqlite3" driver is supported, for which
// connectString is the path of the database file.
//
// On any failure it returns a zero Database and the error; a connection that
// was already opened is closed again so it does not leak.
func New(driver string, connectString string) (Database, error) {
	switch driver {
	case "sqlite3":
		connection, err := sqlx.Connect(driver, "file:"+connectString+"?cache=shared")
		if err != nil {
			return Database{}, err
		}
		// Restrict the pool to a single connection — presumably to serialise
		// access to the SQLite file; confirm before changing.
		connection.SetMaxOpenConns(1)
		if _, err := connection.Exec(SqliteSchema); err != nil {
			// The schema error is what the caller needs to see; a failure to
			// close the unusable connection adds nothing to it.
			_ = connection.Close()
			return Database{}, err
		}
		return Database{connection}, nil
	default:
		return Database{}, fmt.Errorf("database driver %s not supported right now", driver)
	}
}
// GetNewestTweet returns the stored tweet of channel with the highest
// timestamp, ties broken by the highest snowflake. Any error from the query is
// returned with a nil tweet (sqlx's Get is expected to report an error when no
// row matches — confirm against the sqlx documentation).
func (db Database) GetNewestTweet(channel string) (*Tweet, error) {
	const query = "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC, snowflake DESC LIMIT 1"

	newest := new(Tweet)
	if err := db.Get(newest, query, channel); err != nil {
		return nil, err
	}
	return newest, nil
}
// GetTweets returns every stored tweet of channel, newest first (ordered by
// timestamp, then snowflake, both descending). On a query error it returns
// nil and the error.
func (db Database) GetTweets(channel string) ([]*Tweet, error) {
	const query = "SELECT * FROM tweet WHERE channel=$1 ORDER BY timestamp DESC, snowflake DESC"

	stored := make([]*Tweet, 0)
	if err := db.Select(&stored, query, channel); err != nil {
		return nil, err
	}
	return stored, nil
}
// ContainsTweet reports whether a tweet with the ID of tweet is stored for
// channel. It returns an error when tweet.ID is not an unsigned decimal number
// or when the query fails.
//
// The lookup is a single COUNT query, so no result set stays open afterwards.
// That matters because New limits the pool to one connection: an unclosed
// result set would block every later query.
func (db Database) ContainsTweet(channel string, tweet *ts.Tweet) (bool, error) {
	snowflake, err := strconv.ParseUint(tweet.ID, 10, 64)
	if err != nil {
		return false, err
	}

	var matches int
	err = db.Get(&matches, "SELECT COUNT(*) FROM tweet WHERE channel=$1 AND snowflake=$2", channel, snowflake)
	if err != nil {
		return false, err
	}
	return matches > 0, nil
}
// InsertTweet stores tweet for channel. It returns an error when tweet.ID is
// not an unsigned decimal number or when the INSERT fails, for example
// because the schema's UNIQUE constraint on snowflake is violated.
func (db Database) InsertTweet(channel string, tweet *ts.Tweet) error {
	row, err := FromTweet(channel, tweet)
	if err != nil {
		return err
	}

	// TweetId is not part of the statement; the database assigns it.
	_, err = db.NamedExec("INSERT INTO tweet (snowflake, channel, timestamp) VALUES (:snowflake, :channel, :timestamp)", row)
	return err
}
// PruneOldestTweets deletes the oldest tweets of channel so that at most
// KeepTweets rows remain. "Oldest" means lowest timestamp, ties broken by
// lowest snowflake. It does nothing when the channel is within the limit.
//
// The surplus rows are removed by one DELETE statement, so the operation is
// atomic without an explicit transaction and leaves no result set open.
func (db Database) PruneOldestTweets(channel string) error {
	var count int
	if err := db.Get(&count, "SELECT COUNT(*) FROM tweet WHERE channel=$1", channel); err != nil {
		return err
	}

	excess := count - KeepTweets
	if excess <= 0 {
		return nil
	}

	_, err := db.Exec(
		"DELETE FROM tweet WHERE tweet_id IN ("+
			"SELECT tweet_id FROM tweet WHERE channel=$1 ORDER BY timestamp ASC, snowflake ASC LIMIT $2"+
			")",
		channel, excess,
	)
	return err
}
// FromTweet converts a scraped tweet into a database row for channel. The
// row's TweetId is left at zero. It returns an error when tweet.ID is not an
// unsigned decimal number.
func FromTweet(channel string, tweet *ts.Tweet) (*Tweet, error) {
	id, parseErr := strconv.ParseUint(tweet.ID, 10, 64)
	if parseErr != nil {
		return nil, parseErr
	}

	row := Tweet{
		TweetId:   0,
		Snowflake: id,
		Channel:   channel,
		Timestamp: tweet.Timestamp,
	}
	return &row, nil
}
// EqualsTweet reports whether the scraped tweet has the same ID as t. A tweet
// whose ID is not an unsigned decimal number never matches.
func (t Tweet) EqualsTweet(tweet *ts.Tweet) bool {
	if id, err := strconv.ParseUint(tweet.ID, 10, 64); err == nil {
		return id == t.Snowflake
	}
	return false
}
// Equals reports whether tweet refers to the same tweet as t, judged by the
// snowflake alone. A nil tweet is never equal.
func (t Tweet) Equals(tweet *Tweet) bool {
	if tweet == nil {
		return false
	}
	return t.Snowflake == tweet.Snowflake
}

55
pkg/db/database_test.go Normal file
View file

@ -0,0 +1,55 @@
package db
import (
"testing"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
)
const (
	// testDbPath is the SQLite file setupSuite connects to.
	testDbPath = "../data/testdb.db"
)

var (
	// connection is the shared test connection; setupSuite assigns it.
	connection *sqlx.DB
)
// setupSuite connects to the test database, stores the connection in the
// package-level connection variable and returns a teardown function that
// closes it again. The calling test is aborted when the connection cannot be
// established, so no test ever runs against a nil connection.
func setupSuite(t *testing.T) func(t *testing.T) {
	t.Helper()

	conn, err := sqlx.Connect("sqlite3", testDbPath)
	if err != nil {
		t.Fatalf("connecting to test database %q: %v", testDbPath, err)
	}
	connection = conn

	return func(t *testing.T) {
		if err := conn.Close(); err != nil {
			t.Errorf("closing test database: %v", err)
		}
	}
}
// setupTest is the per-test counterpart of setupSuite. It currently prepares
// nothing and returns a teardown function that does nothing.
func setupTest(t *testing.T) func(t *testing.T) {
	teardown := func(t *testing.T) {}
	return teardown
}
// The tests below are placeholders. They skip explicitly so the test report
// shows them as unwritten rather than as passing.

// TestGetNewestTweet is a placeholder for the GetNewestTweet test.
func TestGetNewestTweet(t *testing.T) {
	t.Skip("TODO: not implemented")
}

// TestGetTweets is a placeholder for the GetTweets test.
func TestGetTweets(t *testing.T) {
	t.Skip("TODO: not implemented")
}

// TestContainsTweet is a placeholder for the ContainsTweet test.
func TestContainsTweet(t *testing.T) {
	t.Skip("TODO: not implemented")
}

// TestInsertTweet is a placeholder for the InsertTweet test.
func TestInsertTweet(t *testing.T) {
	t.Skip("TODO: not implemented")
}

// TestPruneOldestTweets is a placeholder for the PruneOldestTweets test.
func TestPruneOldestTweets(t *testing.T) {
	t.Skip("TODO: not implemented")
}

View file

@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="theme-color" content="#26a7de">
<link rel="canonical" href="{{ .URL }}">
<meta property="twitter:site" content="{{ .Username }}">
<meta property="twitter:creator" content="{{ .Username }}">
<meta property="twitter:title" content="{{ .Title }}">
{{- range $idx, $e := .Images }}
<meta property="twitter:image" content="{{ $e }}">
<meta property="og:image" content="{{ $e }}">
{{- end }}
{{- range $idx, $e := .Videos }}
<meta property="twitter:player:stream" content="{{ $e }}">
<meta property="og:video" content="{{ $e }}">
{{- end }}
{{- range $idx, $e := .Previews }}
<meta property="twitter:image" content="0">
<meta property="og:image" content="{{ $e }}">
{{- end }}
<meta property="twitter:card" content="{{ .Format }}">
<meta property="og:type" content="article">
<meta property="og:article:published_time" content="{{ .Timestamp }}">
<meta property="og:url" content="{{ .Link }}">
<meta property="og:title" content="{{ .Title }}">
<meta property="og:description" content="{{ .Text }}">
<style>
body, html { margin: 0; padding: 0; height: 100%; width: 100%; }
body { display: flex; }
</style>
<body>

View file

@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="theme-color" content="#26a7de">
<link rel="canonical" href="{{ .URL }}">
{{- if .Videos }}
<meta property="twitter:site" content="{{ .Username }}">
<meta property="twitter:creator" content="{{ .Username }}">
{{- range $idx, $e := .Videos }}
<meta property="twitter:player:stream" content="{{ $e }}">
<meta property="og:video" content="{{ $e }}">
{{- end }}
{{- range $idx, $e := .Previews }}
<meta property="twitter:image" content="0">
<meta property="og:image" content="{{ $e }}">
{{- end }}
<meta property="twitter:card" content="{{ .Format }}">
<meta property="og:site_name" content="Video attached due to embed limitations">
<meta property="og:type" content="article">
<meta property="og:article:published_time" content="{{ .Timestamp }}">
<meta property="og:url" content="{{ .Link }}">
<meta property="og:title" content="&#x200B;">
{{- end }}
<style>
body, html { margin: 0; padding: 0; height: 100%; width: 100%; }
body { display: flex; }
</style>
<body>

211
pkg/web/web.go Normal file
View file

@ -0,0 +1,211 @@
package web
import (
"bytes"
"embed"
"fmt"
"html/template"
"net/http"
"slices"
"strings"
"time"
"git.snrd.eu/sunred/discord-tweeter/pkg/cache"
"git.snrd.eu/sunred/discord-tweeter/pkg/config"
ts "github.com/imperatrona/twitter-scraper"
)
// Cache lifetimes used by the web server handlers.
const (
	AvatarCacheTime = 60 * time.Minute // How long to serve same avatar from cache before doing a fresh scrape
	TweetCacheTime  = 10 * time.Minute // How long to serve same tweet from cache before doing a fresh scrape
)
// templateFiles holds the HTML templates compiled into the binary; New parses
// them with template.ParseFS.
//
//go:embed templates/*.html
var templateFiles embed.FS
// Reponse is a fully rendered HTTP response. It is kept as plain data so that
// it can be stored in the response cache and replayed later.
type Reponse struct {
	StatusCode  int    // HTTP status code to send
	ContentType string // value of the Content-Type header
	Content     string // response body
}
// WebServer serves avatar redirects and rendered tweet/video embed pages.
// Create it with New, which also registers the routes on Server's handler.
type WebServer struct {
	config        *config.Config                // application settings (channels, user agents, Nitter base, port)
	scraper       *ts.Scraper                   // used to fetch profiles and tweets on a cache miss
	templates     *template.Template            // templates parsed from templateFiles
	avatarCache   *cache.Cache[string, string]  // "avatar-<username>" -> avatar URL
	responseCache *cache.Cache[string, Reponse] // "<template>-<id>" -> rendered response
	Server        *http.Server                  // HTTP server configured with the routes; starting it is left to the caller
}
// Tweet is the data handed to the HTML templates when rendering an embed.
type Tweet struct {
	URL       string   // the tweet's permanent URL
	Link      string   // link to the tweet on the configured Nitter base
	Title     string   // "<display name> (@<username>)"
	Text      string   // tweet text with media URLs removed and surrounding whitespace trimmed
	Username  string   // author's user name
	Timestamp string   // tweet time formatted as RFC 3339
	Format    string   // "player" when videos are attached, otherwise "summary_large_image"
	Images    []string // photo URLs
	Videos    []string // video and GIF URLs
	Previews  []string // preview image URLs of the videos and GIFs
}
// New builds a WebServer for the given configuration and scraper. It parses
// the embedded templates, creates empty avatar and response caches and
// registers the avatar, tweet and video routes on an http.Server listening on
// config.WebPort. The server is configured but not started. An error is
// returned only when the templates fail to parse.
func New(config *config.Config, scraper *ts.Scraper) (*WebServer, error) {
	parsed, err := template.ParseFS(templateFiles, "templates/*.html")
	if err != nil {
		return nil, err
	}

	mux := http.NewServeMux()
	server := &http.Server{
		Handler:      mux,
		Addr:         fmt.Sprintf(":%d", config.WebPort),
		ReadTimeout:  30 * time.Second,
		WriteTimeout: 30 * time.Second,
	}

	ws := &WebServer{
		config:        config,
		scraper:       scraper,
		templates:     parsed,
		avatarCache:   cache.New[string, string](),
		responseCache: cache.New[string, Reponse](),
		Server:        server,
	}

	mux.HandleFunc("GET /avatar/{username}", ws.handleAvatar)
	mux.HandleFunc("GET /tweet/{id}", ws.handleTweet)
	mux.HandleFunc("GET /video/{id}", ws.handleVideo)

	return ws, nil
}
// handleAvatar redirects to the avatar image of the user named in the path.
// Users that are not configured channels get 400 Bad Request. The avatar URL
// is taken from the cache when present; otherwise the profile is scraped and
// the URL cached for AvatarCacheTime. A scrape failure yields 500.
func (ws WebServer) handleAvatar(w http.ResponseWriter, r *http.Request) {
	username := r.PathValue("username")
	if !slices.Contains(ws.config.Channels, username) {
		badRequest(w)
		return
	}

	cacheKey := "avatar-" + username
	avatarURL, hit := ws.avatarCache.Get(cacheKey)
	if !hit {
		profile, err := ws.scraper.GetProfile(username)
		if err != nil {
			serverError(w)
			return
		}
		avatarURL = profile.Avatar
		ws.avatarCache.Set(cacheKey, avatarURL, AvatarCacheTime)
	}

	http.Redirect(w, r, avatarURL, http.StatusPermanentRedirect)
}
// handleTweet renders the "tweet" template for the tweet ID in the path.
func (ws WebServer) handleTweet(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	ws.handleTemplate(w, r, id, "tweet")
}
// handleVideo renders the "video" template for the tweet ID in the path.
func (ws WebServer) handleVideo(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	ws.handleTemplate(w, r, id, "video")
}
// handleTemplate renders the embed page for tweet id with the named template
// ("tweet" or "video") and writes it to w.
//
// Clients whose user agent is not in the configured list are redirected to
// the tweet on the Nitter base instead. Rendered pages are cached per
// template and ID for TweetCacheTime. A tweet by a user that is not a
// configured channel yields 400 Bad Request, which is cached as well; a
// scrape or render failure yields 500 and is not cached.
func (ws WebServer) handleTemplate(w http.ResponseWriter, r *http.Request, id string, template string) {
	if !validUserAgent(r.UserAgent(), ws.config.UserAgents) {
		http.Redirect(w, r, ws.config.NitterBase+"/i/status/"+id, http.StatusPermanentRedirect)
		return
	}

	cacheKey := template + "-" + id
	if entry, cached := ws.responseCache.Get(cacheKey); cached {
		response(w, entry)
		return
	}

	tweet, err := ws.scraper.GetTweet(id)
	if err != nil {
		serverError(w)
		return
	}

	if !slices.Contains(ws.config.Channels, tweet.Username) {
		res := Reponse{http.StatusBadRequest, "text/plain", "Bad Request"}
		ws.responseCache.Set(cacheKey, res, TweetCacheTime)
		response(w, res)
		return
	}

	// Collect the media URLs and strip them from the text, since the media
	// is attached through the meta tags of the template.
	tweetText := tweet.Text
	var images []string
	var videos []string
	var previews []string
	for _, photo := range tweet.Photos {
		images = append(images, photo.URL)
		tweetText = strings.ReplaceAll(tweetText, photo.URL, "")
	}
	for _, gif := range tweet.GIFs {
		// GIFs are listed alongside videos. Each list must be extended from
		// itself, or earlier entries are lost and image URLs leak into it.
		videos = append(videos, gif.URL)
		previews = append(previews, gif.Preview)
		tweetText = strings.ReplaceAll(tweetText, gif.URL, "")
		tweetText = strings.ReplaceAll(tweetText, gif.Preview, "")
	}
	for _, video := range tweet.Videos {
		videos = append(videos, video.URL)
		previews = append(previews, video.Preview)
		tweetText = strings.ReplaceAll(tweetText, video.URL, "")
		tweetText = strings.ReplaceAll(tweetText, video.Preview, "")
	}

	format := "summary_large_image"
	if len(videos) > 0 {
		format = "player"
	}

	data := Tweet{
		URL:       tweet.PermanentURL,
		Link:      ws.config.NitterBase + "/" + tweet.Username + "/status/" + id,
		Title:     tweet.Name + " (@" + tweet.Username + ")",
		Text:      strings.TrimSpace(tweetText),
		Username:  tweet.Username,
		Timestamp: tweet.TimeParsed.Format(time.RFC3339),
		Format:    format,
		Images:    images,
		Videos:    videos,
		Previews:  previews,
	}

	// Render into a buffer first so a template error can still become a
	// clean 500 instead of a half-written page.
	var tpl bytes.Buffer
	if err := ws.templates.ExecuteTemplate(&tpl, template+".html", data); err != nil {
		serverError(w)
		return
	}

	res := Reponse{http.StatusOK, "text/html", tpl.String()}
	ws.responseCache.Set(cacheKey, res, TweetCacheTime)
	response(w, res)
}
// validUserAgent reports whether the user-agent string ua matches one of the
// patterns in uas. A pattern matches when it is "*" or when it occurs
// anywhere in ua; the comparison ignores case on both sides. An empty or nil
// pattern list matches nothing.
func validUserAgent(ua string, uas []string) bool {
	// Lowercase the user agent once instead of once per pattern.
	lowered := strings.ToLower(ua)
	for _, pattern := range uas {
		if pattern == "*" || strings.Contains(lowered, strings.ToLower(pattern)) {
			return true
		}
	}
	return false
}
func badRequest(w http.ResponseWriter) {
response(w, Reponse{http.StatusBadRequest, "text/plain", "Bad Request"})
}
func serverError(w http.ResponseWriter) {
response(w, Reponse{http.StatusInternalServerError, "text/plain", "Internal Server Error"})
}
// response writes res to w: the Content-Type header, the status code and then
// the body.
func response(w http.ResponseWriter, res Reponse) {
	// Headers must be set before WriteHeader; later changes to the header
	// map are not sent to the client.
	w.Header().Set("Content-Type", res.ContentType)
	w.WriteHeader(res.StatusCode)
	fmt.Fprint(w, res.Content)
}