feat: Major refactor, implement web, caching, better tests and build files
* Update golang version to 1.24 * Update multiarch Dockerfile to be more ISA agnostic * Refactor existing code and properly structure project into modules * Get rid of global variables except where necessary (go:embed) * Add default values to Config * Add webserver with templates to finally correctly serve videos and gifs * Add tiny caching library to decrease api load and improve latency * Improve Webhook data preparation by filtering out redundant links from the tweet text and properly attaching videos and gifs in separate webhook request by utilising new webserver * Improve tests for filter function * Improve bake definition for easier CI integration
This commit is contained in:
parent
7562b86894
commit
21d580d1a6
24 changed files with 752 additions and 209 deletions
116
cmd/tweeter/embed.go
Normal file
116
cmd/tweeter/embed.go
Normal file
|
@ -0,0 +1,116 @@
|
|||
package tweeter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Embed models a single Discord embed object as accepted by the webhook API.
//
// NOTE(review): `omitempty` has no effect on non-pointer struct fields
// (Author, Thumbnail, Image, Video, Footer) — empty structs are still
// serialized. Discord tolerates the empty objects, but confirm if payload
// minimality ever matters; pointer fields would be needed to truly omit them.
type Embed struct {
	Author      Author  `json:"author,omitempty"`
	Title       string  `json:"title,omitempty"`
	URL         string  `json:"url,omitempty"`
	Description string  `json:"description,omitempty"`
	Timestamp   string  `json:"timestamp,omitempty"` // RFC3339, set via SetTimestamp
	Color       int64   `json:"color,omitempty"`     // decimal color value, set via SetColor
	Fields      []Field `json:"fields,omitempty"`
	Thumbnail   Image   `json:"thumbnail,omitempty"`
	Image       Image   `json:"image,omitempty"`
	Video       Video   `json:"video,omitempty"`
	Footer      Footer  `json:"footer,omitempty"`
}

// Author is the embed's author line: display name, link target and icon.
type Author struct {
	Name    string `json:"name"`
	URL     string `json:"url"`
	IconURL string `json:"icon_url"`
}

// Field is a single name/value pair rendered inside an embed.
type Field struct {
	Name   string `json:"name"`
	Value  string `json:"value"`
	Inline bool   `json:"inline,omitempty"`
}

// Footer is the small text/icon line at the bottom of an embed.
type Footer struct {
	Text    string `json:"text"`
	IconURL string `json:"icon_url,omitempty"`
}

// Image wraps an image URL; used for both the thumbnail and image slots.
type Image struct {
	URL string `json:"url"`
}

// Video wraps a video URL for the embed's video slot.
type Video struct {
	URL string `json:"url"`
}
|
||||
|
||||
func (w *Webhook) NewEmbedWithURL(URL string) *Embed {
|
||||
emb := Embed{URL: URL}
|
||||
w.Embeds = append(w.Embeds, &emb)
|
||||
return &emb
|
||||
}
|
||||
|
||||
func (w *Webhook) NewEmbed() *Embed {
|
||||
emb := Embed{}
|
||||
w.Embeds = append(w.Embeds, &emb)
|
||||
return &emb
|
||||
}
|
||||
|
||||
func (e *Embed) SetTitle(Title string) *Embed {
|
||||
e.Title = Title
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetText(Description string) *Embed {
|
||||
e.Description = Description
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetAuthor(Name, URL, IconURL string) *Embed {
|
||||
e.Author = Author{Name, URL, IconURL}
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetColor(color string) (*Embed, error) {
|
||||
color = strings.Replace(color, "0x", "", -1)
|
||||
color = strings.Replace(color, "0X", "", -1)
|
||||
color = strings.Replace(color, "#", "", -1)
|
||||
colorInt, err := strconv.ParseInt(color, 16, 64)
|
||||
if err != nil {
|
||||
return nil, errors.New("invalid hex code passed")
|
||||
}
|
||||
e.Color = colorInt
|
||||
return e, nil
|
||||
}
|
||||
|
||||
func (e *Embed) SetThumbnail(URL string) *Embed {
|
||||
e.Thumbnail = Image{URL}
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetImage(URL string) *Embed {
|
||||
e.Image = Image{URL}
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetVideo(URL string) *Embed {
|
||||
e.Video = Video{URL}
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetFooter(Text, IconURL string) *Embed {
|
||||
e.Footer = Footer{Text, IconURL}
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) SetTimestamp(timestamp time.Time) *Embed {
|
||||
e.Timestamp = timestamp.Format(time.RFC3339)
|
||||
return e
|
||||
}
|
||||
|
||||
func (e *Embed) AddField(Name, Value string, Inline bool) *Embed {
|
||||
e.Fields = append(e.Fields, Field{Name, Value, Inline})
|
||||
return e
|
||||
}
|
268
cmd/tweeter/tweeter.go
Normal file
268
cmd/tweeter/tweeter.go
Normal file
|
@ -0,0 +1,268 @@
|
|||
package tweeter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.snrd.eu/sunred/discord-tweeter/pkg/config"
|
||||
"git.snrd.eu/sunred/discord-tweeter/pkg/db"
|
||||
"git.snrd.eu/sunred/discord-tweeter/pkg/web"
|
||||
ts "github.com/imperatrona/twitter-scraper"
|
||||
)
|
||||
|
||||
const (
	// ScrapeInterval is how often each worker checks for new tweets (in minutes).
	ScrapeInterval int = 3
	// ScrapeDelay is how long the scraper waits between api requests (in seconds).
	ScrapeDelay int64 = 0
	// ScrapeStep is how many tweets are requested per api call.
	ScrapeStep int = 10
	// DefaultConfig is the config file path used when none is given on the command line.
	DefaultConfig = "./config.toml"
)
|
||||
|
||||
// App bundles the shared dependencies handed to each per-channel worker.
type App struct {
	config  *config.Config // parsed configuration (channels, filters, webhook, ...)
	db      db.Database    // tweet persistence, used for dedup and pruning
	scraper *ts.Scraper    // shared twitter scraper session
}
|
||||
|
||||
func Run() {
|
||||
args := os.Args[1:]
|
||||
|
||||
if len(args) > 1 {
|
||||
log.Fatalln("Too many arguments")
|
||||
}
|
||||
|
||||
configPath := DefaultConfig
|
||||
if len(args) == 1 {
|
||||
if args[0] == "" {
|
||||
log.Fatalln("No config path given")
|
||||
}
|
||||
configPath = args[0]
|
||||
}
|
||||
|
||||
config, err := config.ConfigFromFile(configPath)
|
||||
if err != nil {
|
||||
log.Fatalf("There has been an error parsing config file '%s': %s\n", configPath, err.Error())
|
||||
}
|
||||
|
||||
if len(config.Channels) == 0 {
|
||||
log.Fatalln("List of channels cannot be empty")
|
||||
}
|
||||
|
||||
if len(config.Channels) != len(config.Filter) {
|
||||
log.Fatalln("List of filters has to be same length as channel list")
|
||||
}
|
||||
|
||||
if config.Webhook == "" {
|
||||
log.Fatalln("Webhook address cannot be empty")
|
||||
}
|
||||
|
||||
if config.UseWebServer && config.HostURL == "" {
|
||||
log.Fatalln("HostURL cannot be empty")
|
||||
}
|
||||
|
||||
db, dberr := db.New("sqlite3", config.DbPath)
|
||||
if dberr != nil {
|
||||
log.Fatalf("An error occurred while creating database connection: %s\n", dberr.Error())
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
scraper := ts.New()
|
||||
|
||||
if config.ProxyAddr != "" {
|
||||
err := scraper.SetProxy(config.ProxyAddr)
|
||||
if err != nil {
|
||||
log.Fatalf("An error occurred with proxy connection: %s\n", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
f, err := os.Open(config.CookiePath)
|
||||
if err != nil {
|
||||
log.Println("Cookie file does not yet exist")
|
||||
} else {
|
||||
var cookies []*http.Cookie
|
||||
json.NewDecoder(f).Decode(&cookies)
|
||||
scraper.SetCookies(cookies)
|
||||
}
|
||||
}
|
||||
|
||||
if scraper.IsLoggedIn() {
|
||||
log.Println("We're already logged in, skipping login...")
|
||||
} else {
|
||||
scraper.ClearCookies()
|
||||
if len(config.Username) > 0 {
|
||||
err := scraper.Login(config.Username, config.Password)
|
||||
if err != nil {
|
||||
log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
|
||||
} else {
|
||||
log.Printf("New Login - Saving cookies to %s\n", config.CookiePath)
|
||||
js, jsonErr := json.Marshal(scraper.GetCookies())
|
||||
if jsonErr != nil {
|
||||
log.Fatalf("An error occurred during cookie serialization: %s\n", jsonErr.Error())
|
||||
}
|
||||
f, fErr := os.Create(config.CookiePath)
|
||||
if fErr != nil {
|
||||
log.Fatalf("Failed to create cookie file at %s with the following error: %s\n", config.CookiePath, fErr.Error())
|
||||
}
|
||||
f.Write(js)
|
||||
writeErr := f.Close()
|
||||
if writeErr != nil {
|
||||
log.Fatalf("An error occurred on closing cookie file: %s\n", writeErr.Error())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Println("Trying open account login... ")
|
||||
_, err := scraper.LoginOpenAccount() // TODO: save openaccount token/secret
|
||||
if err != nil {
|
||||
log.Fatalf("An error occurred during scraper login: %s\n", err.Error())
|
||||
}
|
||||
defer scraper.Logout()
|
||||
}
|
||||
}
|
||||
|
||||
scraper.WithDelay(ScrapeDelay)
|
||||
|
||||
if config.UseWebServer {
|
||||
log.Printf("Starting webserver on port %d", config.WebPort)
|
||||
ws, err := web.New(config, scraper)
|
||||
if err != nil {
|
||||
log.Fatalf("An error occurred while starting webserver: %s\n", err.Error())
|
||||
}
|
||||
go ws.Server.ListenAndServe()
|
||||
}
|
||||
|
||||
log.Printf("Starting main app with %d workers", len(config.Channels))
|
||||
app := App{config, db, scraper}
|
||||
for i, c := range config.Channels {
|
||||
go app.queryLoop(i, c)
|
||||
}
|
||||
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
||||
done := make(chan bool, 1)
|
||||
|
||||
go func() {
|
||||
sig := <-sigs
|
||||
log.Println(sig)
|
||||
done <- true
|
||||
}()
|
||||
|
||||
<-done
|
||||
log.Println("Exiting...")
|
||||
}
|
||||
|
||||
// queryLoop is the per-channel worker: it periodically scrapes the channel's
// timeline, determines which tweets are new relative to the database,
// filters out unwanted tweet kinds, stores and forwards the remainder to the
// Discord webhook, and prunes old database rows. It runs forever.
func (app App) queryLoop(id int, channel string) {
	log.Printf("Starting worker %d for channel %s", id, channel)
	// Sleep to stagger api queries of workers
	time.Sleep(time.Duration(id) * time.Minute)

	db := app.db
	filter := app.config.Filter[id] // per-channel bitmask; see filterByte for the bit layout
	init := true

ScrapeLoop:
	for {
		if !init {
			// Sleep for set interval +-30 seconds
			time.Sleep(time.Duration(ScrapeInterval)*time.Minute + time.Duration(rand.Intn(60)-30)*time.Second)
		}
		init = false

		step := ScrapeStep
		tweets := []*ts.Tweet{}        // fetched tweets; iterated newest-first below
		tweetsToParse := []*ts.Tweet{} // tweets considered new relative to the db
		tweetsToPost := []*ts.Tweet{}  // tweets that passed the filter and were stored

		dbTweets, dbErr := db.GetTweets(channel)
		if dbErr != nil {
			log.Printf("Error while retrieving tweets from database for channel %s: %s", channel, dbErr.Error())
			continue ScrapeLoop
		}

	GetTweets:
		// Fetch in growing batches (step, +ScrapeStep, ... capped at 50) until
		// a fetched tweet matches one already in the database; everything
		// before that match is treated as new.
		// NOTE(review): `tweets` is not reset between iterations of this loop,
		// so a wider refetch appends to the previous batch and the slice can
		// contain duplicates — confirm whether this is intended.
		for {
			for tweet := range app.scraper.GetTweets(context.Background(), channel, step) {
				if tweet.Error != nil {
					log.Printf("Error while retrieving tweet for channel %s: %s", channel, tweet.Error.Error())
					continue ScrapeLoop
				}
				tweets = append(tweets, &tweet.Tweet)
			}
			if len(tweets) == 0 {
				break GetTweets
			}

			if len(dbTweets) > 0 {
				for _, dbTweet := range dbTweets {
					for i, tweet := range tweets {
						if dbTweet.EqualsTweet(tweet) {
							// Everything before the first known tweet is new.
							tweetsToParse = tweets[:i]
							break GetTweets
						}
					}
				}
			} else {
				// Empty database: take only the first tweet, presumably to
				// avoid flooding the webhook on a fresh install.
				tweetsToParse = append(tweetsToParse, tweets[0])
				break GetTweets
			}

			// No overlap with the database found yet: widen the batch, but
			// never request more than 50 tweets; give up at 50 and post only
			// the first tweet.
			if step >= 50 {
				tweetsToParse = append(tweetsToParse, tweets[0])
				break GetTweets
			} else if step+ScrapeStep > 50 {
				step = 50
			} else {
				step += ScrapeStep
			}

			log.Printf("Fetching more tweets for %s...", channel)
			time.Sleep(time.Duration(3) * time.Second) // Wait a few seconds for next api request
		}

	ParseTweets:
		// We want to parse old to new
		for i := len(tweetsToParse) - 1; i >= 0; i-- {
			tweet := tweetsToParse[i]
			if filterTweet(filter, tweet) {
				// One of the filters applies as same bits are 1, so we skip this tweet
				continue ParseTweets
			}

			// Persist first so a later crash cannot repost the same tweet.
			err := db.InsertTweet(channel, tweet)
			if err != nil {
				log.Printf("Error while inserting tweet for channel %s into the database: %s", channel, err.Error())
				continue ParseTweets
			}
			tweetsToPost = append(tweetsToPost, tweet)
		}

		app.SendToWebhook(tweetsToPost)
		err := db.PruneOldestTweets(channel)
		if err != nil {
			log.Printf("Error while pruning old tweets for channel %s: %s", channel, err.Error())
		}
	}
}
|
||||
|
||||
func filterTweet(filter uint8, tweet *ts.Tweet) bool {
|
||||
return filterByte(filter, tweet) > 0
|
||||
}
|
||||
|
||||
func filterByte(filter uint8, tweet *ts.Tweet) uint8 {
|
||||
var tweetFilter uint8 = 0
|
||||
filterMap := []bool{tweet.IsSelfThread, tweet.IsRetweet, tweet.IsReply, tweet.IsPin, tweet.IsQuoted}
|
||||
for _, f := range filterMap {
|
||||
tweetFilter <<= 1
|
||||
if f {
|
||||
tweetFilter |= 1
|
||||
}
|
||||
}
|
||||
|
||||
return filter & tweetFilter
|
||||
}
|
43
cmd/tweeter/tweeter_test.go
Normal file
43
cmd/tweeter/tweeter_test.go
Normal file
|
@ -0,0 +1,43 @@
|
|||
package tweeter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
ts "github.com/imperatrona/twitter-scraper"
|
||||
)
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
filter1 := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b10111, 0b10101, 0b01101, 0b10101}
|
||||
filter2 := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b01000, 0b01010, 0b01000, 0b11011}
|
||||
filterR := []uint8{0b10101, 0b11111, 0b00000, 0b00111, 0b00101, 0b00001, 0b00000, 0b00000, 0b01000, 0b10001}
|
||||
filterB := []bool{true, true, false, true, true, true, false, false, true, true}
|
||||
|
||||
for i, filterBool := range filterB {
|
||||
filterResult := filterR[i]
|
||||
|
||||
filterCheck := filter2[i]
|
||||
filterQuoted := (filterCheck & 1) != 0 // first bit
|
||||
filterPin := (filterCheck & 2) != 0 // second bit
|
||||
filterReply := (filterCheck & 4) != 0 // third bit
|
||||
filterRetweet := (filterCheck & 8) != 0 // fourth bit
|
||||
filterSelfThread := (filterCheck & 16) != 0 // fifth bit
|
||||
|
||||
tweet := ts.Tweet{
|
||||
IsSelfThread: filterSelfThread,
|
||||
IsRetweet: filterRetweet,
|
||||
IsReply: filterReply,
|
||||
IsPin: filterPin,
|
||||
IsQuoted: filterQuoted,
|
||||
}
|
||||
|
||||
resultBool := filterTweet(filter1[i], &tweet)
|
||||
if resultBool != filterBool {
|
||||
t.Errorf("%b AND %b > 0 = %t; got %t\n", filter1[i], filter2[i], filterBool, resultBool)
|
||||
}
|
||||
|
||||
resultByte := filterByte(filter1[i], &tweet)
|
||||
if resultByte != filterResult {
|
||||
t.Errorf("%b AND %b = %b; got %b\n", filter1[i], filter2[i], filterResult, resultByte)
|
||||
}
|
||||
}
|
||||
}
|
109
cmd/tweeter/webhook.go
Normal file
109
cmd/tweeter/webhook.go
Normal file
|
@ -0,0 +1,109 @@
|
|||
package tweeter
|
||||
|
||||
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"

	ts "github.com/imperatrona/twitter-scraper"
	//"strconv"
)
|
||||
|
||||
const (
	// BaseIcon is Twitter's apple-touch icon, suitable as an author/footer icon.
	BaseIcon = "https://abs.twimg.com/icons/apple-touch-icon-192x192.png"
)

// Webhook is the JSON payload posted to a Discord webhook endpoint.
type Webhook struct {
	Content string   `json:"content"`
	Name    string   `json:"username,omitempty"`
	Avatar  string   `json:"avatar_url,omitempty"`
	Mention *Mention `json:"allowed_mentions,omitempty"`
	Embeds  []*Embed `json:"embeds,omitempty"`
}

// Mention mirrors Discord's "allowed_mentions" object, restricting which
// mention types present in Content may actually ping anyone.
type Mention struct {
	Parse       []string `json:"parse,omitempty"`
	Roles       []string `json:"roles,omitempty"`
	Users       []string `json:"users,omitempty"`
	RepliedUser bool     `json:"replied_user,omitempty"`
}
|
||||
|
||||
func (app App) SendToWebhook(tweets []*ts.Tweet) {
|
||||
for _, tweet := range tweets {
|
||||
webhooksToSend := []*Webhook{}
|
||||
data := Webhook{
|
||||
Mention: &Mention{Parse: []string{"roles"}},
|
||||
}
|
||||
webhooksToSend = append(webhooksToSend, &data)
|
||||
|
||||
if len(tweet.Videos) > 0 {
|
||||
webhooksToSend = append(webhooksToSend, &Webhook{
|
||||
Content: "[\u2800](" + app.config.HostURL + "/video/" + tweet.ID + ")",
|
||||
Mention: &Mention{Parse: []string{"roles"}},
|
||||
})
|
||||
}
|
||||
|
||||
mainEmbed := data.NewEmbedWithURL(tweet.PermanentURL)
|
||||
mainEmbed.SetAuthor(tweet.Name+" (@"+tweet.Username+")", app.config.HostURL+"/tweet/"+tweet.ID, app.config.HostURL+"/avatar/"+tweet.Username)
|
||||
mainEmbed.SetColor("#26a7de")
|
||||
mainEmbed.SetTimestamp(tweet.TimeParsed)
|
||||
//mainEmbed.SetFooter("Twitter", BaseIcon)
|
||||
//mainEmbed.SetFooter("Twitter • <t:" + strconv.FormatInt((tweet.Timestamp), 10) + ":R>", BaseIcon)
|
||||
|
||||
tweetText := tweet.Text
|
||||
for i, photo := range tweet.Photos {
|
||||
embed := mainEmbed
|
||||
if i > 0 {
|
||||
embed = data.NewEmbedWithURL(tweet.PermanentURL)
|
||||
}
|
||||
embed.SetImage(photo.URL)
|
||||
tweetText = strings.ReplaceAll(tweetText, photo.URL, "")
|
||||
}
|
||||
for _, gif := range tweet.GIFs {
|
||||
//embed := mainEmbed
|
||||
//if i > 0 {
|
||||
// embed = data.NewEmbedWithURL(tweet.PermanentURL)
|
||||
//}
|
||||
//embed.SetImage(gif.Preview)
|
||||
tweetText = strings.ReplaceAll(tweetText, gif.Preview, "")
|
||||
tweetText = strings.ReplaceAll(tweetText, gif.URL, "")
|
||||
}
|
||||
for _, video := range tweet.Videos {
|
||||
//embed := mainEmbed
|
||||
//if i > 0 {
|
||||
// embed = data.NewEmbedWithURL(tweet.PermanentURL)
|
||||
//}
|
||||
//embed.SetImage(video.Preview)
|
||||
tweetText = strings.ReplaceAll(tweetText, video.Preview, "")
|
||||
tweetText = strings.ReplaceAll(tweetText, video.URL, "")
|
||||
}
|
||||
|
||||
mainEmbed.SetText(strings.TrimSpace(tweetText))
|
||||
|
||||
for _, data := range webhooksToSend {
|
||||
err := sendRequest(app.config.Webhook, data)
|
||||
if err != nil {
|
||||
log.Printf("Error while sending webhook for tweet %s: %s", tweet.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func sendRequest(url string, data *Webhook) error {
|
||||
jsonData, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := http.Post(url, "application/json", bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
return nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue