Redmage/api/download_subreddit_images.go

332 lines
8.8 KiB
Go
Raw Normal View History

2024-04-09 22:37:26 +07:00
package api
import (
"context"
"errors"
2024-04-14 00:32:55 +07:00
"image/jpeg"
"io"
"math"
2024-04-10 17:13:07 +07:00
"net/http"
2024-04-14 00:32:55 +07:00
"net/url"
"os"
"path"
"strings"
"sync"
2024-04-14 00:32:55 +07:00
"github.com/disintegration/imaging"
"github.com/tigorlazuardi/redmage/api/reddit"
2024-04-25 12:31:20 +07:00
"github.com/tigorlazuardi/redmage/models"
"github.com/tigorlazuardi/redmage/pkg/errs"
2024-04-14 00:32:55 +07:00
"github.com/tigorlazuardi/redmage/pkg/log"
"github.com/tigorlazuardi/redmage/pkg/telemetry"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
2024-04-25 12:31:20 +07:00
"golang.org/x/sync/errgroup"
)
2024-04-09 22:37:26 +07:00
type DownloadSubredditParams struct {
2024-04-14 00:32:55 +07:00
Countback int
2024-04-25 12:31:20 +07:00
Devices models.DeviceSlice
2024-04-14 00:32:55 +07:00
SubredditType reddit.SubredditType
2024-04-09 22:37:26 +07:00
}
// Sentinel errors reported by DownloadSubredditImages when its
// preconditions are not met.
var (
	// ErrNoDevices is used when the download parameters contain no device.
	ErrNoDevices = errors.New("api: no devices set")
	// ErrDownloadDirNotSet is used when the "download.directory"
	// configuration value is empty.
	ErrDownloadDirNotSet = errors.New("api: download directory not set")
)
2024-04-09 22:37:26 +07:00
func (api *API) DownloadSubredditImages(ctx context.Context, subredditName string, params DownloadSubredditParams) error {
2024-04-10 17:13:07 +07:00
downloadDir := api.config.String("download.directory")
if downloadDir == "" {
return errs.Wrapw(ErrDownloadDirNotSet, "download directory must be set before images can be downloaded").Code(http.StatusBadRequest)
}
if len(params.Devices) == 0 {
2024-04-10 17:13:07 +07:00
return errs.Wrapw(ErrNoDevices, "downloading images requires at least one device configured").Code(http.StatusBadRequest)
}
2024-04-10 17:13:07 +07:00
2024-04-14 00:32:55 +07:00
ctx, span := tracer.Start(ctx, "*API.DownloadSubredditImages", trace.WithAttributes(attribute.String("subreddit", subredditName)))
defer span.End()
wg := sync.WaitGroup{}
countback := params.Countback
2024-04-25 12:31:20 +07:00
var (
list reddit.Listing
err error
)
for countback > 0 {
limit := 100
if limit > countback {
limit = countback
2024-04-14 00:32:55 +07:00
}
2024-04-25 12:31:20 +07:00
list, err = api.reddit.GetPosts(ctx, reddit.GetPostsParam{
2024-04-14 00:32:55 +07:00
Subreddit: subredditName,
Limit: limit,
2024-04-25 12:31:20 +07:00
After: list.GetLastAfter(),
2024-04-14 00:32:55 +07:00
SubredditType: params.SubredditType,
})
if err != nil {
return errs.Wrapw(err, "failed to get posts", "subreddit_name", subredditName, "params", params)
}
wg.Add(1)
go func(ctx context.Context, posts reddit.Listing) {
defer wg.Done()
err := api.downloadSubredditListImage(ctx, list, params)
if err != nil {
log.New(ctx).Err(err).Error("failed to download image")
}
}(ctx, list)
2024-04-25 12:31:20 +07:00
if len(list.GetPosts()) == 0 {
break
}
2024-04-14 00:32:55 +07:00
countback -= len(list.GetPosts())
}
wg.Wait()
return nil
}
func (api *API) downloadSubredditListImage(ctx context.Context, list reddit.Listing, params DownloadSubredditParams) error {
2024-04-14 00:49:36 +07:00
ctx, span := tracer.Start(ctx, "*API.downloadSubredditListImage")
2024-04-14 00:32:55 +07:00
defer span.End()
wg := sync.WaitGroup{}
for _, post := range list.GetPosts() {
if !post.IsImagePost() {
continue
}
2024-04-25 12:31:20 +07:00
devices := api.getDevicesThatAcceptPost(ctx, post, params.Devices)
2024-04-14 00:32:55 +07:00
if len(devices) == 0 {
continue
}
wg.Add(1)
api.imageSemaphore <- struct{}{}
go func(ctx context.Context, post reddit.Post) {
defer func() {
<-api.imageSemaphore
wg.Done()
}()
2024-04-14 00:49:36 +07:00
if err := api.downloadSubredditImage(ctx, post, devices); err != nil {
log.New(ctx).Err(err).Error("failed to download subreddit image")
2024-04-14 00:32:55 +07:00
}
2024-04-14 00:49:36 +07:00
}(ctx, post)
}
2024-04-14 00:32:55 +07:00
2024-04-14 00:49:36 +07:00
wg.Wait()
2024-04-14 00:32:55 +07:00
2024-04-14 00:49:36 +07:00
return nil
}
2024-04-14 00:32:55 +07:00
2024-04-25 12:31:20 +07:00
func (api *API) downloadSubredditImage(ctx context.Context, post reddit.Post, devices models.DeviceSlice) error {
2024-04-14 00:49:36 +07:00
ctx, span := tracer.Start(ctx, "*API.downloadSubredditImage")
defer span.End()
2024-04-14 00:32:55 +07:00
2024-04-14 00:49:36 +07:00
imageHandler, err := api.reddit.DownloadImage(ctx, post, api.downloadBroadcast)
if err != nil {
return errs.Wrapw(err, "failed to download image")
}
defer imageHandler.Close()
2024-04-14 00:32:55 +07:00
2024-04-14 00:49:36 +07:00
// copy to temp dir first to avoid copying incomplete files.
tmpImageFile, err := api.copyImageToTempDir(ctx, imageHandler)
if err != nil {
return errs.Wrapw(err, "failed to download image to temp file")
2024-04-14 00:32:55 +07:00
}
2024-04-14 00:49:36 +07:00
defer tmpImageFile.Close()
2024-04-14 00:32:55 +07:00
2024-04-14 00:49:36 +07:00
w, close, err := api.createDeviceImageWriters(post, devices)
if err != nil {
return errs.Wrapw(err, "failed to create image files")
}
defer close()
_, err = io.Copy(w, tmpImageFile)
if err != nil {
return errs.Wrapw(err, "failed to save image files")
}
thumbnailPath := post.GetThumbnailTargetPath(api.config)
_, errStat := os.Stat(thumbnailPath)
if errStat == nil {
// file exist
return nil
}
if !errors.Is(errStat, os.ErrNotExist) {
2024-04-14 13:11:10 +07:00
return errs.Wrapw(err, "failed to check thumbnail existence", "path", thumbnailPath)
2024-04-14 00:49:36 +07:00
}
thumbnailSource, err := imaging.Open(tmpImageFile.filename)
if err != nil {
return errs.Wrapw(err, "failed to open temp thumbnail file", "filename", tmpImageFile.filename)
}
thumbnail := imaging.Resize(thumbnailSource, 256, 0, imaging.Lanczos)
thumbnailFile, err := os.Create(thumbnailPath)
if err != nil {
return errs.Wrapw(err, "failed to create thumbnail file", "filename", thumbnailPath)
}
defer thumbnailFile.Close()
err = jpeg.Encode(thumbnailFile, thumbnail, nil)
if err != nil {
return errs.Wrapw(err, "failed to encode thumbnail file to jpeg", "filename", thumbnailPath)
}
2024-04-14 00:32:55 +07:00
2024-04-09 22:37:26 +07:00
return nil
}
2024-04-14 00:32:55 +07:00
2024-04-25 12:31:20 +07:00
func (api *API) createDeviceImageWriters(post reddit.Post, devices models.DeviceSlice) (writer io.Writer, close func(), err error) {
2024-04-14 00:32:55 +07:00
// open file for each device
var files []*os.File
var writers []io.Writer
for _, device := range devices {
var filename string
if device.WindowsWallpaperMode == 1 {
filename = post.GetWindowsWallpaperImageTargetPath(api.config, device)
} else {
filename = post.GetImageTargetPath(api.config, device)
}
2024-04-25 12:31:20 +07:00
file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644)
2024-04-14 00:32:55 +07:00
if err != nil {
for _, f := range files {
_ = f.Close()
}
return nil, nil, errs.Wrapw(err, "failed to open temp image file",
"device_name", device.Name,
2024-04-14 17:30:04 +07:00
"device_slug", device.Slug,
2024-04-14 00:32:55 +07:00
"filename", filename,
)
}
files = append(files, file)
writers = append(writers, file)
}
return io.MultiWriter(writers...), func() {
for _, file := range files {
_ = file.Close()
}
}, nil
}
2024-04-25 12:31:20 +07:00
func (api *API) getDevicesThatAcceptPost(ctx context.Context, post reddit.Post, devices models.DeviceSlice) (devs models.DeviceSlice) {
var mu sync.Mutex
errgrp, ctx := errgroup.WithContext(ctx)
2024-04-14 00:32:55 +07:00
for _, device := range devices {
if shouldDownloadPostForDevice(post, device) {
2024-04-25 12:31:20 +07:00
device := device
errgrp.Go(func() error {
if !api.isImageExists(ctx, post, device) {
mu.Lock()
defer mu.Unlock()
devs = append(devices, device)
}
return nil
})
2024-04-14 00:32:55 +07:00
}
}
2024-04-25 12:31:20 +07:00
_ = errgrp.Wait()
2024-04-14 00:32:55 +07:00
return devs
}
2024-04-25 12:31:20 +07:00
// isImageExists reports whether the image of post is already present for
// device.
//
// The image counts as present only when the target image file can be stat-ed
// and a row matching the device ID and post ID is found in the images table.
// Any error from either lookup is reported as "not found".
func (api *API) isImageExists(ctx context.Context, post reddit.Post, device *models.Device) (found bool) {
	ctx, span := tracer.Start(ctx, "*API.IsImageExists")
	defer span.End()

	// A missing (or unreadable) target file means the image has to be fetched.
	imagePath := post.GetImageTargetPath(api.config, device)
	if _, statErr := os.Stat(imagePath); statErr != nil {
		return false
	}

	query := models.Images.Query(ctx, api.exec,
		models.SelectWhere.Images.DeviceID.EQ(device.ID),
		models.SelectWhere.Images.PostID.EQ(post.GetID()),
	)
	if _, queryErr := query.One(); queryErr != nil {
		return false
	}
	return true
}
func shouldDownloadPostForDevice(post reddit.Post, device *models.Device) bool {
if post.IsNSFW() && device.NSFW == 0 {
2024-04-14 00:32:55 +07:00
return false
}
if math.Abs(deviceAspectRatio(device)-post.GetImageAspectRatio()) > device.AspectRatioTolerance { // outside of aspect ratio tolerance
return false
}
width, height := post.GetImageSize()
2024-04-25 12:31:20 +07:00
if device.MaxX > 0 && width > int64(device.MaxX) {
2024-04-14 00:32:55 +07:00
return false
}
2024-04-25 12:31:20 +07:00
if device.MaxY > 0 && height > int64(device.MaxY) {
2024-04-14 00:32:55 +07:00
return false
}
2024-04-25 12:31:20 +07:00
if device.MinX > 0 && width < int64(device.MinX) {
2024-04-14 00:32:55 +07:00
return false
}
2024-04-25 12:31:20 +07:00
if device.MinY > 0 && height < int64(device.MinY) {
2024-04-14 00:32:55 +07:00
return false
}
return true
}
2024-04-25 12:31:20 +07:00
func deviceAspectRatio(device *models.Device) float64 {
2024-04-14 00:32:55 +07:00
return float64(device.ResolutionX) / float64(device.ResolutionY)
}
// tempFile bundles an open file handle with the path it was opened from, so
// the same file can be read as a stream or reopened by name.
type tempFile struct {
	// filename is the path of the file on disk.
	filename string
	// file is the open handle that Read and Close operate on.
	file *os.File
}

// Read reads from the underlying file handle, starting at its current offset.
func (tf *tempFile) Read(p []byte) (n int, err error) {
	n, err = tf.file.Read(p)
	return n, err
}

// Close closes the underlying file handle. The file on disk is left in place.
func (tf *tempFile) Close() error {
	closeErr := tf.file.Close()
	return closeErr
}
// copyImageToTempDir copies the image to a temporary directory and returns the file handle
//
// file must be closed by the caller after use.
//
// file is nil if an error occurred.
func (api *API) copyImageToTempDir(ctx context.Context, img reddit.PostImage) (tmp *tempFile, err error) {
_, span := tracer.Start(ctx, "*API.copyImageToTempDir")
defer func() { telemetry.EndWithStatus(span, err) }()
// ignore error because url is always valid if this
// function is called
url, _ := url.Parse(img.URL)
split := strings.Split(url.Path, "/")
imageFilename := split[len(split)-1]
tmpDirname := path.Join(os.TempDir(), "redmage")
2024-04-25 12:31:20 +07:00
_ = os.MkdirAll(tmpDirname, 0o644)
2024-04-14 00:32:55 +07:00
tmpFilename := path.Join(tmpDirname, imageFilename)
2024-04-25 12:31:20 +07:00
file, err := os.OpenFile(tmpFilename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644)
2024-04-14 00:32:55 +07:00
if err != nil {
return nil, errs.Wrapw(err, "failed to open temp image file",
"temp_file_path", tmpFilename,
"image_url", img.URL,
)
}
_, err = io.Copy(file, img.File)
if err != nil {
_ = file.Close()
return nil, errs.Wrapw(err, "failed to download image to temp file",
"temp_file_path", tmpFilename,
"image_url", img.URL,
)
}
return &tempFile{
file: file,
filename: tmpFilename,
}, err
}