consolidate external text trimming

This commit is contained in:
sentriz
2023-12-10 18:44:59 +00:00
parent e31e37e366
commit a8333b8afa
4 changed files with 21 additions and 33 deletions

View File

@@ -8,10 +8,8 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"net/url" "net/url"
"regexp"
"sort" "sort"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/andybalholm/cascadia" "github.com/andybalholm/cascadia"
@@ -60,9 +58,6 @@ func (c *Client) ArtistGetInfo(artistName string) (Artist, error) {
return Artist{}, fmt.Errorf("make request: %w", err) return Artist{}, fmt.Errorf("make request: %w", err)
} }
resp.Artist.Bio.Summary = cleanLicenceText(resp.Artist.Bio.Summary)
resp.Artist.Bio.Content = cleanLicenceText(resp.Artist.Bio.Content)
return resp.Artist, nil return resp.Artist, nil
} }
@@ -83,9 +78,6 @@ func (c *Client) AlbumGetInfo(artistName, albumName string) (Album, error) {
return Album{}, fmt.Errorf("make request: %w", err) return Album{}, fmt.Errorf("make request: %w", err)
} }
resp.Album.Wiki.Summary = cleanLicenceText(resp.Album.Wiki.Summary)
resp.Album.Wiki.Content = cleanLicenceText(resp.Album.Wiki.Content)
return resp.Album, nil return resp.Album, nil
} }
@@ -333,15 +325,3 @@ func GetParamSignature(params url.Values, secret string) string {
hash := md5.Sum([]byte(toHash)) hash := md5.Sum([]byte(toHash))
return hex.EncodeToString(hash[:]) return hex.EncodeToString(hash[:])
} }
// Patterns applied by cleanLicenceText.
var (
	// doublePuncExpr matches two full stops, each followed by whitespace.
	doublePuncExpr = regexp.MustCompile(`\.\s+\.\s+`)
	// licenceExpr matches "user-contributed text", in any letter case, together
	// with the remainder of the line it appears on.
	licenceExpr = regexp.MustCompile(`(?i)user-contributed text.*`)
)

// cleanLicenceText removes the "user-contributed text" trailer from text,
// collapses a doubled full stop into a single one, removes the space before a
// full stop, and squeezes every run of whitespace down to one space with no
// leading or trailing whitespace.
func cleanLicenceText(text string) string {
	withoutLicence := licenceExpr.ReplaceAllString(text, "")
	singleStops := doublePuncExpr.ReplaceAllString(withoutLicence, ". ")
	tightStops := strings.ReplaceAll(singleStops, " .", ".")

	// Splitting into fields and re-joining both normalises inner whitespace and
	// drops any leading or trailing whitespace.
	words := strings.Fields(tightStops)
	return strings.Join(words, " ")
}

View File

@@ -333,7 +333,7 @@ func (c *Controller) ServeGetArtistInfoTwo(r *http.Request) *spec.Response {
return sub return sub
} }
sub.ArtistInfoTwo.Biography = info.Biography sub.ArtistInfoTwo.Biography = spec.CleanExternalText(info.Biography)
sub.ArtistInfoTwo.MusicBrainzID = info.MusicBrainzID sub.ArtistInfoTwo.MusicBrainzID = info.MusicBrainzID
sub.ArtistInfoTwo.LastFMURL = info.LastFMURL sub.ArtistInfoTwo.LastFMURL = info.LastFMURL
@@ -409,7 +409,7 @@ func (c *Controller) ServeGetAlbumInfoTwo(r *http.Request) *spec.Response {
return sub return sub
} }
sub.AlbumInfo.Notes = info.Notes sub.AlbumInfo.Notes = spec.CleanExternalText(info.Notes)
sub.AlbumInfo.MusicBrainzID = info.MusicBrainzID sub.AlbumInfo.MusicBrainzID = info.MusicBrainzID
sub.AlbumInfo.LastFMURL = info.LastFMURL sub.AlbumInfo.LastFMURL = info.LastFMURL

View File

@@ -2,19 +2,14 @@ package spec
import ( import (
"go.senan.xyz/gonic/db" "go.senan.xyz/gonic/db"
"jaytaylor.com/html2text"
) )
func NewPodcastChannel(p *db.Podcast) *PodcastChannel { func NewPodcastChannel(p *db.Podcast) *PodcastChannel {
desc, err := html2text.FromString(p.Description, html2text.Options{TextOnly: true})
if err != nil {
desc = ""
}
ret := &PodcastChannel{ ret := &PodcastChannel{
ID: p.SID(), ID: p.SID(),
OriginalImageURL: p.ImageURL, OriginalImageURL: p.ImageURL,
Title: p.Title, Title: p.Title,
Description: desc, Description: CleanExternalText(p.Description),
URL: p.URL, URL: p.URL,
CoverArt: p.SID(), CoverArt: p.SID(),
Status: "skipped", Status: "skipped",
@@ -30,17 +25,13 @@ func NewPodcastEpisode(pe *db.PodcastEpisode) *PodcastEpisode {
if pe == nil { if pe == nil {
return nil return nil
} }
desc, err := html2text.FromString(pe.Description, html2text.Options{TextOnly: true})
if err != nil {
desc = ""
}
r := &PodcastEpisode{ r := &PodcastEpisode{
ID: pe.SID(), ID: pe.SID(),
StreamID: pe.SID(), StreamID: pe.SID(),
ContentType: pe.MIME(), ContentType: pe.MIME(),
ChannelID: pe.PodcastSID(), ChannelID: pe.PodcastSID(),
Title: pe.Title, Title: pe.Title,
Description: desc, Description: CleanExternalText(pe.Description),
Status: string(pe.Status), Status: string(pe.Status),
CoverArt: pe.PodcastSID(), CoverArt: pe.PodcastSID(),
PublishDate: *pe.PublishDate, PublishDate: *pe.PublishDate,

View File

@@ -2,11 +2,13 @@ package spec
import ( import (
"fmt" "fmt"
"regexp"
"strings" "strings"
"time" "time"
"go.senan.xyz/gonic" "go.senan.xyz/gonic"
"go.senan.xyz/gonic/server/ctrlsubsonic/specid" "go.senan.xyz/gonic/server/ctrlsubsonic/specid"
"jaytaylor.com/html2text"
) )
// https://web.archive.org/web/20220707025402/https://www.subsonic.org/pages/api.jsp // https://web.archive.org/web/20220707025402/https://www.subsonic.org/pages/api.jsp
@@ -472,3 +474,18 @@ func formatRating(rating float64) string {
func formatExt(ext string) string { func formatExt(ext string) string {
return strings.TrimPrefix(ext, ".") return strings.TrimPrefix(ext, ".")
} }
// Patterns applied by CleanExternalText.
var (
	// doublePuncExpr matches two full stops, each followed by whitespace.
	doublePuncExpr = regexp.MustCompile(`\.\s+\.\s+`)
	// licenceExpr matches "user-contributed text" at a word boundary, in any
	// letter case, together with the remainder of the line it appears on.
	licenceExpr = regexp.MustCompile(`(?i)\buser-contributed text.*`)
	// readMoreExpr matches "read more on" at a word boundary, in any letter
	// case, together with the remainder of the line it appears on.
	readMoreExpr = regexp.MustCompile(`(?i)\bread more on.*`)
)

// CleanExternalText passes text through html2text in text-only mode, removes
// the "user-contributed text" and "read more on" trailers, collapses a doubled
// full stop into a single one, removes the space before a full stop, and
// squeezes every run of whitespace down to one space with no leading or
// trailing whitespace.
func CleanExternalText(text string) string {
	// The conversion error is intentionally discarded: whatever string
	// FromString hands back is cleaned and returned.
	plain, _ := html2text.FromString(text, html2text.Options{TextOnly: true})

	for _, trailer := range []*regexp.Regexp{licenceExpr, readMoreExpr} {
		plain = trailer.ReplaceAllString(plain, "")
	}
	plain = doublePuncExpr.ReplaceAllString(plain, ". ")
	plain = strings.ReplaceAll(plain, " .", ".")

	// Splitting into fields and re-joining both normalises inner whitespace and
	// drops any leading or trailing whitespace.
	return strings.Join(strings.Fields(plain), " ")
}