consolidate external text trimming

This commit is contained in:
sentriz
2023-12-10 18:44:59 +00:00
parent e31e37e366
commit a8333b8afa
4 changed files with 21 additions and 33 deletions

View File

@@ -8,10 +8,8 @@ import (
"fmt"
"net/http"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/andybalholm/cascadia"
@@ -60,9 +58,6 @@ func (c *Client) ArtistGetInfo(artistName string) (Artist, error) {
return Artist{}, fmt.Errorf("make request: %w", err)
}
resp.Artist.Bio.Summary = cleanLicenceText(resp.Artist.Bio.Summary)
resp.Artist.Bio.Content = cleanLicenceText(resp.Artist.Bio.Content)
return resp.Artist, nil
}
@@ -83,9 +78,6 @@ func (c *Client) AlbumGetInfo(artistName, albumName string) (Album, error) {
return Album{}, fmt.Errorf("make request: %w", err)
}
resp.Album.Wiki.Summary = cleanLicenceText(resp.Album.Wiki.Summary)
resp.Album.Wiki.Content = cleanLicenceText(resp.Album.Wiki.Content)
return resp.Album, nil
}
@@ -333,15 +325,3 @@ func GetParamSignature(params url.Values, secret string) string {
hash := md5.Sum([]byte(toHash))
return hex.EncodeToString(hash[:])
}
var (
	// doublePuncExpr matches two full stops separated by whitespace and
	// followed by more whitespace, e.g. ". . ".
	doublePuncExpr = regexp.MustCompile(`\.\s+\.\s+`)
	// licenceExpr matches, case-insensitively, the phrase "user-contributed
	// text" and everything after it up to the end of that line.
	licenceExpr = regexp.MustCompile(`(?i)user-contributed text.*`)
)

// cleanLicenceText removes the "user-contributed text ..." licence notice from
// text and tidies up the punctuation and whitespace left behind. The result
// has no leading or trailing whitespace, every internal whitespace run is a
// single space, and no period is preceded by a space.
func cleanLicenceText(text string) string {
	text = licenceExpr.ReplaceAllString(text, "")
	// Removing the notice can leave ". . " behind; fold that into one stop.
	text = doublePuncExpr.ReplaceAllString(text, ". ")
	// Collapse whitespace before fixing " ." so that periods preceded by
	// newlines, tabs or several spaces are caught as well. strings.Fields
	// already discards leading and trailing whitespace, so no extra trim is
	// needed afterwards.
	text = strings.Join(strings.Fields(text), " ")
	text = strings.ReplaceAll(text, " .", ".")
	return text
}

View File

@@ -333,7 +333,7 @@ func (c *Controller) ServeGetArtistInfoTwo(r *http.Request) *spec.Response {
return sub
}
sub.ArtistInfoTwo.Biography = info.Biography
sub.ArtistInfoTwo.Biography = spec.CleanExternalText(info.Biography)
sub.ArtistInfoTwo.MusicBrainzID = info.MusicBrainzID
sub.ArtistInfoTwo.LastFMURL = info.LastFMURL
@@ -409,7 +409,7 @@ func (c *Controller) ServeGetAlbumInfoTwo(r *http.Request) *spec.Response {
return sub
}
sub.AlbumInfo.Notes = info.Notes
sub.AlbumInfo.Notes = spec.CleanExternalText(info.Notes)
sub.AlbumInfo.MusicBrainzID = info.MusicBrainzID
sub.AlbumInfo.LastFMURL = info.LastFMURL

View File

@@ -2,19 +2,14 @@ package spec
import (
"go.senan.xyz/gonic/db"
"jaytaylor.com/html2text"
)
func NewPodcastChannel(p *db.Podcast) *PodcastChannel {
desc, err := html2text.FromString(p.Description, html2text.Options{TextOnly: true})
if err != nil {
desc = ""
}
ret := &PodcastChannel{
ID: p.SID(),
OriginalImageURL: p.ImageURL,
Title: p.Title,
Description: desc,
Description: CleanExternalText(p.Description),
URL: p.URL,
CoverArt: p.SID(),
Status: "skipped",
@@ -30,17 +25,13 @@ func NewPodcastEpisode(pe *db.PodcastEpisode) *PodcastEpisode {
if pe == nil {
return nil
}
desc, err := html2text.FromString(pe.Description, html2text.Options{TextOnly: true})
if err != nil {
desc = ""
}
r := &PodcastEpisode{
ID: pe.SID(),
StreamID: pe.SID(),
ContentType: pe.MIME(),
ChannelID: pe.PodcastSID(),
Title: pe.Title,
Description: desc,
Description: CleanExternalText(pe.Description),
Status: string(pe.Status),
CoverArt: pe.PodcastSID(),
PublishDate: *pe.PublishDate,

View File

@@ -2,11 +2,13 @@ package spec
import (
"fmt"
"regexp"
"strings"
"time"
"go.senan.xyz/gonic"
"go.senan.xyz/gonic/server/ctrlsubsonic/specid"
"jaytaylor.com/html2text"
)
// https://web.archive.org/web/20220707025402/https://www.subsonic.org/pages/api.jsp
@@ -472,3 +474,18 @@ func formatRating(rating float64) string {
// formatExt returns ext with one leading "." removed, if present. Any other
// input, including the empty string, is returned unchanged.
func formatExt(ext string) string {
	if strings.HasPrefix(ext, ".") {
		return ext[1:]
	}
	return ext
}
var (
	// doublePuncExpr matches two full stops separated by whitespace and
	// followed by more whitespace, e.g. ". . ".
	doublePuncExpr = regexp.MustCompile(`\.\s+\.\s+`)
	// licenceExpr matches, case-insensitively, "user-contributed text" at a
	// word boundary and everything after it up to the end of that line.
	licenceExpr = regexp.MustCompile(`(?i)\buser-contributed text.*`)
	// readMoreExpr matches, case-insensitively, "read more on" at a word
	// boundary and everything after it up to the end of that line.
	readMoreExpr = regexp.MustCompile(`(?i)\bread more on.*`)
)

// CleanExternalText converts text from an external source into plain text
// suitable for API responses. It strips HTML markup, removes the
// "user-contributed text ..." and "read more on ..." trailers, and normalises
// punctuation and whitespace. The result has no leading or trailing
// whitespace, every internal whitespace run is a single space, and no period
// is preceded by a space.
func CleanExternalText(text string) string {
	// The conversion error is deliberately ignored: this is best-effort
	// cleaning and whatever html2text returns is used as-is.
	// NOTE(review): html2text appears to return "" alongside an error, which
	// would make the result empty on malformed input — confirm against the
	// library.
	text, _ = html2text.FromString(text, html2text.Options{TextOnly: true})
	text = licenceExpr.ReplaceAllString(text, "")
	text = readMoreExpr.ReplaceAllString(text, "")
	// Removing the trailers can leave ". . " behind; fold that into one stop.
	text = doublePuncExpr.ReplaceAllString(text, ". ")
	// Collapse whitespace before fixing " ." so that periods preceded by
	// newlines, tabs or several spaces are caught as well. strings.Fields
	// already discards leading and trailing whitespace, so no extra trim is
	// needed afterwards.
	text = strings.Join(strings.Fields(text), " ")
	text = strings.ReplaceAll(text, " .", ".")
	return text
}