working without api key

This commit is contained in:
pika 2024-12-12 11:37:43 +01:00
parent b97b89efb8
commit 999376e5f7
5 changed files with 371 additions and 166 deletions

View file

@ -1,15 +1,13 @@
package scraper
import (
"context"
"fmt"
"time"
// "log"
"html"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
"time"
)
type Video struct {
@ -17,155 +15,103 @@ type Video struct {
URL string
Channel string
Duration string
Views string
Thumbnail string
UploadDate string
}
// API_KEY is the key handed to option.WithAPIKey when the YouTube service
// client is created in FetchVideos.
// Replace with your actual API key.
// NOTE(review): a literal key is embedded in source here; a key committed to a
// repository should be treated as exposed — consider rotating it and loading
// the value from configuration or the environment instead.
const API_KEY = "AIzaSyAzsihRkp8mYTOXLOkVN09yTqld9TJ4Nts"
// formatViews renders a raw view count as a short label. Counts of at least a
// thousand, a million or a billion are scaled and printed with one decimal and
// a K, M or B suffix; smaller counts are printed exactly.
func formatViews(count uint64) string {
	// Ordered from the largest unit down so the first match wins.
	scales := []struct {
		threshold uint64
		layout    string
	}{
		{1000000000, "%.1fB views"},
		{1000000, "%.1fM views"},
		{1000, "%.1fK views"},
	}

	for _, sc := range scales {
		if count >= sc.threshold {
			return fmt.Sprintf(sc.layout, float64(count)/float64(sc.threshold))
		}
	}
	return fmt.Sprintf("%d views", count)
}
// formatDuration converts an ISO 8601 duration such as "PT1H2M3S" into a
// clock-style string ("1:02:03").
//
// Minutes and seconds are always emitted and zero-padded to two digits, so a
// seconds-only value such as "PT45S" becomes "00:45" rather than a bare "45".
// A day component ("P1DT2H3M4S") is folded into the hour field at 24 hours per
// day ("26:03:04"). The hour field is only present when the input carries an
// hour designator or a non-zero day count. Input without the leading "P" is
// treated as a bare time part.
func formatDuration(duration string) string {
	// Split into the date part (between "P" and "T") and the time part (after
	// "T") so that a date designator is never mistaken for a time one.
	datePart, timePart := "", duration
	if strings.HasPrefix(duration, "P") {
		rest := duration[1:]
		if i := strings.Index(rest, "T"); i != -1 {
			datePart, timePart = rest[:i], rest[i+1:]
		} else {
			datePart, timePart = rest, ""
		}
	}

	days := 0
	if i := strings.Index(datePart, "D"); i != -1 {
		days = parseDurationDigits(datePart[:i])
	}

	var result strings.Builder

	// Handle hours, folding any whole days into the same field.
	hours, hasHours := "", false
	if i := strings.Index(timePart, "H"); i != -1 {
		hours, hasHours = timePart[:i], true
		timePart = timePart[i+1:]
	}
	if days > 0 {
		hours = fmt.Sprintf("%d", days*24+parseDurationDigits(hours))
		hasHours = true
	}
	if hasHours {
		result.WriteString(hours)
		result.WriteString(":")
	}

	// Handle minutes; an absent component is written as "00".
	if i := strings.Index(timePart, "M"); i != -1 {
		minutes := timePart[:i]
		if len(minutes) == 1 {
			result.WriteString("0")
		}
		result.WriteString(minutes)
		result.WriteString(":")
		timePart = timePart[i+1:]
	} else {
		result.WriteString("00:")
	}

	// Handle seconds; an absent component is written as "00".
	if i := strings.Index(timePart, "S"); i != -1 {
		seconds := timePart[:i]
		if len(seconds) == 1 {
			result.WriteString("0")
		}
		result.WriteString(seconds)
	} else {
		result.WriteString("00")
	}

	return result.String()
}

// parseDurationDigits returns the value of a run of ASCII digits, or 0 when s
// is empty or contains any other character.
func parseDurationDigits(s string) int {
	n := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			return 0
		}
		n = n*10 + int(c-'0')
	}
	return n
}
// formatUploadDate turns an RFC 3339 timestamp into a "DD-MM-YYYY" date. When
// the timestamp is fewer than 30 days before the current time, a relative age
// ("just now", "3h ago", "1 day ago", "5 days ago") is appended in parentheses.
// Input that does not parse as RFC 3339 is returned unchanged.
func formatUploadDate(uploadDate string) string {
	published, parseErr := time.Parse(time.RFC3339, uploadDate)
	if parseErr != nil {
		return uploadDate
	}

	age := time.Since(published)
	ageDays := int(age.Hours() / 24)
	dateLabel := published.Format("02-01-2006")

	// Uploads at least 30 days old show only the calendar date.
	if ageDays >= 30 {
		return dateLabel
	}

	var relative string
	switch ageDays {
	case 0:
		// Within the same 24-hour window: fall back to whole hours.
		if wholeHours := int(age.Hours()); wholeHours != 0 {
			relative = fmt.Sprintf("%dh ago", wholeHours)
		} else {
			relative = "just now"
		}
	case 1:
		relative = "1 day ago"
	default:
		relative = fmt.Sprintf("%d days ago", ageDays)
	}

	return fmt.Sprintf("%s (%s)", dateLabel, relative)
}
// Regular expressions, modelled on the shell script, that pull video fields
// out of the JSON embedded in a YouTube search results page.
//
// Title and channel captures accept backslash escapes (for example \" inside a
// title) so that a value containing an escaped quote is still matched in full;
// the captured text keeps its escapes. The video ID capture is limited to the
// 11 URL-safe characters of an ID so that trailing parameters such as
// "\u0026pp=..." are not swallowed into it.
var (
	titleRegex      = regexp.MustCompile(`"title":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	channelRegex    = regexp.MustCompile(`"ownerText":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	durationRegex   = regexp.MustCompile(`"lengthText":\{"accessibility":\{"accessibilityData":\{"label":"[^"]*"\}\},"simpleText":"([^"]+)"`)
	uploadDateRegex = regexp.MustCompile(`"publishedTimeText":\{"simpleText":"([^"]+)"\}`)
	videoIDRegex    = regexp.MustCompile(`watch\?v=([A-Za-z0-9_-]{11})`)
)
// FetchVideos looks up videos for query and returns them, or an error when a
// request fails or no videos are found.
//
// NOTE(review): the lines below interleave two variants of the body — one that
// calls the YouTube Data API through youtubeService, and one that fetches the
// search results page over HTTP and extracts fields with the package-level
// regular expressions. As shown, the span does not compile; confirm which
// variant is intended before changing any statement here.
func FetchVideos(query string) ([]Video, error) {
ctx := context.Background()
youtubeService, err := youtube.NewService(ctx, option.WithAPIKey(API_KEY))
if err != nil {
return nil, fmt.Errorf("error creating YouTube client: %w", err)
client := &http.Client{
Timeout: 10 * time.Second,
}
// Make the search request
call := youtubeService.Search.List([]string{"snippet"}).
Q(query).
MaxResults(50).
Type("video").
VideoDuration("any")
// Format URL similar to the shell script
// NOTE(review): url.QueryEscape percent-encodes "+", so the "+" substituted
// for each space here is sent as %2B; QueryEscape already encodes spaces
// itself — confirm the ReplaceAll is intended.
searchURL := fmt.Sprintf("https://www.youtube.com/results?search_query=%s",
url.QueryEscape(strings.ReplaceAll(query, " ", "+")))
response, err := call.Do()
fmt.Printf("Fetching: %s\n", searchURL) // Debug print
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("error making search request: %w", err)
return nil, fmt.Errorf("error creating request: %w", err)
}
// Add headers to mimic a browser
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("error making request: %w", err)
}
defer resp.Body.Close()
// NOTE(review): resp.StatusCode is not inspected; the body of any response
// is read and parsed.
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading response: %w", err)
}
content := string(body)
// Extract information
titles := titleRegex.FindAllStringSubmatch(content, -1)
channels := channelRegex.FindAllStringSubmatch(content, -1)
durations := durationRegex.FindAllStringSubmatch(content, -1)
uploadDates := uploadDateRegex.FindAllStringSubmatch(content, -1)
videoIDs := videoIDRegex.FindAllStringSubmatch(content, -1)
fmt.Printf("Found: %d titles, %d channels, %d durations, %d dates, %d IDs\n",
len(titles), len(channels), len(durations), len(uploadDates), len(videoIDs))
var videos []Video
for _, item := range response.Items {
video := Video{
Title: item.Snippet.Title,
URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", item.Id.VideoId),
Channel: item.Snippet.ChannelTitle,
Thumbnail: item.Snippet.Thumbnails.Default.Url,
UploadDate: item.Snippet.PublishedAt,
// The i-th match of every pattern is combined into one Video, so the result
// is only right when all patterns match the same entries in the same order.
for i := 0; i < len(titles) && i < 10; i++ { // Limit to 10 results like the shell script
if i >= len(videoIDs) {
break
}
video := Video{
Title: unescapeHTML(titles[i][1]),
URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoIDs[i][1]),
}
if i < len(channels) {
video.Channel = unescapeHTML(channels[i][1])
}
if i < len(durations) {
video.Duration = durations[i][1]
}
if i < len(uploadDates) {
video.UploadDate = uploadDates[i][1]
}
videos = append(videos, video)
}
// Get additional video details (duration, views) in a single request
videoIds := make([]string, len(response.Items))
for i, item := range response.Items {
videoIds[i] = item.Id.VideoId
}
// Get video statistics
statsCall := youtubeService.Videos.List([]string{"contentDetails", "statistics"}).
Id(videoIds...)
statsResponse, err := statsCall.Do()
if err != nil {
return nil, fmt.Errorf("error fetching video details: %w", err)
}
// Update videos with additional information
for i, stat := range statsResponse.Items {
if i < len(videos) {
videos[i].Duration = formatDuration(stat.ContentDetails.Duration)
videos[i].Views = formatViews(stat.Statistics.ViewCount)
videos[i].Title = html.UnescapeString(videos[i].Title)
videos[i].UploadDate = formatUploadDate(videos[i].UploadDate)
}
if len(videos) == 0 {
return nil, fmt.Errorf("no videos found")
}
return videos, nil
}
// Replacers used by unescapeHTML. They are applied as two ordered passes so
// the result never depends on map iteration order: JSON string escapes first,
// then the HTML entities those escapes may have revealed.
var (
	jsonEscapeReplacer = strings.NewReplacer(
		`\u0026`, "&",
		`\"`, `"`,
		`\u003c`, "<",
		`\u003e`, ">",
	)
	htmlEntityReplacer = strings.NewReplacer(
		"&quot;", `"`,
		"&#39;", "'",
	)
)

// unescapeHTML decodes the escape sequences this package expects in scraped
// text: the JSON escapes \u0026, \", \u003c and \u003e, followed by the HTML
// entities &quot; and &#39;. Any other text is returned untouched.
//
// The JSON pass always runs before the entity pass, so an input such as
// `\u0026quot;` deterministically becomes `"`.
func unescapeHTML(s string) string {
	return htmlEntityReplacer.Replace(jsonEscapeReplacer.Replace(s))
}