working without api key
This commit is contained in:
parent
b97b89efb8
commit
999376e5f7
5 changed files with 371 additions and 166 deletions
|
@ -1,15 +1,13 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
// "log"
|
||||
"html"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"google.golang.org/api/option"
|
||||
"google.golang.org/api/youtube/v3"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Video struct {
|
||||
|
@ -17,155 +15,103 @@ type Video struct {
|
|||
URL string
|
||||
Channel string
|
||||
Duration string
|
||||
Views string
|
||||
Thumbnail string
|
||||
UploadDate string
|
||||
}
|
||||
|
||||
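// API_KEY is the YouTube Data API key. Do not commit a real key to source
// control: load it from the environment or a secrets store, and rotate any
// key that has already been committed.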
|
||||
const API_KEY = "AIzaSyAzsihRkp8mYTOXLOkVN09yTqld9TJ4Nts"
|
||||
|
||||
// formatViews renders a view count in the compact form used in listings,
// e.g. "999 views", "1.5K views", "2.3M views" or "4.0B views".
//
// Counts of 1000 and above are scaled and shown with one decimal place. When
// rounding to one decimal would reach "1000.0" (for example 999,999 would
// otherwise read "1000.0K views"), the value is promoted to the next unit so
// the result reads "1.0M views" instead.
func formatViews(count uint64) string {
	if count < 1000 {
		return fmt.Sprintf("%d views", count)
	}

	for _, unit := range []struct {
		divisor float64
		suffix  string
	}{
		{1e3, "K"},
		{1e6, "M"},
	} {
		scaled := float64(count) / unit.divisor
		if scaled >= 1000 {
			// Belongs to a larger unit.
			continue
		}
		// Compare the rendered text rather than the float so the check agrees
		// exactly with what %.1f prints at the boundary.
		if text := fmt.Sprintf("%.1f", scaled); text != "1000.0" {
			return text + unit.suffix + " views"
		}
	}

	return fmt.Sprintf("%.1fB views", float64(count)/1e9)
}
|
||||
|
||||
// formatDuration converts an ISO-8601 duration such as "PT1H2M3S" into a
// clock-style string.
//
// Output is "H:MM:SS" when the duration is an hour or longer and "MM:SS"
// otherwise. A day component ("P1DT2H") is folded into the hour count, so
// "P1DT2H3M4S" becomes "26:03:04". Missing components are treated as zero,
// which means a seconds-only value like "PT45S" is rendered as "00:45".
//
// Input without the leading "P" (e.g. "1H2M3S") is accepted and read as a
// time-only duration.
func formatDuration(duration string) string {
	var days, hours, minutes, seconds, pending uint64

	// Without a "P" designator there is no date part, so every letter is a
	// time unit. With it, "M" before the "T" separator would mean months and
	// must not be read as minutes.
	inTime := !strings.HasPrefix(duration, "P")

	for _, r := range strings.TrimPrefix(duration, "P") {
		switch {
		case r >= '0' && r <= '9':
			pending = pending*10 + uint64(r-'0')
			continue
		case r == 'T':
			inTime = true
		case r == 'D' && !inTime:
			days = pending
		case r == 'H' && inTime:
			hours = pending
		case r == 'M' && inTime:
			minutes = pending
		case r == 'S' && inTime:
			seconds = pending
		}
		// Any designator (recognised or not) ends the current number.
		pending = 0
	}

	hours += days * 24
	if hours > 0 {
		return fmt.Sprintf("%d:%02d:%02d", hours, minutes, seconds)
	}
	return fmt.Sprintf("%02d:%02d", minutes, seconds)
}
|
||||
|
||||
// formatUploadDate turns an RFC 3339 timestamp into a "DD-MM-YYYY" date.
//
// For uploads less than 30 days old a relative age is appended in
// parentheses: "just now" (under an hour), "Nh ago" (under a day),
// "1 day ago" or "N days ago". Timestamps that lie in the future get no
// relative suffix, so clock skew or scheduled publish times never produce
// output such as "-3 days ago".
//
// If uploadDate cannot be parsed as RFC 3339 it is returned unchanged.
func formatUploadDate(uploadDate string) string {
	t, err := time.Parse(time.RFC3339, uploadDate)
	if err != nil {
		return uploadDate
	}

	formattedDate := t.Format("02-01-2006")

	age := time.Since(t)
	if age < 0 {
		return formattedDate
	}

	days := int(age.Hours() / 24)
	if days >= 30 {
		return formattedDate
	}

	var timeAgo string
	switch {
	case days == 0:
		if hours := int(age.Hours()); hours == 0 {
			timeAgo = "just now"
		} else {
			timeAgo = fmt.Sprintf("%dh ago", hours)
		}
	case days == 1:
		timeAgo = "1 day ago"
	default:
		timeAgo = fmt.Sprintf("%d days ago", days)
	}
	return fmt.Sprintf("%s (%s)", formattedDate, timeAgo)
}
|
||||
// Patterns that pull search-result fields out of the JSON embedded in the
// YouTube results page. Each pattern has exactly one capture group holding
// the raw (still JSON-escaped) value.
//
// The title and channel groups accept backslash escapes, so a value that
// contains an escaped quote (\") is captured whole instead of failing to
// match. videoIDRegex captures only the 11-character video ID, so trailing
// URL parameters such as "\u0026pp=..." are not swept into the ID.
var (
	titleRegex      = regexp.MustCompile(`"title":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	channelRegex    = regexp.MustCompile(`"ownerText":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	durationRegex   = regexp.MustCompile(`"lengthText":\{"accessibility":\{"accessibilityData":\{"label":"[^"]*"\}\},"simpleText":"([^"]+)"`)
	uploadDateRegex = regexp.MustCompile(`"publishedTimeText":\{"simpleText":"([^"]+)"\}`)
	videoIDRegex    = regexp.MustCompile(`watch\?v=([A-Za-z0-9_-]{11})`)
)
|
||||
|
||||
func FetchVideos(query string) ([]Video, error) {
|
||||
ctx := context.Background()
|
||||
youtubeService, err := youtube.NewService(ctx, option.WithAPIKey(API_KEY))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating YouTube client: %w", err)
|
||||
client := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
// Make the search request
|
||||
call := youtubeService.Search.List([]string{"snippet"}).
|
||||
Q(query).
|
||||
MaxResults(50).
|
||||
Type("video").
|
||||
VideoDuration("any")
|
||||
// Format URL similar to the shell script
|
||||
searchURL := fmt.Sprintf("https://www.youtube.com/results?search_query=%s",
|
||||
url.QueryEscape(strings.ReplaceAll(query, " ", "+")))
|
||||
|
||||
response, err := call.Do()
|
||||
fmt.Printf("Fetching: %s\n", searchURL) // Debug print
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error making search request: %w", err)
|
||||
return nil, fmt.Errorf("error creating request: %w", err)
|
||||
}
|
||||
|
||||
// Add headers to mimic a browser
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error making request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading response: %w", err)
|
||||
}
|
||||
content := string(body)
|
||||
|
||||
// Extract information
|
||||
titles := titleRegex.FindAllStringSubmatch(content, -1)
|
||||
channels := channelRegex.FindAllStringSubmatch(content, -1)
|
||||
durations := durationRegex.FindAllStringSubmatch(content, -1)
|
||||
uploadDates := uploadDateRegex.FindAllStringSubmatch(content, -1)
|
||||
videoIDs := videoIDRegex.FindAllStringSubmatch(content, -1)
|
||||
|
||||
fmt.Printf("Found: %d titles, %d channels, %d durations, %d dates, %d IDs\n",
|
||||
len(titles), len(channels), len(durations), len(uploadDates), len(videoIDs))
|
||||
|
||||
var videos []Video
|
||||
for _, item := range response.Items {
|
||||
video := Video{
|
||||
Title: item.Snippet.Title,
|
||||
URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", item.Id.VideoId),
|
||||
Channel: item.Snippet.ChannelTitle,
|
||||
Thumbnail: item.Snippet.Thumbnails.Default.Url,
|
||||
UploadDate: item.Snippet.PublishedAt,
|
||||
for i := 0; i < len(titles) && i < 10; i++ { // Limit to 10 results like the shell script
|
||||
if i >= len(videoIDs) {
|
||||
break
|
||||
}
|
||||
|
||||
video := Video{
|
||||
Title: unescapeHTML(titles[i][1]),
|
||||
URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoIDs[i][1]),
|
||||
}
|
||||
|
||||
if i < len(channels) {
|
||||
video.Channel = unescapeHTML(channels[i][1])
|
||||
}
|
||||
if i < len(durations) {
|
||||
video.Duration = durations[i][1]
|
||||
}
|
||||
if i < len(uploadDates) {
|
||||
video.UploadDate = uploadDates[i][1]
|
||||
}
|
||||
|
||||
videos = append(videos, video)
|
||||
}
|
||||
|
||||
// Get additional video details (duration, views) in a single request
|
||||
videoIds := make([]string, len(response.Items))
|
||||
for i, item := range response.Items {
|
||||
videoIds[i] = item.Id.VideoId
|
||||
}
|
||||
|
||||
// Get video statistics
|
||||
statsCall := youtubeService.Videos.List([]string{"contentDetails", "statistics"}).
|
||||
Id(videoIds...)
|
||||
statsResponse, err := statsCall.Do()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error fetching video details: %w", err)
|
||||
}
|
||||
|
||||
// Update videos with additional information
|
||||
for i, stat := range statsResponse.Items {
|
||||
if i < len(videos) {
|
||||
videos[i].Duration = formatDuration(stat.ContentDetails.Duration)
|
||||
videos[i].Views = formatViews(stat.Statistics.ViewCount)
|
||||
videos[i].Title = html.UnescapeString(videos[i].Title)
|
||||
videos[i].UploadDate = formatUploadDate(videos[i].UploadDate)
|
||||
}
|
||||
if len(videos) == 0 {
|
||||
return nil, fmt.Errorf("no videos found")
|
||||
}
|
||||
|
||||
return videos, nil
|
||||
}
|
||||
|
||||
// htmlUnescaper maps the JSON escape sequences and HTML entities that show up
// in scraped text to the characters they stand for. It is built once and is
// safe for concurrent use.
//
// NOTE(review): the two entity keys ("&quot;" and "&#39;") are reconstructed;
// the source this was recovered from showed them already decoded. Confirm
// against the original file.
var htmlUnescaper = strings.NewReplacer(
	`\u0026`, "&",
	`\"`, `"`,
	`\u003c`, "<",
	`\u003e`, ">",
	"&quot;", `"`,
	"&#39;", "'",
)

// unescapeHTML decodes the escape sequences and HTML entities listed in
// htmlUnescaper and returns the resulting string.
//
// All replacements happen in a single left-to-right pass, so the result does
// not depend on iteration order and text produced by one replacement is never
// decoded a second time.
func unescapeHTML(s string) string {
	return htmlUnescaper.Replace(s)
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue