// Package scraper extracts video metadata from YouTube search result pages.
package scraper

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"
)

// Video holds the metadata scraped for a single search result.
type Video struct {
	Title      string // Display title with JSON/HTML escapes decoded.
	URL        string // Watch URL built from the scraped video ID.
	Channel    string // Text of the result's "ownerText" entry, escapes decoded.
	Duration   string // "simpleText" of the "lengthText" entry, as found in the page.
	UploadDate string // "simpleText" of the "publishedTimeText" entry, as found in the page.
}

const (
	// searchEndpoint is the results page; the escaped query is appended to it.
	searchEndpoint = "https://www.youtube.com/results?search_query="

	// maxResults caps how many videos FetchVideos returns (like the shell script).
	maxResults = 10

	// requestTimeout bounds the whole HTTP exchange, including reading the body.
	requestTimeout = 10 * time.Second

	// browserUserAgent makes the request look like it comes from a desktop browser.
	browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)

// Regular expressions applied to the JSON embedded in the results page.
// Title and channel captures accept backslash escapes (e.g. \" or \u0026) so
// that text containing an escaped quote is matched in full instead of skipped.
var (
	titleRegex      = regexp.MustCompile(`"title":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	channelRegex    = regexp.MustCompile(`"ownerText":\{"runs":\[\{"text":"((?:[^"\\]|\\.)+)"\}\]`)
	durationRegex   = regexp.MustCompile(`"lengthText":\{"accessibility":\{"accessibilityData":\{"label":"[^"]*"\}\},"simpleText":"([^"]+)"`)
	uploadDateRegex = regexp.MustCompile(`"publishedTimeText":\{"simpleText":"([^"]+)"\}`)
	// Only the ID characters are captured, so a trailing "\u0026pp=..." or
	// "&t=..." does not leak into the generated watch URL.
	videoIDRegex = regexp.MustCompile(`watch\?v=([A-Za-z0-9_-]+)`)
)

// escapeReplacer decodes the escapes handled by unescapeHTML in one pass.
var escapeReplacer = strings.NewReplacer(
	`\u0026`, "&",
	`\"`, `"`,
	`\u003c`, "<",
	`\u003e`, ">",
	"&quot;", `"`,
	"&#39;", "'",
)

// FetchVideos runs a YouTube search for query and returns up to maxResults
// videos scraped from the results page.
//
// It returns an error if the request cannot be built or sent, if the server
// answers with a status other than 200 OK, if the body cannot be read, or if
// no videos can be extracted from the page.
func FetchVideos(query string) ([]Video, error) {
	client := &http.Client{Timeout: requestTimeout}

	// QueryEscape already encodes spaces as '+'. Replacing them beforehand
	// would turn each space into "%2B", i.e. a literal plus sign in the search.
	searchURL := searchEndpoint + url.QueryEscape(query)
	fmt.Printf("Fetching: %s\n", searchURL) // Debug print

	req, err := http.NewRequest(http.MethodGet, searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("error creating request: %w", err)
	}

	// Add headers to mimic a browser.
	req.Header.Set("User-Agent", browserUserAgent)
	req.Header.Set("Accept-Language", "en-US,en;q=0.9")

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error making request: %w", err)
	}
	defer resp.Body.Close()

	// Fail loudly on error pages instead of reporting a misleading
	// "no videos found" after scraping them.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected response status: %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading response: %w", err)
	}

	return parseVideos(string(body))
}

// parseVideos extracts up to maxResults videos from the results page content.
//
// Each field is collected independently and matched up by position: the i-th
// title is paired with the i-th video ID, channel, duration and upload date.
// A video needs both a title and an ID; the other fields stay empty when the
// page yields fewer matches for them.
func parseVideos(content string) ([]Video, error) {
	titles := titleRegex.FindAllStringSubmatch(content, -1)
	channels := channelRegex.FindAllStringSubmatch(content, -1)
	durations := durationRegex.FindAllStringSubmatch(content, -1)
	uploadDates := uploadDateRegex.FindAllStringSubmatch(content, -1)
	videoIDs := videoIDRegex.FindAllStringSubmatch(content, -1)

	fmt.Printf("Found: %d titles, %d channels, %d durations, %d dates, %d IDs\n",
		len(titles), len(channels), len(durations), len(uploadDates), len(videoIDs))

	count := len(titles)
	if len(videoIDs) < count {
		count = len(videoIDs)
	}
	if count > maxResults {
		count = maxResults
	}
	if count == 0 {
		return nil, fmt.Errorf("no videos found")
	}

	videos := make([]Video, 0, count)
	for i := 0; i < count; i++ {
		video := Video{
			Title: unescapeHTML(titles[i][1]),
			URL:   fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoIDs[i][1]),
		}
		if i < len(channels) {
			video.Channel = unescapeHTML(channels[i][1])
		}
		if i < len(durations) {
			video.Duration = durations[i][1]
		}
		if i < len(uploadDates) {
			video.UploadDate = uploadDates[i][1]
		}
		videos = append(videos, video)
	}
	return videos, nil
}

// unescapeHTML decodes the escapes that appear in scraped text: the JSON
// escapes \u0026, \u003c, \u003e and \" plus the HTML entities &quot; and &#39;.
//
// All replacements happen in a single left-to-right pass, so the result does
// not depend on replacement order and already-decoded text is not decoded a
// second time.
func unescapeHTML(s string) string {
	return escapeReplacer.Replace(s)
}