#!/usr/bin/env python3
"""Report thumbnails that are used by more than one article.

Reads a JSON list of article objects from ``articles_temp.json`` in the
current working directory and prints every thumbnail that appears on more
than one article, followed by summary statistics.
"""
import json
from collections import Counter, defaultdict

ARTICLES_PATH = 'articles_temp.json'
MAX_TITLE_LENGTH = 60


def shorten_title(title: str, limit: int = MAX_TITLE_LENGTH) -> str:
    """Return *title* cut to *limit* characters plus '...' when it is longer."""
    if len(title) > limit:
        return title[:limit] + '...'
    return title


def group_by_thumbnail(articles):
    """Group article summaries by their thumbnail value.

    Articles whose ``'thumbnail'`` key is missing or falsy are skipped.
    Every other article must provide ``'id'``, ``'title'`` and
    ``'outletId'``; a missing key raises ``KeyError``.

    Returns:
        A ``(counts, groups)`` tuple. ``groups`` maps each thumbnail to a
        list of ``{'id', 'title', 'outletId'}`` dicts in input order, with
        the title shortened for display. ``counts`` is a ``Counter`` of how
        many articles use each thumbnail, keyed in first-seen order.
    """
    groups = defaultdict(list)
    for article in articles:
        thumbnail = article.get('thumbnail', '')
        if not thumbnail:
            continue
        groups[thumbnail].append({
            'id': article['id'],
            'title': shorten_title(article['title']),
            'outletId': article['outletId'],
        })
    # Counts are derived from the groups so the two can never disagree.
    # Building the Counter from a dict keeps first-seen key order, which is
    # what most_common() falls back on for ties.
    counts = Counter({thumb: len(items) for thumb, items in groups.items()})
    return counts, dict(groups)


def build_report(articles) -> str:
    """Return the full duplicate-thumbnail report for *articles* as text.

    The text has no trailing newline; printing it yields the script output.
    """
    counts, groups = group_by_thumbnail(articles)

    lines = ["=== DUPLICATE THUMBNAILS ==="]
    duplicates_found = False
    for thumbnail, count in counts.most_common():
        if count <= 1:
            continue
        duplicates_found = True
        # The leading "\n" leaves a blank line before each thumbnail section.
        lines.append(f"\n{thumbnail} (used {count} times):")
        for entry in groups[thumbnail]:
            lines.append(
                f" - {entry['outletId']}: {entry['title']} (ID: {entry['id']})"
            )
    if not duplicates_found:
        lines.append("No duplicate thumbnails found!")

    # This figure counts every article that shares its thumbnail with at
    # least one other article.
    shared = sum(count for count in counts.values() if count > 1)
    lines.append("\n=== STATISTICS ===")
    lines.append(f"Total articles: {len(articles)}")
    lines.append(f"Unique thumbnails: {len(counts)}")
    lines.append(f"Articles with duplicates: {shared}")
    return "\n".join(lines)


def main() -> None:
    """Load the articles file and print the report."""
    # An explicit encoding keeps decoding independent of the platform locale.
    with open(ARTICLES_PATH, 'r', encoding='utf-8') as f:
        articles = json.load(f)
    print(build_report(articles))


if __name__ == "__main__":
    main()