<?php

/**
 * کلاس دریافت‌کننده اخبار پیشرفته
 * Advanced News Fetcher with Multiple Sources
 */

use GuzzleHttp\Client;
use Monolog\Logger;
use Monolog\Handler\StreamHandler;

/**
 * Collects news articles from NewsAPI and from fixed lists of RSS feeds
 * (English, Persian, crypto and forex) and normalises them into plain arrays.
 *
 * Every article returned by this class is an associative array with the keys
 * hash_id, title, description, content, url, image_url, source, published_at,
 * category and language. hash_id is md5(title . url) and is what duplicate
 * removal is based on.
 *
 * Fetching is best-effort: a failing source is logged and skipped, it never
 * aborts the other sources.
 */
class NewsFetcher
{
    /** Endpoint of the NewsAPI "everything" search. */
    private const NEWS_API_ENDPOINT = 'https://newsapi.org/v2/everything';

    /** Format of publication timestamps produced by this class. */
    private const DATE_FORMAT = 'Y-m-d H:i:s';

    /** Maximum number of items taken from a single RSS feed. */
    private const MAX_ITEMS_PER_FEED = 15;

    /** Registered domain => display name; sub-domains of a key map to the same name. */
    private const SOURCE_NAMES = [
        'bbci.co.uk' => 'BBC News',
        'cnn.com' => 'CNN',
        'theguardian.com' => 'The Guardian',
        'reuters.com' => 'Reuters',
        'tasnimnews.com' => 'تسنیم',
        'mehrnews.com' => 'مهر',
        'isna.ir' => 'ایسنا',
        'farsnews.ir' => 'فارس',
        'irna.ir' => 'ایرنا',
        'cointelegraph.com' => 'CoinTelegraph',
        'coindesk.com' => 'CoinDesk',
        'bitcoinmagazine.com' => 'Bitcoin Magazine',
        'decrypt.co' => 'Decrypt',
        'forexfactory.com' => 'Forex Factory',
        'dailyfx.com' => 'DailyFX',
        'fxstreet.com' => 'FXStreet',
    ];

    private Client $client;
    private Logger $logger;
    private DatabaseManager $db;
    // NOTE(review): $cache and $maxConcurrentRequests are not read by any method of this class.
    private array $cache = [];
    private int $maxConcurrentRequests = 5;

    /**
     * Sets up the file logger and the shared HTTP client.
     *
     * @param DatabaseManager $db Used for reading and updating the news_sources table.
     */
    public function __construct(DatabaseManager $db)
    {
        $this->db = $db;
        $this->logger = new Logger('NewsFetcher');
        $this->logger->pushHandler(new StreamHandler(__DIR__ . '/../storage/logs/news_fetcher.log'));

        $this->client = new Client([
            'timeout' => 30,
            'connect_timeout' => 10,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            ]
        ]);
    }

    /**
     * Fetches articles from every source group and removes duplicates.
     *
     * Each group is fetched independently: when one group throws, the error is
     * logged and the articles already collected from the other groups are still
     * returned.
     *
     * @return array List of article arrays, first occurrence of each hash_id wins.
     */
    public function fetchAllNews(): array
    {
        $this->logger->info('شروع دریافت اخبار از تمام منابع');

        // Log prefix => fetcher. Order matters: earlier groups win during de-duplication.
        $groups = [
            'اخبار NewsAPI: ' => [$this, 'fetchFromNewsApi'],
            'اخبار RSS انگلیسی: ' => [$this, 'fetchFromEnglishRss'],
            'اخبار RSS فارسی: ' => [$this, 'fetchFromPersianRss'],
            'اخبار کریپتو: ' => [$this, 'fetchFromCryptoSources'],
            'اخبار فارکس: ' => [$this, 'fetchFromForexSources'],
        ];

        $allArticles = [];

        foreach ($groups as $logPrefix => $fetchGroup) {
            try {
                $groupArticles = $fetchGroup();
                $allArticles = array_merge($allArticles, $groupArticles);
                $this->logger->info($logPrefix . count($groupArticles) . ' مقاله');
            } catch (Exception $e) {
                $this->logger->error('خطا در دریافت اخبار: ' . $e->getMessage());
            }
        }

        $uniqueArticles = $this->removeDuplicates($allArticles);
        $this->logger->info('مجموع مقالات منحصر به فرد: ' . count($uniqueArticles));

        return $uniqueArticles;
    }

    /**
     * Fetches the last seven days of English articles from NewsAPI for the
     * first five sources configured under news_sources.english.
     *
     * @return array List of article arrays; empty when no API key is configured
     *               or the request fails.
     */
    public function fetchFromNewsApi(): array
    {
        $articles = [];
        $apiKey = Config::get('apis.news_api.key');

        if (empty($apiKey)) {
            $this->logger->warning('کلید NewsAPI تنظیم نشده است');
            return $articles;
        }

        try {
            $sources = (array) Config::get('news_sources.english', []);
            // Only a handful of sources are sent to keep the request URL short.
            $limitedSources = array_slice($sources, 0, 5);

            $fromDate = (new DateTimeImmutable())
                ->sub(new DateInterval('P7D'))
                ->format(DateTimeInterface::ATOM);

            $this->logger->info('درخواست اخبار از NewsAPI');
            $data = $this->requestNewsApi([
                'sources' => implode(',', $limitedSources),
                'from' => $fromDate,
                'language' => 'en',
                'pageSize' => 50,
                'sortBy' => 'publishedAt',
            ], (string) $apiKey);

            if (($data['status'] ?? null) === 'error') {
                $this->logger->error('خطای NewsAPI: ' . ($data['message'] ?? ''));
                return $articles;
            }

            foreach ($data['articles'] ?? [] as $item) {
                $article = $this->buildNewsApiArticle($item);
                if ($article !== null) {
                    $articles[] = $article;
                }
            }

            $this->updateSourceStats('NewsAPI', count($articles));
        } catch (Exception $e) {
            // Catching Exception (like the sibling methods) also covers every Guzzle transfer error.
            $this->logger->error('خطا در اتصال به NewsAPI: ' . $e->getMessage());
        }

        return $articles;
    }

    /**
     * Fetches articles from the English general-news RSS feeds.
     *
     * @return array List of article arrays.
     */
    public function fetchFromEnglishRss(): array
    {
        $sources = [
            'https://feeds.bbci.co.uk/news/rss.xml',
            'https://rss.cnn.com/rss/edition.rss',
            'https://www.theguardian.com/world/rss',
            'https://feeds.reuters.com/reuters/topNews',
            'https://feeds.bbci.co.uk/news/business/rss.xml',
            'https://feeds.bbci.co.uk/news/technology/rss.xml'
        ];

        return $this->fetchRssSources($sources, 'en', 'general', 'انگلیسی');
    }

    /**
     * Fetches articles from the Persian RSS feeds.
     *
     * @return array List of article arrays.
     */
    public function fetchFromPersianRss(): array
    {
        $sources = [
            'https://www.tasnimnews.com/fa/rss/feed/0/8/0/%D8%A7%D9%82%D8%AA%D8%B5%D8%A7%D8%AF',
            'https://www.mehrnews.com/rss',
            'https://www.isna.ir/rss',
            'https://www.farsnews.ir/rss',
            'https://www.irna.ir/rss'
        ];

        return $this->fetchRssSources($sources, 'fa', 'general', 'فارسی');
    }

    /**
     * Fetches articles from the cryptocurrency RSS feeds; articles that match
     * no configured keyword are categorised as "crypto".
     *
     * @return array List of article arrays.
     */
    public function fetchFromCryptoSources(): array
    {
        $sources = [
            'https://cointelegraph.com/rss',
            'https://coindesk.com/arc/outboundfeeds/rss/',
            'https://bitcoinmagazine.com/rss',
            'https://decrypt.co/feed'
        ];

        return $this->fetchRssSources($sources, 'en', 'crypto', 'کریپتو');
    }

    /**
     * Fetches articles from the forex RSS feeds; articles that match no
     * configured keyword are categorised as "forex".
     *
     * @return array List of article arrays.
     */
    public function fetchFromForexSources(): array
    {
        $sources = [
            'https://www.forexfactory.com/rss.php',
            'https://www.dailyfx.com/feeds/market-news',
            'https://www.fxstreet.com/rss'
        ];

        return $this->fetchRssSources($sources, 'en', 'forex', 'فارکس');
    }

    /**
     * Parses every feed in $sources and concatenates the results, logging a
     * per-feed article count.
     *
     * @param array  $sources         Feed URLs.
     * @param string $language        Language code stored on each article.
     * @param string $defaultCategory Category for articles matching no keyword.
     * @param string $label           Group label interpolated into the log messages.
     *
     * @return array List of article arrays in feed order.
     */
    private function fetchRssSources(array $sources, string $language, string $defaultCategory, string $label): array
    {
        $batches = [];

        foreach ($sources as $sourceUrl) {
            try {
                $rssArticles = $this->parseRssFeed($sourceUrl, $language, $defaultCategory);
                $batches[] = $rssArticles;
                $this->logger->info("RSS {$label} از {$sourceUrl}: " . count($rssArticles) . " مقاله");
            } catch (Exception $e) {
                $this->logger->error("خطا در دریافت RSS {$label} از {$sourceUrl}: " . $e->getMessage());
            }
        }

        // The leading [] keeps array_merge valid when no feed produced a batch.
        return array_merge([], ...$batches);
    }

    /**
     * Downloads one RSS/Atom feed and converts up to MAX_ITEMS_PER_FEED items
     * into article arrays. Items without a title or link are skipped. Download
     * and parse errors are logged and yield an empty (or partial) list.
     *
     * @param string $url             Feed URL.
     * @param string $language        Language code stored on each article.
     * @param string $defaultCategory Category for articles matching no keyword.
     *
     * @return array List of article arrays.
     */
    private function parseRssFeed(string $url, string $language = 'en', string $defaultCategory = 'general'): array
    {
        $articles = [];

        try {
            $this->logger->info("پارس RSS از: $url");

            $response = $this->client->get($url);

            $feed = new SimplePie();
            // getBody() is a stream object; SimplePie needs the raw XML string.
            $feed->set_raw_data((string) $response->getBody());
            $feed->enable_cache(false);
            $feed->init();

            if ($feed->error()) {
                $this->logger->error("خطای SimplePie: " . $feed->error());
                return $articles;
            }

            $items = array_slice((array) $feed->get_items(), 0, self::MAX_ITEMS_PER_FEED);

            foreach ($items as $item) {
                $title = $item->get_title();
                $link = $item->get_link();

                if (empty($title) || empty($link)) {
                    continue;
                }

                // Only an enclosure whose MIME type starts with "image" is used as the picture.
                $imageUrl = '';
                $enclosure = $item->get_enclosure();
                if ($enclosure && strpos((string) $enclosure->get_type(), 'image') === 0) {
                    $imageUrl = (string) $enclosure->get_link();
                }

                // Ask SimplePie for the same format as the "now" fallback so all dates agree.
                $publishedAt = $item->get_date(self::DATE_FORMAT) ?: date(self::DATE_FORMAT);

                $articles[] = [
                    'hash_id' => md5($title . $link),
                    'title' => $title,
                    'description' => strip_tags((string) $item->get_description()),
                    'content' => strip_tags((string) $item->get_content()),
                    'url' => $link,
                    'image_url' => $imageUrl,
                    'source' => $this->extractSourceName($url),
                    'published_at' => $publishedAt,
                    'category' => $this->categorizeArticle((string) $title, $defaultCategory),
                    'language' => $language
                ];
            }
        } catch (Exception $e) {
            $this->logger->error("خطا در پارس RSS از $url: " . $e->getMessage());
        }

        return $articles;
    }

    /**
     * Derives a display name for a source from its URL.
     *
     * The host is matched against SOURCE_NAMES by domain suffix, so
     * "feeds.bbci.co.uk" and "www.bbci.co.uk" both resolve to "BBC News".
     * Unknown hosts fall back to their first label (without a leading "www.")
     * with an upper-cased first letter.
     *
     * @param string $url Any absolute URL.
     *
     * @return string Display name; empty string when the URL has no host.
     */
    private function extractSourceName(string $url): string
    {
        $host = strtolower((string) (parse_url($url, PHP_URL_HOST) ?: ''));

        if (strpos($host, 'www.') === 0) {
            $host = substr($host, 4);
        }

        foreach (self::SOURCE_NAMES as $domain => $name) {
            $suffix = '.' . $domain;
            if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
                return $name;
            }
        }

        return ucfirst(explode('.', $host)[0]);
    }

    /**
     * Picks a category for an article by looking for configured keywords in
     * its title (case-insensitive substring match, first hit wins).
     *
     * @param string $title           Article title.
     * @param string $defaultCategory Returned when no keyword matches.
     *
     * @return string Category name.
     */
    private function categorizeArticle(string $title, string $defaultCategory = 'general'): string
    {
        $titleLower = $this->lowercase($title);
        $keywords = (array) Config::get('keywords', []);

        foreach ($keywords as $category => $categoryKeywords) {
            foreach ((array) $categoryKeywords as $keyword) {
                $needle = $this->lowercase((string) $keyword);

                // An empty needle would match every title, so it is ignored.
                if ($needle !== '' && strpos($titleLower, $needle) !== false) {
                    return (string) $category;
                }
            }
        }

        return $defaultCategory;
    }

    /**
     * Lower-cases text, Unicode-aware when the mbstring extension is loaded.
     *
     * @param string $text Text to convert.
     *
     * @return string Lower-cased text.
     */
    private function lowercase(string $text): string
    {
        return function_exists('mb_strtolower') ? mb_strtolower($text, 'UTF-8') : strtolower($text);
    }

    /**
     * Removes articles whose hash_id has already been seen, keeping the first
     * occurrence and the original order.
     *
     * @param array $articles List of article arrays, each with a hash_id key.
     *
     * @return array Re-indexed list without duplicates.
     */
    private function removeDuplicates(array $articles): array
    {
        $unique = [];
        $seenHashes = [];

        foreach ($articles as $article) {
            // Keyed lookup is exact; a loose in_array() treats "0e1" and "0e2" as equal.
            $hash = (string) $article['hash_id'];

            if (isset($seenHashes[$hash])) {
                continue;
            }

            $seenHashes[$hash] = true;
            $unique[] = $article;
        }

        return $unique;
    }

    /**
     * Increments fetch_count and refreshes last_fetch for one row of
     * news_sources. Database errors are logged and swallowed.
     *
     * @param string $sourceName   Value of the name column to update.
     * @param int    $articleCount Number of fetched articles; currently not written anywhere.
     */
    private function updateSourceStats(string $sourceName, int $articleCount): void
    {
        try {
            $sql = "UPDATE news_sources 
                    SET fetch_count = fetch_count + 1, 
                        last_fetch = CURRENT_TIMESTAMP 
                    WHERE name = :name";

            $this->db->query($sql, ['name' => $sourceName]);
        } catch (Exception $e) {
            $this->logger->error("خطا در به‌روزرسانی آمار منبع: " . $e->getMessage());
        }
    }

    /**
     * Searches NewsAPI once per keyword (10 newest English results each) and
     * returns the de-duplicated articles, capped at $limit.
     *
     * @param array $keywords Search terms.
     * @param int   $limit    Maximum number of articles to return.
     *
     * @return array List of article arrays; empty when no API key is configured.
     */
    public function fetchByKeywords(array $keywords, int $limit = 20): array
    {
        $apiKey = Config::get('apis.news_api.key');

        if (empty($apiKey)) {
            return [];
        }

        $articles = [];

        foreach ($keywords as $keyword) {
            try {
                $data = $this->requestNewsApi([
                    'q' => $keyword,
                    'language' => 'en',
                    'pageSize' => 10,
                    'sortBy' => 'publishedAt',
                ], (string) $apiKey);

                if (($data['status'] ?? null) === 'error') {
                    $this->logger->error('خطای NewsAPI: ' . ($data['message'] ?? ''));
                    continue;
                }

                foreach ($data['articles'] ?? [] as $item) {
                    $article = $this->buildNewsApiArticle($item);
                    if ($article !== null) {
                        $articles[] = $article;
                    }
                }
            } catch (Exception $e) {
                $this->logger->error("خطا در جستجوی کلمه کلیدی '$keyword': " . $e->getMessage());
            }
        }

        return array_slice($this->removeDuplicates($articles), 0, $limit);
    }

    /**
     * Sends one GET request to the NewsAPI "everything" endpoint.
     *
     * The key travels in the X-Api-Key header instead of the query string so
     * that it does not show up in exception messages, which include the URI
     * and are written to the log.
     *
     * @param array  $params Query parameters (without the API key).
     * @param string $apiKey NewsAPI key.
     *
     * @return array Decoded JSON body, or an empty array when the body is not a JSON object/array.
     */
    private function requestNewsApi(array $params, string $apiKey): array
    {
        $response = $this->client->get(
            self::NEWS_API_ENDPOINT . '?' . http_build_query($params),
            ['headers' => ['X-Api-Key' => $apiKey]]
        );

        $data = json_decode((string) $response->getBody(), true);

        return is_array($data) ? $data : [];
    }

    /**
     * Converts one NewsAPI result item into an article array.
     *
     * @param mixed $item One element of the "articles" list of a NewsAPI response.
     *
     * @return array|null Article array, or null when the item has no title or URL.
     */
    private function buildNewsApiArticle($item): ?array
    {
        if (!is_array($item) || empty($item['title']) || empty($item['url'])) {
            return null;
        }

        return [
            'hash_id' => md5($item['title'] . $item['url']),
            'title' => $item['title'],
            'description' => $item['description'] ?? '',
            'content' => $item['content'] ?? '',
            'url' => $item['url'],
            'image_url' => $item['urlToImage'] ?? '',
            'source' => $item['source']['name'] ?? 'NewsAPI',
            'published_at' => $item['publishedAt'] ?? date(self::DATE_FORMAT),
            'category' => $this->categorizeArticle((string) $item['title']),
            'language' => 'en'
        ];
    }

    /**
     * Reads fetch statistics for all rows of news_sources, most fetched first.
     *
     * @return array Result of the database query; empty when the query throws.
     */
    public function getSourceStats(): array
    {
        try {
            $sql = "SELECT name, fetch_count, last_fetch, error_count, is_active 
                    FROM news_sources 
                    ORDER BY fetch_count DESC";

            return $this->db->query($sql);
        } catch (Exception $e) {
            $this->logger->error("خطا در دریافت آمار منابع: " . $e->getMessage());
            return [];
        }
    }

    /**
     * Issues a GET request (10 s timeout) to every source configured under
     * news_sources and reports the outcome.
     *
     * A source may be a plain URL string or an array with "url" and optional
     * "name" keys; a missing name is derived from the URL.
     *
     * @return array One entry per source with name, url and status, plus
     *               response_time (ms) and status_code on success or error on failure.
     */
    public function testConnections(): array
    {
        $results = [];
        $sources = (array) Config::get('news_sources', []);

        foreach ($sources as $sourceList) {
            foreach ((array) $sourceList as $source) {
                $url = is_array($source) ? (string) ($source['url'] ?? '') : (string) $source;
                $name = is_array($source) && isset($source['name'])
                    ? $source['name']
                    : $this->extractSourceName($url);

                try {
                    $startTime = microtime(true);
                    $response = $this->client->get($url, ['timeout' => 10]);
                    $elapsedMs = round((microtime(true) - $startTime) * 1000, 2);

                    $results[] = [
                        'name' => $name,
                        'url' => $url,
                        'status' => 'success',
                        'response_time' => $elapsedMs,
                        'status_code' => $response->getStatusCode()
                    ];
                } catch (Exception $e) {
                    $results[] = [
                        'name' => $name,
                        'url' => $url,
                        'status' => 'error',
                        'error' => $e->getMessage()
                    ];
                }
            }
        }

        return $results;
    }
}
