// GtaVUsersInfo/Helpers/CarParser.cs

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Newtonsoft.Json;
using OpenQA.Selenium;
using OpenQA.Selenium.Firefox;
using OpenQA.Selenium.Support.UI;
namespace GtaVUsersInfo.Helpers
{
/// <summary>
/// Scrapes car listings from gtacars.net through a Selenium-driven Firefox session
/// and can write the collected cars to a JSON file.
/// </summary>
/// <remarks>
/// The instance owns the browser session started by the constructor; call
/// <see cref="Dispose"/> (or wrap the instance in <c>using</c>) to close the browser.
/// </remarks>
public class CarParser : IDisposable
{
    // Upper bound for each explicit wait on the page's dynamic content.
    private static readonly TimeSpan PageLoadTimeout = TimeSpan.FromSeconds(60);

    // Pause before each incremental scroll step.
    private static readonly TimeSpan ScrollStepDelay = TimeSpan.FromMilliseconds(100);

    // Pause after the final scroll before checking for remaining placeholder images.
    private static readonly TimeSpan ScrollSettleDelay = TimeSpan.FromMilliseconds(500);

    // The page is scrolled to scrollHeight / divisor, in this order (1 == bottom of the page).
    private static readonly int[] ScrollDivisors = { 4, 3, 2, 1 };

    private readonly IWebDriver _driver;
    private bool _disposed;

    /// <summary>
    /// Starts a new Firefox session used for all subsequent page loads.
    /// </summary>
    public CarParser()
    {
        var firefoxOptions = new FirefoxOptions();
        // NOTE(review): "--no-sandbox" is normally a Chromium switch; it is kept so the
        // launch arguments stay identical to the original - confirm Firefox needs it.
        firefoxOptions.AddArgument("--no-sandbox");
        _driver = new FirefoxDriver(firefoxOptions);
    }

    /// <summary>
    /// Parses the listing pages 1..<paramref name="maxPages"/> one after another.
    /// </summary>
    /// <param name="maxPages">Number of the last page to parse; values below 1 yield an empty list.</param>
    /// <param name="cancellationToken">Optional token checked before each page and during the scroll pauses.</param>
    /// <returns>All cars found on the parsed pages, in page order.</returns>
    /// <exception cref="ObjectDisposedException">The parser has already been disposed.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    /// <exception cref="WebDriverTimeoutException">A page did not finish loading within the wait timeout.</exception>
    /// <exception cref="NoSuchElementException">A matched card lacks one of the expected child elements.</exception>
    public async Task<List<Car>> ParseCarsFromPages(int maxPages, CancellationToken cancellationToken = default(CancellationToken))
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(CarParser));
        }

        var cars = new List<Car>();
        for (int page = 1; page <= maxPages; page++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var url = $"https://gtacars.net/?page={page}";
            var pageCars = await ParseCarsFromPage(url, cancellationToken);
            cars.AddRange(pageCars);
        }

        return cars;
    }

    /// <summary>
    /// Loads a single listing page, waits for its dynamic content and parses every car card on it.
    /// </summary>
    private async Task<List<Car>> ParseCarsFromPage(string url, CancellationToken cancellationToken)
    {
        // Open the page in the browser.
        _driver.Navigate().GoToUrl(url);

        var wait = new WebDriverWait(_driver, PageLoadTimeout);
        // Elements may be re-rendered between FindElements and reading Text while the page is
        // still loading; retry the condition instead of failing the whole wait.
        wait.IgnoreExceptionTypes(typeof(StaleElementReferenceException));

        // Wait until at least one price element (text containing "$") is present.
        wait.Until(drv => drv.FindElements(By.CssSelector("span.v-popper--has-tooltip")).Any(e => e.Text.Contains("$")));

        await ScrollThroughPage(cancellationToken);

        // Wait until no image still has a data:image/gif source
        // (presumably lazy-load placeholders - verify against the site).
        wait.Until(drv => drv.FindElements(By.CssSelector("img[src*='data:image/gif']")).Count == 0);

        // The page is ready; parse each card.
        var cars = new List<Car>();
        foreach (var carElement in _driver.FindElements(By.CssSelector("div.relative.flex.flex-col")))
        {
            cars.Add(ParseCar(carElement));
        }

        return cars;
    }

    /// <summary>
    /// Scrolls the page down in steps (1/4, 1/3, 1/2, then full height) with short pauses in between.
    /// </summary>
    private async Task ScrollThroughPage(CancellationToken cancellationToken)
    {
        var js = (IJavaScriptExecutor)_driver;
        foreach (int divisor in ScrollDivisors)
        {
            // Task.Delay instead of Thread.Sleep so the calling thread is not blocked while pausing.
            await Task.Delay(ScrollStepDelay, cancellationToken);
            js.ExecuteScript("window.scrollTo(0, document.body.scrollHeight / arguments[0]);", divisor);
        }

        await Task.Delay(ScrollSettleDelay, cancellationToken);
    }

    /// <summary>
    /// Builds a <see cref="Car"/> from one card element.
    /// Throws <see cref="NoSuchElementException"/> if any expected child element is missing.
    /// </summary>
    private static Car ParseCar(IWebElement carElement)
    {
        var car = new Car();

        // Car name.
        car.Name = carElement.FindElement(By.CssSelector("span[title]")).Text;

        // Price.
        car.Price = ParsePrice(carElement.FindElement(By.CssSelector("span.v-popper--has-tooltip")).Text);

        // Photo URL.
        car.Photo = carElement.FindElement(By.CssSelector("img.w-full")).GetAttribute("src");

        // Manufacturer.
        car.Manufacturer = carElement.FindElement(By.CssSelector("a[href*='filter_manufacturer']")).Text;

        // Vehicle class.
        car.Class = carElement.FindElement(By.CssSelector("a[href*='filter_class']")).Text;

        // Model: last path segment of the first link inside the card.
        car.Model = carElement.FindElement(By.CssSelector("a[href]")).GetAttribute("href").Split('/').Last();

        return car;
    }

    /// <summary>
    /// Converts a price label such as "$1,500,000" to an integer.
    /// </summary>
    /// <returns>The parsed price, or 0 when the text is blank or not a valid integer.</returns>
    private static int ParsePrice(string priceText)
    {
        if (string.IsNullOrWhiteSpace(priceText))
        {
            return 0;
        }

        var digits = priceText.Replace("$", "").Replace(",", "").Replace(" ", "");

        // Scraped data is machine-readable, so parse it independently of the current culture.
        return int.TryParse(
            digits,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int price)
            ? price
            : 0;
    }

    /// <summary>
    /// Serializes <paramref name="cars"/> as indented JSON and writes it to <paramref name="filePath"/>,
    /// creating the target directory when necessary.
    /// </summary>
    /// <param name="cars">Cars to serialize.</param>
    /// <param name="filePath">Destination file; may be a bare file name relative to the current directory.</param>
    public void SaveToJson(List<Car> cars, string filePath)
    {
        // GetDirectoryName returns "" for a bare file name (and null for a root path);
        // Directory.CreateDirectory rejects both, so only create a directory when one is named.
        // CreateDirectory is a no-op when the directory already exists.
        var directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        var json = JsonConvert.SerializeObject(cars, Formatting.Indented);
        File.WriteAllText(filePath, json);
    }

    /// <summary>
    /// Closes the browser session. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (_driver != null)
        {
            _driver.Quit();
            _driver.Dispose();
        }

        GC.SuppressFinalize(this);
    }
}
}