GtaVUsersInfo/Helpers/CarParser.cs

127 lines
4.7 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Newtonsoft.Json;
using OpenQA.Selenium;
using OpenQA.Selenium.Firefox;
using OpenQA.Selenium.Support.UI;
namespace GtaVUsersInfo.Helpers
{
/// <summary>
/// Scrapes car listings from gtacars.net through a Selenium-driven Firefox
/// instance and can persist the collected cars as JSON.
/// </summary>
/// <remarks>
/// The instance owns the browser session; dispose it to close the browser.
/// </remarks>
public class CarParser : IDisposable
{
    // Selector of the price label; also used as the "page has loaded" signal.
    private const string PriceSelector = "span.v-popper--has-tooltip";

    // Scroll positions visited in order so that lazily loaded images get requested.
    private static readonly string[] ScrollScripts =
    {
        "window.scrollTo(0, document.body.scrollHeight / 4);",
        "window.scrollTo(0, document.body.scrollHeight / 3);",
        "window.scrollTo(0, document.body.scrollHeight / 2);",
        "window.scrollTo(0, document.body.scrollHeight);",
    };

    private readonly IWebDriver driver;
    private bool disposed;

    /// <summary>
    /// Starts a new Firefox session that is used for all subsequent parsing.
    /// </summary>
    public CarParser()
    {
        var firefoxOptions = new FirefoxOptions();
        // NOTE(review): "--no-sandbox" is normally a Chromium switch; kept as in the
        // original configuration - confirm it is needed for Firefox.
        firefoxOptions.AddArgument("--no-sandbox");
        driver = new FirefoxDriver(firefoxOptions);
    }

    /// <summary>
    /// Parses pages 1..<paramref name="maxPages"/> of the catalogue sequentially.
    /// </summary>
    /// <param name="maxPages">Number of the last page to parse; values below 1 yield an empty list.</param>
    /// <param name="cancellationToken">Token checked before each page and during the short scroll delays.</param>
    /// <returns>All cars found on the requested pages, in page order.</returns>
    /// <exception cref="ObjectDisposedException">The parser has already been disposed.</exception>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public async Task<List<Car>> ParseCarsFromPages(
        int maxPages,
        CancellationToken cancellationToken = default(CancellationToken))
    {
        if (disposed)
        {
            throw new ObjectDisposedException(nameof(CarParser));
        }

        var cars = new List<Car>();
        for (int page = 1; page <= maxPages; page++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var url = $"https://gtacars.net/?page={page}";
            var pageCars = await ParseCarsFromPage(url, cancellationToken).ConfigureAwait(false);
            cars.AddRange(pageCars);
        }

        return cars;
    }

    /// <summary>
    /// Loads a single catalogue page, waits for its dynamic content and extracts the cars.
    /// </summary>
    /// <param name="url">Absolute URL of the catalogue page.</param>
    /// <param name="cancellationToken">Token observed during the scroll delays.</param>
    /// <returns>The cars found on the page; cards without a name element are skipped.</returns>
    private async Task<List<Car>> ParseCarsFromPage(string url, CancellationToken cancellationToken)
    {
        var cars = new List<Car>();

        // Open the page in the browser.
        driver.Navigate().GoToUrl(url);

        // Wait for the dynamic elements to load.
        var wait = new WebDriverWait(driver, TimeSpan.FromSeconds(60));
        // The page re-renders while loading, so an element found a moment ago may
        // already be detached when its text is read; retry instead of failing.
        wait.IgnoreExceptionTypes(typeof(StaleElementReferenceException));

        // Wait until at least one price element is shown.
        wait.Until(drv => drv.FindElements(By.CssSelector(PriceSelector)).Any(e => e.Text.Contains("$")));

        // Scroll down step by step so lazily loaded images are requested.
        var js = (IJavaScriptExecutor)driver;
        foreach (var script in ScrollScripts)
        {
            await Task.Delay(100, cancellationToken).ConfigureAwait(false);
            js.ExecuteScript(script);
        }

        await Task.Delay(500, cancellationToken).ConfigureAwait(false);

        // Placeholder images use an inline GIF data URI; wait until none are left.
        wait.Until(drv => drv.FindElements(By.CssSelector("img[src*='data:image/gif']")).Count == 0);

        // Everything is loaded, the cards can be parsed now.
        var carElements = driver.FindElements(By.CssSelector("div.relative.flex.flex-col"));
        foreach (var carElement in carElements)
        {
            // The container selector consists of generic utility classes and may match
            // elements that are not car cards; a card without a name is not a car.
            var nameElement = FindFirstOrDefault(carElement, "span[title]");
            if (nameElement == null)
            {
                continue;
            }

            var car = new Car();

            // Car name.
            car.Name = nameElement.Text;

            // Car price (0 when the label is missing or not numeric).
            car.Price = ParsePrice(FindFirstOrDefault(carElement, PriceSelector)?.Text);

            // Photo.
            car.Photo = FindFirstOrDefault(carElement, "img.w-full")?.GetAttribute("src");

            // Manufacturer.
            car.Manufacturer = FindFirstOrDefault(carElement, "a[href*='filter_manufacturer']")?.Text;

            // Car class.
            car.Class = FindFirstOrDefault(carElement, "a[href*='filter_class']")?.Text;

            // Model: last segment of the first link's URL.
            var href = FindFirstOrDefault(carElement, "a[href]")?.GetAttribute("href");
            car.Model = href?.Split('/').Last();

            cars.Add(car);
        }

        return cars;
    }

    /// <summary>
    /// Returns the first descendant matching the CSS selector, or <c>null</c> when there is none.
    /// </summary>
    private static IWebElement FindFirstOrDefault(ISearchContext context, string cssSelector)
    {
        // FindElements returns an empty collection instead of throwing on no match.
        return context.FindElements(By.CssSelector(cssSelector)).FirstOrDefault();
    }

    /// <summary>
    /// Converts a price label such as "$1,234,000" into an integer.
    /// </summary>
    /// <param name="priceText">Raw label text; may be null or empty.</param>
    /// <returns>The parsed price, or 0 when the text is empty or not a valid integer.</returns>
    private static int ParsePrice(string priceText)
    {
        if (string.IsNullOrWhiteSpace(priceText))
        {
            return 0;
        }

        // Drop the currency sign, thousands separators and any whitespace
        // (char.IsWhiteSpace also covers non-breaking spaces).
        var digits = new string(priceText
            .Where(c => c != '$' && c != ',' && !char.IsWhiteSpace(c))
            .ToArray());

        // Invariant culture: the text is machine data, not user-locale input.
        return int.TryParse(
            digits,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int price)
            ? price
            : 0;
    }

    /// <summary>
    /// Serializes the cars as indented JSON and writes them to <paramref name="filePath"/>,
    /// creating the target directory when necessary.
    /// </summary>
    /// <param name="cars">Cars to serialize.</param>
    /// <param name="filePath">Destination file; a bare file name writes to the current directory.</param>
    public void SaveToJson(List<Car> cars, string filePath)
    {
        // GetDirectoryName yields "" for a bare file name and null for a root path;
        // neither may be passed to CreateDirectory.
        var directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory))
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(directory);
        }

        var json = JsonConvert.SerializeObject(cars, Formatting.Indented);
        File.WriteAllText(filePath, json);
    }

    /// <summary>
    /// Closes the browser session. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (disposed)
        {
            return;
        }

        disposed = true;

        if (driver != null)
        {
            try
            {
                driver.Quit();
            }
            finally
            {
                // Release the driver even if quitting the browser failed.
                driver.Dispose();
            }
        }
    }
}
}