import { test } from '@japa/runner'
import { ScraperService, ScraperError } from '#services/scraper_service'
import { readFile } from 'node:fs/promises'
import { fileURLToPath } from 'node:url'
import { dirname, join } from 'node:path'

// Resolve the fixtures directory relative to this test file
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
const fixturesPath = join(__dirname, '..', '..', 'fixtures')

test.group('ScraperService - parsePage', () => {
  test('should parse valid auction HTML correctly', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 3, 'Should parse all 3 auctions')

    // Verify the first auction field by field
    const firstAuction = auctions[0]
    assert.equal(firstAuction.auctionNum, 'AUC-2024-001')
    assert.equal(firstAuction.title, 'Поставка офисной мебели')
    assert.equal(firstAuction.organization, 'ООО "Тестовая Компания"')
    assert.equal(firstAuction.status, 'Активный')
    assert.equal(firstAuction.deadline, '15.03.2025')
    assert.equal(firstAuction.link, 'https://icetrade.by/trades/view/12345')
    assert.equal(firstAuction.description, 'Комплект офисной мебели для нового офиса')
  })

  test('should handle relative URLs and convert to absolute', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    // The first two auctions have relative URLs
    assert.isTrue(auctions[0].link.startsWith('https://icetrade.by/'))
    assert.isTrue(auctions[1].link.startsWith('https://icetrade.by/'))
    // The third auction already has an absolute URL
    assert.equal(auctions[2].link, 'https://icetrade.by/trades/view/12347')
  })

  test('should handle null deadline gracefully', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    // The third auction has an empty deadline
    assert.isNull(auctions[2].deadline)
  })

  test('should handle null description when not present', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    // The second auction has no description
    assert.isNull(auctions[1].description)
  })

  test('should return empty array when no auctions table found', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'empty_page.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 0, 'Should return empty array')
  })

  test('should skip invalid auction rows and continue parsing', async ({ assert }) => {
    const scraper = new ScraperService()
    const html = await readFile(join(fixturesPath, 'malformed_auction.html'), 'utf-8')

    const auctions = scraper.parsePage(html)

    // Both rows in malformed_auction.html should fail validation:
    // the first has an empty auction number, and the second has no href
    // (an empty string fails URL validation)
    assert.lengthOf(auctions, 0, 'Should skip all malformed rows')
  })

  test('should not throw on completely invalid HTML', ({ assert }) => {
    const scraper = new ScraperService()
    const invalidHtml = 'This is not valid HTML at all {{{'

    // Cheerio is very lenient, so parsing does not throw;
    // it returns an empty array because no auctions table is found
    const auctions = scraper.parsePage(invalidHtml)
    assert.lengthOf(auctions, 0)
  })
})
test.group('ScraperService - fetchPage', () => {
  test('should build correct URL with all parameters', ({ assert }) => {
    const scraper = new ScraperService()
    // Access the private method via a type assertion for testing
    const buildUrl = (scraper as any).buildUrl.bind(scraper)

    const url = buildUrl(1)

    assert.include(url, 'https://icetrade.by/trades/index')
    assert.include(url, 'p=1')
    assert.include(url, 'onPage=100')
    // The remaining params are URL-encoded: sort=num:desc, zakup_type[1]=1,
    // zakup_type[2]=1, r[1]=1, t[Trade]=1
    assert.include(url, 'sort=num%3Adesc')
    assert.include(url, 'zakup_type%5B1%5D=1')
    assert.include(url, 'zakup_type%5B2%5D=1')
    assert.include(url, 'r%5B1%5D=1')
    assert.include(url, 't%5BTrade%5D=1')
  })

  test('should include correct page number in URL', ({ assert }) => {
    const scraper = new ScraperService()
    const buildUrl = (scraper as any).buildUrl.bind(scraper)

    const url1 = buildUrl(1)
    const url5 = buildUrl(5)

    assert.include(url1, 'p=1')
    assert.include(url5, 'p=5')
  })
})

test.group('ScraperService - scrapeAuctions', () => {
  test('should throw error if maxPages is less than 1', async ({ assert }) => {
    const scraper = new ScraperService()

    await assert.rejects(
      async () => await scraper.scrapeAuctions(0),
      'maxPages must be at least 1'
    )
    await assert.rejects(
      async () => await scraper.scrapeAuctions(-5),
      'maxPages must be at least 1'
    )
  })

  // Note: full integration tests for scrapeAuctions would require mocking
  // the fetch API and providing mock responses. Those belong in integration
  // tests, not unit tests; a hedged sketch of the approach follows this group.
})
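// A minimal sketch of how such an integration-style test could look.
// Assumptions not confirmed by this file: ScraperService issues requests
// through the global fetch, and scrapeAuctions resolves with the auctions
// parsed from the fetched pages. Adjust to the real implementation.
test.group('ScraperService - scrapeAuctions (mocked fetch sketch)', (group) => {
  let originalFetch: typeof fetch

  group.each.setup(() => {
    originalFetch = globalThis.fetch
    // The returned function runs as teardown, restoring the real fetch
    return () => {
      globalThis.fetch = originalFetch
    }
  })

  test('should parse auctions from a mocked page response', async ({ assert }) => {
    const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
    // Stub the global fetch to serve the fixture for any request
    globalThis.fetch = async () =>
      new Response(html, { status: 200, headers: { 'content-type': 'text/html' } })

    const scraper = new ScraperService()
    const auctions = await scraper.scrapeAuctions(1)

    // With a single mocked page, the three fixture auctions should come back
    assert.lengthOf(auctions, 3)
  })
})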
test.group('ScraperService - validation', () => {
  // The inline markup below is a reconstruction: the exact tags and classes
  // are assumed to mirror the row structure of fixtures/sample_auctions.html
  // (cells in the order: number, title link, organization, status, deadline).
  test('should validate auction number is not empty', ({ assert }) => {
    const scraper = new ScraperService()
    const html = `
      <table>
        <tr>
          <td></td>
          <td><a href="/trades/view/1">Title</a></td>
          <td>Org</td>
          <td>Status</td>
        </tr>
      </table>
    `

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 0, 'Should skip auction with empty number')
  })

  test('should validate title is not empty', ({ assert }) => {
    const scraper = new ScraperService()
    const html = `
      <table>
        <tr>
          <td>AUC-001</td>
          <td><a href="/trades/view/1"></a></td>
          <td>Org</td>
          <td>Status</td>
        </tr>
      </table>
    `

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 0, 'Should skip auction with empty title')
  })

  test('should validate organization is not empty', ({ assert }) => {
    const scraper = new ScraperService()
    const html = `
      <table>
        <tr>
          <td>AUC-001</td>
          <td><a href="/trades/view/1">Title</a></td>
          <td></td>
          <td>Status</td>
        </tr>
      </table>
    `

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 0, 'Should skip auction with empty organization')
  })

  test('should validate link is a valid URL', ({ assert }) => {
    const scraper = new ScraperService()
    // An anchor without an href yields an empty link, which fails URL validation
    const html = `
      <table>
        <tr>
          <td>AUC-001</td>
          <td><a>Title</a></td>
          <td>Org</td>
          <td>Status</td>
        </tr>
      </table>
    `

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 0, 'Should skip auction with invalid URL')
  })

  test('should trim whitespace from all fields', ({ assert }) => {
    const scraper = new ScraperService()
    const html = `
      <table>
        <tr>
          <td>  AUC-001  </td>
          <td><a href="/trades/view/1">  Test Title  </a></td>
          <td>  Test Org  </td>
          <td>  Active  </td>
          <td>  2025-03-15  </td>
        </tr>
      </table>
    `

    const auctions = scraper.parsePage(html)

    assert.lengthOf(auctions, 1)
    const auction = auctions[0]
    assert.equal(auction.auctionNum, 'AUC-001')
    assert.equal(auction.title, 'Test Title')
    assert.equal(auction.organization, 'Test Org')
    assert.equal(auction.status, 'Active')
    assert.equal(auction.deadline, '2025-03-15')
  })
})

test.group('ScraperService - ScraperError', () => {
  test('should create ScraperError with message', ({ assert }) => {
    const error = new ScraperError('Test error message')

    assert.equal(error.message, 'Test error message')
    assert.equal(error.name, 'ScraperError')
    assert.instanceOf(error, Error)
  })

  test('should create ScraperError with cause', ({ assert }) => {
    const originalError = new Error('Original error')
    const error = new ScraperError('Wrapped error', originalError)

    assert.equal(error.message, 'Wrapped error')
    assert.equal(error.cause, originalError)
  })
})