import { test } from '@japa/runner'
import { ScraperService, ScraperError } from '#services/scraper_service'
import { readFile } from 'node:fs/promises'
import { fileURLToPath } from 'node:url'
import { dirname, join } from 'node:path'
// Resolve the fixtures directory relative to this file (ESM has no __dirname)
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
const fixturesPath = join(__dirname, '..', '..', 'fixtures')
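// Small convenience used by the mocking sketch further down; the existing
// tests keep their inline readFile calls unchanged.
function loadFixture(name: string): Promise<string> {
return readFile(join(fixturesPath, name), 'utf-8')
}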
test.group('ScraperService - parsePage', () => {
test('should parse valid auction HTML correctly', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 3, 'Should parse all 3 auctions')
// Verify first auction
const firstAuction = auctions[0]
assert.equal(firstAuction.auctionNum, 'AUC-2024-001')
assert.equal(firstAuction.title, 'Поставка офисной мебели')
assert.equal(firstAuction.organization, 'ООО "Тестовая Компания"')
assert.equal(firstAuction.status, 'Активный')
assert.equal(firstAuction.deadline, '15.03.2025')
assert.equal(firstAuction.link, 'https://icetrade.by/trades/view/12345')
assert.equal(firstAuction.description, 'Комплект офисной мебели для нового офиса')
})
test('should handle relative URLs and convert to absolute', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
const auctions = scraper.parsePage(html)
// First two auctions have relative URLs
assert.isTrue(auctions[0].link.startsWith('https://icetrade.by/'))
assert.isTrue(auctions[1].link.startsWith('https://icetrade.by/'))
// Third auction already has absolute URL
assert.equal(auctions[2].link, 'https://icetrade.by/trades/view/12347')
})
test('should handle null deadline gracefully', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
const auctions = scraper.parsePage(html)
// Third auction has empty deadline
assert.isNull(auctions[2].deadline)
})
test('should handle null description when not present', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
const auctions = scraper.parsePage(html)
// Second auction has no description
assert.isNull(auctions[1].description)
})
test('should return empty array when no auctions table found', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'empty_page.html'), 'utf-8')
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 0, 'Should return empty array')
})
test('should skip invalid auction rows and continue parsing', async ({ assert }) => {
const scraper = new ScraperService()
const html = await readFile(join(fixturesPath, 'malformed_auction.html'), 'utf-8')
const auctions = scraper.parsePage(html)
// Both rows in malformed_auction.html should fail validation
// First row: empty auction number
// Second row: no href (empty string fails URL validation)
assert.lengthOf(auctions, 0, 'Should skip all malformed rows')
})
test('should return empty array for invalid, non-HTML input', ({ assert }) => {
const scraper = new ScraperService()
const invalidHtml = 'This is not valid HTML at all {{{'
// Should not throw - cheerio is very lenient
// But should return empty array since no table found
const auctions = scraper.parsePage(invalidHtml)
assert.lengthOf(auctions, 0)
})
})
test.group('ScraperService - fetchPage', () => {
test('should build correct URL with all parameters', ({ assert }) => {
const scraper = new ScraperService()
// Access private method via type assertion for testing
const buildUrl = (scraper as any).buildUrl.bind(scraper)
const url = buildUrl(1)
assert.include(url, 'https://icetrade.by/trades/index')
assert.include(url, 'p=1')
assert.include(url, 'onPage=100')
assert.include(url, 'sort=num%3Adesc')
assert.include(url, 'zakup_type%5B1%5D=1')
assert.include(url, 'zakup_type%5B2%5D=1')
assert.include(url, 'r%5B1%5D=1')
assert.include(url, 't%5BTrade%5D=1')
})
test('should include correct page number in URL', ({ assert }) => {
const scraper = new ScraperService()
const buildUrl = (scraper as any).buildUrl.bind(scraper)
const url1 = buildUrl(1)
const url5 = buildUrl(5)
assert.include(url1, 'p=1')
assert.include(url5, 'p=5')
})
})
test.group('ScraperService - scrapeAuctions', () => {
test('should throw error if maxPages is less than 1', async ({ assert }) => {
const scraper = new ScraperService()
await assert.rejects(
() => scraper.scrapeAuctions(0),
'maxPages must be at least 1'
)
await assert.rejects(
() => scraper.scrapeAuctions(-5),
'maxPages must be at least 1'
)
})
// Note: full integration tests for scrapeAuctions would require mocking the
// fetch API and supplying canned responses. Those belong in the integration
// suite, not here; the skipped sketch below illustrates the approach.
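// A hedged sketch of that approach, skipped so this suite stays network-free.
// Assumes scrapeAuctions calls the global fetch and resolves with the parsed
// auctions; adjust the stub and assertions to the real return shape.
test('should scrape a stubbed page without touching the network (sketch)', async ({ assert }) => {
const scraper = new ScraperService()
const html = await loadFixture('sample_auctions.html')
const originalFetch = globalThis.fetch
globalThis.fetch = async () => new Response(html, { status: 200 })
try {
const auctions = await scraper.scrapeAuctions(1)
assert.lengthOf(auctions, 3, 'Stubbed page should yield the 3 fixture auctions')
} finally {
// Restore the real fetch so later tests are unaffected
globalThis.fetch = originalFetch
}
}).skip(true, 'illustrative sketch; belongs in the integration suite')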
})
test.group('ScraperService - validation', () => {
test('should validate auction number is not empty', ({ assert }) => {
const scraper = new ScraperService()
// Assumed markup standing in for the stripped inline fixture; align the
// structure with the selectors parsePage actually matches on.
const html = `
  <table><tr>
    <td></td>
    <td><a href="https://icetrade.by/trades/view/1">Test Title</a></td>
    <td>Test Org</td><td>Active</td><td>2025-03-15</td>
  </tr></table>
`
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 0, 'Should skip auction with empty number')
})
test('should validate title is not empty', ({ assert }) => {
const scraper = new ScraperService()
// Assumed markup, as above: the row carries an empty title
const html = `
  <table><tr>
    <td>AUC-001</td>
    <td><a href="https://icetrade.by/trades/view/1"></a></td>
    <td>Test Org</td><td>Active</td><td>2025-03-15</td>
  </tr></table>
`
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 0, 'Should skip auction with empty title')
})
test('should validate organization is not empty', ({ assert }) => {
const scraper = new ScraperService()
// Assumed markup, as above: the row carries an empty organization
const html = `
  <table><tr>
    <td>AUC-001</td>
    <td><a href="https://icetrade.by/trades/view/1">Test Title</a></td>
    <td></td><td>Active</td><td>2025-03-15</td>
  </tr></table>
`
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 0, 'Should skip auction with empty organization')
})
test('should validate link is a valid URL', ({ assert }) => {
const scraper = new ScraperService()
// Assumed markup, as above: the href is not a valid URL
const html = `
  <table><tr>
    <td>AUC-001</td>
    <td><a href="not a valid url">Test Title</a></td>
    <td>Test Org</td><td>Active</td><td>2025-03-15</td>
  </tr></table>
`
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 0, 'Should skip auction with invalid URL')
})
test('should trim whitespace from all fields', ({ assert }) => {
const scraper = new ScraperService()
// Assumed markup, as above: every field padded with whitespace so the
// trimmed values match the assertions below
const html = `
  <table><tr>
    <td>  AUC-001  </td>
    <td><a href="https://icetrade.by/trades/view/1">  Test Title  </a></td>
    <td>  Test Org  </td><td>  Active  </td><td>  2025-03-15  </td>
  </tr></table>
`
const auctions = scraper.parsePage(html)
assert.lengthOf(auctions, 1)
const auction = auctions[0]
assert.equal(auction.auctionNum, 'AUC-001')
assert.equal(auction.title, 'Test Title')
assert.equal(auction.organization, 'Test Org')
assert.equal(auction.status, 'Active')
assert.equal(auction.deadline, '2025-03-15')
})
})
test.group('ScraperService - ScraperError', () => {
test('should create ScraperError with message', ({ assert }) => {
const error = new ScraperError('Test error message')
assert.equal(error.message, 'Test error message')
assert.equal(error.name, 'ScraperError')
assert.instanceOf(error, Error)
})
test('should create ScraperError with cause', ({ assert }) => {
const originalError = new Error('Original error')
const error = new ScraperError('Wrapped error', originalError)
assert.equal(error.message, 'Wrapped error')
assert.equal(error.cause, originalError)
})
})