auction-scrapper/tests/unit/services/scraper_service.spec.ts
Vakula Uladimir 12f005e335 init
2025-10-17 11:27:52 +03:00

275 lines
8.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { test } from '@japa/runner'
import { ScraperService, ScraperError } from '#services/scraper_service'
import { readFile } from 'node:fs/promises'
import { fileURLToPath } from 'node:url'
import { dirname, join } from 'node:path'
// Get the directory path for fixtures.
// ESM has no __filename/__dirname globals, so reconstruct them from
// import.meta.url (standard CommonJS-compat shim).
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
// Fixtures live two levels up from tests/unit/services/: tests/fixtures.
const fixturesPath = join(__dirname, '..', '..', 'fixtures')
test.group('ScraperService - parsePage', () => {
  // Loads an HTML fixture from disk and runs it through a fresh parser.
  const parseFixture = async (fixture: string) => {
    const service = new ScraperService()
    const html = await readFile(join(fixturesPath, fixture), 'utf-8')
    return service.parsePage(html)
  }

  test('should parse valid auction HTML correctly', async ({ assert }) => {
    const auctions = await parseFixture('sample_auctions.html')
    assert.lengthOf(auctions, 3, 'Should parse all 3 auctions')
    // Spot-check every field of the first parsed auction.
    const [first] = auctions
    assert.equal(first.auctionNum, 'AUC-2024-001')
    assert.equal(first.title, 'Поставка офисной мебели')
    assert.equal(first.organization, 'ООО "Тестовая Компания"')
    assert.equal(first.status, 'Активный')
    assert.equal(first.deadline, '15.03.2025')
    assert.equal(first.link, 'https://icetrade.by/trades/view/12345')
    assert.equal(first.description, 'Комплект офисной мебели для нового офиса')
  })

  test('should handle relative URLs and convert to absolute', async ({ assert }) => {
    const auctions = await parseFixture('sample_auctions.html')
    // First two auctions have relative URLs that must be absolutized.
    assert.isTrue(auctions[0].link.startsWith('https://icetrade.by/'))
    assert.isTrue(auctions[1].link.startsWith('https://icetrade.by/'))
    // Third auction already has an absolute URL and must pass through untouched.
    assert.equal(auctions[2].link, 'https://icetrade.by/trades/view/12347')
  })

  test('should handle null deadline gracefully', async ({ assert }) => {
    const auctions = await parseFixture('sample_auctions.html')
    // Third auction's deadline cell is empty in the fixture.
    assert.isNull(auctions[2].deadline)
  })

  test('should handle null description when not present', async ({ assert }) => {
    const auctions = await parseFixture('sample_auctions.html')
    // Second auction carries no description in the fixture.
    assert.isNull(auctions[1].description)
  })

  test('should return empty array when no auctions table found', async ({ assert }) => {
    const auctions = await parseFixture('empty_page.html')
    assert.lengthOf(auctions, 0, 'Should return empty array')
  })

  test('should skip invalid auction rows and continue parsing', async ({ assert }) => {
    // Both rows in malformed_auction.html should fail validation:
    //   row 1 — empty auction number
    //   row 2 — no href (empty string fails URL validation)
    const auctions = await parseFixture('malformed_auction.html')
    assert.lengthOf(auctions, 0, 'Should skip all malformed rows')
  })

  test('should throw ScraperError for completely invalid HTML', ({ assert }) => {
    const service = new ScraperService()
    // Cheerio is very lenient, so this should not throw — it simply
    // yields no auctions table and therefore an empty result.
    const garbage = 'This is not valid HTML at all {{{'
    const auctions = service.parsePage(garbage)
    assert.lengthOf(auctions, 0)
  })
})
test.group('ScraperService - fetchPage', () => {
  // White-box access to the private buildUrl method via an `any` cast.
  const urlForPage = (page: number): string => {
    const service = new ScraperService()
    return (service as any).buildUrl(page)
  }

  test('should build correct URL with all parameters', ({ assert }) => {
    const url = urlForPage(1)
    // Every query fragment the scraper depends on must survive URL building
    // (brackets and colons arrive percent-encoded).
    const expectedFragments = [
      'https://icetrade.by/trades/index',
      'p=1',
      'onPage=100',
      'sort=num%3Adesc',
      'zakup_type%5B1%5D=1',
      'zakup_type%5B2%5D=1',
      'r%5B1%5D=1',
      't%5BTrade%5D=1',
    ]
    for (const fragment of expectedFragments) {
      assert.include(url, fragment)
    }
  })

  test('should include correct page number in URL', ({ assert }) => {
    assert.include(urlForPage(1), 'p=1')
    assert.include(urlForPage(5), 'p=5')
  })
})
test.group('ScraperService - scrapeAuctions', () => {
  test('should throw error if maxPages is less than 1', async ({ assert }) => {
    const service = new ScraperService()
    // Zero and negative page counts must both be rejected up front.
    for (const invalidPages of [0, -5]) {
      await assert.rejects(
        () => service.scrapeAuctions(invalidPages),
        'maxPages must be at least 1'
      )
    }
  })

  // Note: Full integration tests for scrapeAuctions would require:
  // 1. Mocking fetch API
  // 2. Providing mock responses
  // These should be in integration tests, not unit tests
})
test.group('ScraperService - validation', () => {
  // Runs raw markup through a fresh parser instance.
  const parse = (markup: string) => new ScraperService().parsePage(markup)

  test('should validate auction number is not empty', ({ assert }) => {
    // First cell (auction number) is whitespace only.
    const markup = `
      <table class="auctions w100">
        <tbody>
          <tr>
            <td> </td>
            <td><a href="https://test.com">Title</a></td>
            <td>Org</td>
            <td>Status</td>
            <td></td>
          </tr>
        </tbody>
      </table>
    `
    assert.lengthOf(parse(markup), 0, 'Should skip auction with empty number')
  })

  test('should validate title is not empty', ({ assert }) => {
    // Anchor text is whitespace only.
    const markup = `
      <table class="auctions w100">
        <tbody>
          <tr>
            <td>AUC-001</td>
            <td><a href="https://test.com"> </a></td>
            <td>Org</td>
            <td>Status</td>
            <td></td>
          </tr>
        </tbody>
      </table>
    `
    assert.lengthOf(parse(markup), 0, 'Should skip auction with empty title')
  })

  test('should validate organization is not empty', ({ assert }) => {
    // Organization cell is whitespace only.
    const markup = `
      <table class="auctions w100">
        <tbody>
          <tr>
            <td>AUC-001</td>
            <td><a href="https://test.com">Title</a></td>
            <td> </td>
            <td>Status</td>
            <td></td>
          </tr>
        </tbody>
      </table>
    `
    assert.lengthOf(parse(markup), 0, 'Should skip auction with empty organization')
  })

  test('should validate link is a valid URL', ({ assert }) => {
    // Empty href fails URL validation.
    const markup = `
      <table class="auctions w100">
        <tbody>
          <tr>
            <td>AUC-001</td>
            <td><a href="">Title</a></td>
            <td>Org</td>
            <td>Status</td>
            <td></td>
          </tr>
        </tbody>
      </table>
    `
    assert.lengthOf(parse(markup), 0, 'Should skip auction with invalid URL')
  })

  test('should trim whitespace from all fields', ({ assert }) => {
    // Every cell value is padded; the parser must return trimmed fields.
    const markup = `
      <table class="auctions w100">
        <tbody>
          <tr>
            <td> AUC-001 </td>
            <td><a href="https://test.com"> Test Title </a></td>
            <td> Test Org </td>
            <td> Active </td>
            <td> 2025-03-15 </td>
          </tr>
        </tbody>
      </table>
    `
    const auctions = parse(markup)
    assert.lengthOf(auctions, 1)
    const [auction] = auctions
    assert.equal(auction.auctionNum, 'AUC-001')
    assert.equal(auction.title, 'Test Title')
    assert.equal(auction.organization, 'Test Org')
    assert.equal(auction.status, 'Active')
    assert.equal(auction.deadline, '2025-03-15')
  })
})
test.group('ScraperService - ScraperError', () => {
  test('should create ScraperError with message', ({ assert }) => {
    const err = new ScraperError('Test error message')
    // Must be a proper Error subclass with its own name tag.
    assert.instanceOf(err, Error)
    assert.equal(err.name, 'ScraperError')
    assert.equal(err.message, 'Test error message')
  })

  test('should create ScraperError with cause', ({ assert }) => {
    const inner = new Error('Original error')
    const wrapped = new ScraperError('Wrapped error', inner)
    // The original error must be preserved on the `cause` property.
    assert.equal(wrapped.message, 'Wrapped error')
    assert.equal(wrapped.cause, inner)
  })
})