// Unit tests for ScraperService: page parsing, URL building, input validation, and ScraperError.
import { test } from '@japa/runner'
import { ScraperService, ScraperError } from '#services/scraper_service'
import { readFile } from 'node:fs/promises'
import { fileURLToPath } from 'node:url'
import { dirname, join } from 'node:path'

// Resolve this test file's directory so fixture paths work regardless of the
// process CWD (ES modules have no built-in __dirname/__filename).
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)
const fixturesPath = join(__dirname, '..', '..', 'fixtures')
test.group('ScraperService - parsePage', () => {
|
||
test('should parse valid auction HTML correctly', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
assert.lengthOf(auctions, 3, 'Should parse all 3 auctions')
|
||
|
||
// Verify first auction
|
||
const firstAuction = auctions[0]
|
||
assert.equal(firstAuction.auctionNum, 'AUC-2024-001')
|
||
assert.equal(firstAuction.title, 'Поставка офисной мебели')
|
||
assert.equal(firstAuction.organization, 'ООО "Тестовая Компания"')
|
||
assert.equal(firstAuction.status, 'Активный')
|
||
assert.equal(firstAuction.deadline, '15.03.2025')
|
||
assert.equal(firstAuction.link, 'https://icetrade.by/trades/view/12345')
|
||
assert.equal(firstAuction.description, 'Комплект офисной мебели для нового офиса')
|
||
})
|
||
|
||
test('should handle relative URLs and convert to absolute', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
// First two auctions have relative URLs
|
||
assert.isTrue(auctions[0].link.startsWith('https://icetrade.by/'))
|
||
assert.isTrue(auctions[1].link.startsWith('https://icetrade.by/'))
|
||
|
||
// Third auction already has absolute URL
|
||
assert.equal(auctions[2].link, 'https://icetrade.by/trades/view/12347')
|
||
})
|
||
|
||
test('should handle null deadline gracefully', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
// Third auction has empty deadline
|
||
assert.isNull(auctions[2].deadline)
|
||
})
|
||
|
||
test('should handle null description when not present', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'sample_auctions.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
// Second auction has no description
|
||
assert.isNull(auctions[1].description)
|
||
})
|
||
|
||
test('should return empty array when no auctions table found', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'empty_page.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
assert.lengthOf(auctions, 0, 'Should return empty array')
|
||
})
|
||
|
||
test('should skip invalid auction rows and continue parsing', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = await readFile(join(fixturesPath, 'malformed_auction.html'), 'utf-8')
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
|
||
// Both rows in malformed_auction.html should fail validation
|
||
// First row: empty auction number
|
||
// Second row: no href (empty string fails URL validation)
|
||
assert.lengthOf(auctions, 0, 'Should skip all malformed rows')
|
||
})
|
||
|
||
test('should throw ScraperError for completely invalid HTML', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const invalidHtml = 'This is not valid HTML at all {{{'
|
||
|
||
// Should not throw - cheerio is very lenient
|
||
// But should return empty array since no table found
|
||
const auctions = scraper.parsePage(invalidHtml)
|
||
assert.lengthOf(auctions, 0)
|
||
})
|
||
})
test.group('ScraperService - fetchPage', () => {
|
||
test('should build correct URL with all parameters', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
|
||
// Access private method via type assertion for testing
|
||
const buildUrl = (scraper as any).buildUrl.bind(scraper)
|
||
const url = buildUrl(1)
|
||
|
||
assert.include(url, 'https://icetrade.by/trades/index')
|
||
assert.include(url, 'p=1')
|
||
assert.include(url, 'onPage=100')
|
||
assert.include(url, 'sort=num%3Adesc')
|
||
assert.include(url, 'zakup_type%5B1%5D=1')
|
||
assert.include(url, 'zakup_type%5B2%5D=1')
|
||
assert.include(url, 'r%5B1%5D=1')
|
||
assert.include(url, 't%5BTrade%5D=1')
|
||
})
|
||
|
||
test('should include correct page number in URL', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const buildUrl = (scraper as any).buildUrl.bind(scraper)
|
||
|
||
const url1 = buildUrl(1)
|
||
const url5 = buildUrl(5)
|
||
|
||
assert.include(url1, 'p=1')
|
||
assert.include(url5, 'p=5')
|
||
})
|
||
})
test.group('ScraperService - scrapeAuctions', () => {
|
||
test('should throw error if maxPages is less than 1', async ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
|
||
await assert.rejects(
|
||
async () => await scraper.scrapeAuctions(0),
|
||
'maxPages must be at least 1'
|
||
)
|
||
|
||
await assert.rejects(
|
||
async () => await scraper.scrapeAuctions(-5),
|
||
'maxPages must be at least 1'
|
||
)
|
||
})
|
||
|
||
// Note: Full integration tests for scrapeAuctions would require:
|
||
// 1. Mocking fetch API
|
||
// 2. Providing mock responses
|
||
// These should be in integration tests, not unit tests
|
||
})
test.group('ScraperService - validation', () => {
|
||
test('should validate auction number is not empty', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = `
|
||
<table class="auctions w100">
|
||
<tbody>
|
||
<tr>
|
||
<td> </td>
|
||
<td><a href="https://test.com">Title</a></td>
|
||
<td>Org</td>
|
||
<td>Status</td>
|
||
<td></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
`
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
assert.lengthOf(auctions, 0, 'Should skip auction with empty number')
|
||
})
|
||
|
||
test('should validate title is not empty', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = `
|
||
<table class="auctions w100">
|
||
<tbody>
|
||
<tr>
|
||
<td>AUC-001</td>
|
||
<td><a href="https://test.com"> </a></td>
|
||
<td>Org</td>
|
||
<td>Status</td>
|
||
<td></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
`
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
assert.lengthOf(auctions, 0, 'Should skip auction with empty title')
|
||
})
|
||
|
||
test('should validate organization is not empty', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = `
|
||
<table class="auctions w100">
|
||
<tbody>
|
||
<tr>
|
||
<td>AUC-001</td>
|
||
<td><a href="https://test.com">Title</a></td>
|
||
<td> </td>
|
||
<td>Status</td>
|
||
<td></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
`
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
assert.lengthOf(auctions, 0, 'Should skip auction with empty organization')
|
||
})
|
||
|
||
test('should validate link is a valid URL', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = `
|
||
<table class="auctions w100">
|
||
<tbody>
|
||
<tr>
|
||
<td>AUC-001</td>
|
||
<td><a href="">Title</a></td>
|
||
<td>Org</td>
|
||
<td>Status</td>
|
||
<td></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
`
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
assert.lengthOf(auctions, 0, 'Should skip auction with invalid URL')
|
||
})
|
||
|
||
test('should trim whitespace from all fields', ({ assert }) => {
|
||
const scraper = new ScraperService()
|
||
const html = `
|
||
<table class="auctions w100">
|
||
<tbody>
|
||
<tr>
|
||
<td> AUC-001 </td>
|
||
<td><a href="https://test.com"> Test Title </a></td>
|
||
<td> Test Org </td>
|
||
<td> Active </td>
|
||
<td> 2025-03-15 </td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
`
|
||
|
||
const auctions = scraper.parsePage(html)
|
||
assert.lengthOf(auctions, 1)
|
||
|
||
const auction = auctions[0]
|
||
assert.equal(auction.auctionNum, 'AUC-001')
|
||
assert.equal(auction.title, 'Test Title')
|
||
assert.equal(auction.organization, 'Test Org')
|
||
assert.equal(auction.status, 'Active')
|
||
assert.equal(auction.deadline, '2025-03-15')
|
||
})
|
||
})
test.group('ScraperService - ScraperError', () => {
|
||
test('should create ScraperError with message', ({ assert }) => {
|
||
const error = new ScraperError('Test error message')
|
||
|
||
assert.equal(error.message, 'Test error message')
|
||
assert.equal(error.name, 'ScraperError')
|
||
assert.instanceOf(error, Error)
|
||
})
|
||
|
||
test('should create ScraperError with cause', ({ assert }) => {
|
||
const originalError = new Error('Original error')
|
||
const error = new ScraperError('Wrapped error', originalError)
|
||
|
||
assert.equal(error.message, 'Wrapped error')
|
||
assert.equal(error.cause, originalError)
|
||
})
|
||
})