feat: implement parser

This commit is contained in:
jinhojang6 2024-05-31 02:57:30 +09:00 committed by Jinho Jang
parent 58ed27ec17
commit 7308ed4239
2 changed files with 65 additions and 0 deletions

14
events.json Normal file
View File

@ -0,0 +1,14 @@
[
{
"title": "Logos Assembly: Belgrade",
"location": "Beograd",
"link": "/LA2",
"thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/e9/6d3a2f12-1c8c-42e9-8196-c1d240948030"
},
{
"title": "Logos Assembly: Brno",
"location": "Studentský klub U Kachničky",
"link": "/la3",
"thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/16/6718e8d5-8ce9-4247-8dbb-4f6e8d60392d"
}
]

51
scrape.js Normal file
View File

@ -0,0 +1,51 @@
const fetch = require('node-fetch')
const fs = require('fs')
const { JSDOM } = require('jsdom')
// Page on lu.ma whose event listing is fetched and parsed by scrapeData()
const url = 'https://lu.ma/logosevents'
/**
 * Fetches the page at `url`, extracts every listed event and writes the
 * result to `events.json` as a pretty-printed JSON array.
 *
 * Each entry is `{ title, location, link, thumbnail }`:
 *   - `title`     comes from the anchor's `aria-label` attribute,
 *   - `link`      comes from the anchor's `href`,
 *   - `location`  and `thumbnail` are read from the element that directly
 *                 follows the anchor; when that element (or the node inside
 *                 it) is missing they are `undefined` and therefore omitted
 *                 from the JSON output.
 *
 * Any failure (network error, non-2xx response, parse or write error) is
 * logged with console.error and NOT rethrown; in that case `events.json`
 * is left untouched.
 *
 * @returns {Promise<void>}
 */
async function scrapeData() {
  try {
    const response = await fetch(url)
    // A non-2xx body is an error page, not the listing: parsing it would
    // match no events and overwrite events.json with an empty array.
    if (!response.ok) {
      throw new Error(
        `Request to ${url} failed with status ${response.status} ${response.statusText}`
      )
    }

    const html = await response.text()
    const { document } = new JSDOM(html).window

    // Each event is an anchor followed by a sibling holding its details.
    const eventLinks = document.querySelectorAll('a.event-link.content-link')
    const events = Array.from(eventLinks, eventLink => {
      // May be null if the markup changes; one malformed entry should not
      // abort the whole scrape, so every lookup below is null-safe.
      const eventContent = eventLink.nextElementSibling

      return {
        title: eventLink.getAttribute('aria-label'),
        location: eventContent
          ?.querySelector('.attribute:nth-of-type(2) > .text-ellipses')
          ?.textContent.trim(),
        link: eventLink.href,
        thumbnail: eventContent?.querySelector('img')?.src,
      }
    })

    // Write data to a JSON file
    fs.writeFileSync('events.json', JSON.stringify(events, null, 2))
    console.log('Data scraped and saved to events.json')
  } catch (error) {
    console.error('Error scraping data:', error)
  }
}
// Run the scraper when this file is executed. The returned promise is not
// awaited: scrapeData() wraps its whole body in try/catch and logs failures.
scrapeData()