// logos.co/scrape.js — scrapes upcoming Logos events from lu.ma
const fs = require('fs')
const puppeteer = require('puppeteer')

// URL to scrape
const url = 'https://lu.ma/logosevents'

/**
 * Launches a headless browser, scrapes the Logos event listing at `url`,
 * and writes the extracted events to `events.json` in the working directory.
 *
 * Each event record has: title, date, location, href, thumbnail
 * (date/location/thumbnail may be undefined when the card lacks them).
 * Errors are logged to stderr rather than thrown; the browser is always
 * closed, even on failure.
 */
async function scrapeData() {
  let browser
  try {
    // Launch Puppeteer
    browser = await puppeteer.launch()
    const page = await browser.newPage()
    // networkidle2: the event list is rendered client-side, so wait for
    // network activity to settle before querying the DOM.
    await page.goto(url, { waitUntil: 'networkidle2' })
    // Wait for the required elements to load
    await page.waitForSelector('a.event-link.content-link')

    // Scrape the data inside the page context.
    const events = await page.evaluate(() => {
      const eventLinks = document.querySelectorAll('a.event-link.content-link')
      return Array.from(eventLinks, (eventLink) => {
        // The card's details live in the sibling element of the link;
        // guard with `?.` in case the markup changes.
        const eventContent = eventLink.nextElementSibling
        return {
          title: eventLink.getAttribute('aria-label'),
          date: eventContent
            ?.querySelector('.event-time .text-warning')
            ?.textContent.trim(),
          location: eventContent
            ?.querySelector('.attribute:nth-of-type(2) > .text-ellipses')
            ?.textContent.trim(),
          // An anchor element's `href` property is already an absolute URL
          // (the original code prepended the origin a second time, yielding
          // broken links like "https://lu.mahttps://lu.ma/...").
          href: eventLink.href,
          thumbnail: eventContent?.querySelector('img')?.src,
        }
      })
    })

    // Write data to a JSON file
    fs.writeFileSync('events.json', JSON.stringify(events, null, 2))
    console.log('Data scraped and saved to events.json')
  } catch (error) {
    console.error('Error scraping data:', error)
  } finally {
    // Always release the headless browser, even when scraping fails
    // (the original only closed it on the success path, leaking the
    // Chromium process on any error or selector timeout).
    await browser?.close()
  }
}

// Execute the function
scrapeData()