const fetch = require('node-fetch')
const fs = require('fs')
const { JSDOM } = require('jsdom')

// Listing page to scrape for titles/locations/thumbnails
const url = 'https://lu.ma/logosevents'

// Compact variant of the same listing; exposes date/time in a simpler layout
const urlCompact = 'https://lu.ma/logosevents?compact=true'

/**
 * Fetches the Logos events listing page, extracts one record per event
 * (title, date, location, href, thumbnail) and writes the array to
 * events.json. Errors are logged, not rethrown (best-effort scraper).
 */
async function scrapeData() {
  try {
    const response = await fetch(url)
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`)
    }
    const html = await response.text()

    const { document } = new JSDOM(html).window

    const events = []

    // Each event card is an anchor; its details live in the next sibling
    const eventLinks = document.querySelectorAll('a.event-link.content-link')

    eventLinks.forEach(eventLink => {
      const title = eventLink.getAttribute('aria-label')
      const href = eventLink.href

      const eventContent = eventLink.nextElementSibling

      // NOTE(review): selectors are tied to lu.ma's current markup
      // (including the generated .jsx-* class) and will break if it changes
      const location = eventContent
        ?.querySelector('.attribute:nth-of-type(2) > .text-ellipses')
        ?.textContent.trim()

      const thumbnail = eventContent?.querySelector('img')?.src

      const date = eventLink
        .querySelector('.jsx-2921306942 > .date')
        ?.textContent.trim()

      events.push({
        title,
        date,
        location,
        // hrefs on the listing page are site-relative (e.g. "/LA2")
        href: `https://lu.ma${href}`,
        thumbnail,
      })
    })

    fs.writeFileSync('events.json', JSON.stringify(events, null, 2))
    console.log('Data scraped and saved to events.json')
  } catch (error) {
    console.error('Error scraping data:', error)
  }
}

/**
 * Fetches the compact listing page, extracts a date/time per event and
 * merges it into the previously written events.json, matching on href.
 * Events without a compact-page match keep their existing date.
 */
async function scrapeEventDate() {
  try {
    const response = await fetch(urlCompact)
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`)
    }
    const html = await response.text()

    const { document } = new JSDOM(html).window

    const events = []

    const sections = document.querySelectorAll('.section')

    sections.forEach(section => {
      const date = section
        .querySelector('.date-inner > .date')
        ?.textContent.trim()

      const time = section
        .querySelector('.time.text-tertiary-alpha')
        ?.textContent.trim()

      const href = section.querySelector('a')?.href

      if (date && href) {
        events.push({
          // time may be absent; never interpolate undefined into the string
          date: time ? `${date}\n${time}` : date,
          href: `https://lu.ma${href}`,
        })
      }
    })

    // Merge ONCE, after all sections are collected. The original re-read
    // and re-wrote events.json inside the loop, clobbering dates with
    // undefined for events the incomplete array did not yet contain.
    const existing = JSON.parse(fs.readFileSync('events.json', 'utf8'))

    const result = existing.map(entry => {
      const match = events.find(event => event.href === entry.href)
      return {
        ...entry,
        // fall back to the date scraped by scrapeData when no match exists
        date: match?.date ?? entry.date,
      }
    })

    fs.writeFileSync('events.json', JSON.stringify(result, null, 2))
    console.log('Dates merged into events.json')
  } catch (error) {
    console.error('Error scraping data:', error)
  }
}

// scrapeEventDate must run after scrapeData: it reads the file it writes
async function main() {
  await scrapeData()
  await scrapeEventDate()
}

main()