From 7308ed4239919e9a31c031c7c7325c1399d61307 Mon Sep 17 00:00:00 2001
From: jinhojang6
Date: Fri, 31 May 2024 02:57:30 +0900
Subject: [PATCH] feat: implement parser

---
 events.json | 14 ++++++++++++++
 scrape.js   | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 events.json
 create mode 100644 scrape.js

diff --git a/events.json b/events.json
new file mode 100644
index 0000000..85cb3a7
--- /dev/null
+++ b/events.json
@@ -0,0 +1,14 @@
+[
+  {
+    "title": "Logos Assembly: Belgrade",
+    "location": "Beograd",
+    "link": "/LA2",
+    "thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/e9/6d3a2f12-1c8c-42e9-8196-c1d240948030"
+  },
+  {
+    "title": "Logos Assembly: Brno",
+    "location": "Studentský klub U Kachničky",
+    "link": "/la3",
+    "thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/16/6718e8d5-8ce9-4247-8dbb-4f6e8d60392d"
+  }
+]
\ No newline at end of file
diff --git a/scrape.js b/scrape.js
new file mode 100644
index 0000000..ed50a0f
--- /dev/null
+++ b/scrape.js
@@ -0,0 +1,61 @@
+const fetch = require('node-fetch')
+const fs = require('fs')
+const { JSDOM } = require('jsdom')
+
+// URL to scrape
+const url = 'https://lu.ma/logosevents'
+
+// Fetch the events page, extract every event card, and write the
+// results to events.json.
+async function scrapeData() {
+  try {
+    const response = await fetch(url)
+    // Surface HTTP failures instead of parsing an error page
+    if (!response.ok) {
+      throw new Error(`Request failed: ${response.status} ${response.statusText}`)
+    }
+    const html = await response.text()
+
+    const dom = new JSDOM(html)
+    const document = dom.window.document
+
+    const events = []
+
+    // Select elements with the .event-link class
+    const eventLinks = document.querySelectorAll('a.event-link.content-link')
+
+    eventLinks.forEach(eventLink => {
+      const title = eventLink.getAttribute('aria-label')
+      // Keep the raw (relative) href, e.g. "/LA2"
+      const link = eventLink.getAttribute('href')
+
+      // The card body is the sibling element; it can be missing,
+      // so every lookup below is guarded with optional chaining
+      const eventContent = eventLink.nextElementSibling
+      const location = eventContent
+        ?.querySelector('.attribute:nth-of-type(2) > .text-ellipses')
+        ?.textContent.trim()
+      const thumbnail = eventContent?.querySelector('img')?.src
+
+      // Push the extracted data to the events array
+      events.push({
+        title,
+        location,
+        link,
+        thumbnail,
+      })
+    })
+
+    // Write data to a JSON file
+    fs.writeFileSync('events.json', JSON.stringify(events, null, 2))
+
+    console.log('Data scraped and saved to events.json')
+  } catch (error) {
+    console.error('Error scraping data:', error)
+    // Non-zero exit so cron/CI callers can detect the failure
+    process.exitCode = 1
+  }
+}
+
+// Execute the function
+scrapeData()