feat: save thumbnails locally

This commit is contained in:
jinhojang6 2024-06-01 00:04:14 +09:00
parent 65cca55a86
commit 76718542f2
No known key found for this signature in database
GPG Key ID: 1762F21FE8B543F8
4 changed files with 32 additions and 12 deletions

View File

@ -3,14 +3,14 @@
"title": "Logos Assembly: Belgrade",
"location": "Beograd",
"href": "https://lu.ma/LA2",
"thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/e9/6d3a2f12-1c8c-42e9-8196-c1d240948030",
"thumbnail": "/events/la2/thumbnail.png",
"date": "Jun 3\n4:30 PM"
},
{
"title": "Logos Assembly: Brno",
"location": "Studentský klub U Kachničky",
"href": "https://lu.ma/la3",
"thumbnail": "https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=180,height=180/event-covers/16/6718e8d5-8ce9-4247-8dbb-4f6e8d60392d",
"thumbnail": "/events/la3/thumbnail.png",
"date": "Jun 13\n4:00 PM"
}
]

View File

@ -1,10 +1,10 @@
const fetch = require('node-fetch')
const fs = require('fs')
const path = require('path')
const { JSDOM } = require('jsdom')
// URL to scrape
const url = 'https://lu.ma/logosevents'
const urlCompact = 'https://lu.ma/logosevents?compact=true'
// Function to fetch and parse HTML
@ -21,11 +21,9 @@ async function scrapeData() {
// Select elements with the .event-link class
const eventLinks = document.querySelectorAll('a.event-link.content-link')
eventLinks.forEach(eventLink => {
for (const eventLink of eventLinks) {
const title = eventLink.getAttribute('aria-label')
const href = eventLink.href
const eventContent = eventLink.nextElementSibling
const location = eventContent
@ -38,15 +36,21 @@ async function scrapeData() {
.querySelector('.jsx-2921306942 > .date')
?.textContent.trim()
// Download and save the image
let imagePath = ''
if (thumbnail) {
imagePath = await downloadImage(thumbnail, href)
}
// Push the extracted data to the events array
events.push({
title,
date: date,
date,
location,
href: `https://lu.ma${href}`,
thumbnail,
thumbnail: imagePath.replace('static', ''), // Remove 'static' from the path
})
})
}
// Write data to a JSON file
fs.writeFileSync('events.json', JSON.stringify(events, null, 2))
@ -67,7 +71,6 @@ async function scrapeEventDate() {
const events = []
console.log('html', html)
const eventData = document.querySelectorAll('.section')
eventData?.forEach(event => {
@ -79,8 +82,6 @@ async function scrapeEventDate() {
.querySelector('.time.text-tertiary-alpha')
?.textContent.trim()
console.log('time', time)
const href = event.querySelector('a')?.href
if (date && href) {
@ -108,6 +109,25 @@ async function scrapeEventDate() {
}
}
/**
 * Download an event thumbnail and save it as
 * `<__dirname>/static/events/<slug>/thumbnail.png`.
 *
 * The slug is the last non-empty path segment of `href`, lowercased, with any
 * query string or fragment removed and non filename-safe characters replaced
 * by `-`.
 *
 * @param {string} url - URL of the image to download.
 * @param {string} href - Event link (e.g. `/LA2`) used to name the target folder.
 * @returns {Promise<string>} Site-relative path of the saved image,
 *   e.g. `/events/la2/thumbnail.png` (never contains `static`).
 * @throws {Error} If no folder name can be derived from `href`, or if the
 *   server responds with a non-2xx status.
 */
async function downloadImage(url, href) {
  // Drop query/fragment, then ignore empty segments so a trailing slash
  // ("/la2/") still resolves to "la2" instead of an empty folder name.
  const segments = href
    .split(/[?#]/)[0]
    .split('/')
    .filter(Boolean)
  const lastSegment = segments.length > 0 ? segments[segments.length - 1] : ''

  // The href comes from scraped HTML: keep only filename-safe characters and
  // reject dot segments so the write can never leave the events directory.
  const cleanHref = lastSegment.toLowerCase().replace(/[^a-z0-9._-]/g, '-')
  if (cleanHref === '' || cleanHref === '.' || cleanHref === '..') {
    throw new Error(`Cannot derive an event folder name from href: ${href}`)
  }

  const response = await fetch(url)
  // Without this check an HTML error page would be saved as thumbnail.png.
  if (!response.ok) {
    throw new Error(`Failed to download image ${url}: HTTP ${response.status}`)
  }
  // arrayBuffer() is available on every fetch implementation, unlike buffer().
  const buffer = Buffer.from(await response.arrayBuffer())

  const folder = path.join(__dirname, 'static', 'events', cleanHref)

  // Create directory if it doesn't exist
  fs.mkdirSync(folder, { recursive: true })

  const imagePath = path.join(folder, 'thumbnail.png')
  fs.writeFileSync(imagePath, buffer)
  console.log(`Image downloaded: ${imagePath}`)

  // The return value is a URL path, not a filesystem path: always use '/'.
  return path.posix.join('/events', cleanHref, 'thumbnail.png')
}
async function main() {
await scrapeData()
await scrapeEventDate()

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB