Files
lostark-news-webscraper/main.py
雲華 12bd0027f9 Initial commit
Added the first implementation of scraping the website. The "thumbnail"
option is disabled in the embeds sent because they are not properly
handled on Guilded yet (cropping is wonky). This script should also work
for any Discord webhook URLs, but it hasn't been tested. There currently
isn't a way to pre-populate the database with hashes unless you manually
do it beforehand. This is something I'm looking at adding in the future
probably.
2021-08-17 17:33:52 -04:00

63 lines
1.9 KiB
Python

from datetime import datetime as dt
from dotenv import load_dotenv
from news import NewsScraper
from os.path import join, dirname
import schedule
import os
import asyncio
import json
import time
# Load configuration from the `.env` file that sits next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# Destination webhook for the news embeds; None when the variable is unset.
WEBHOOK_URL = os.environ.get('WEBHOOK_URL')

# Create the event loop explicitly instead of relying on
# asyncio.get_event_loop() to create one implicitly: that implicit creation
# is deprecated and fails on newer Python versions when no loop is current.
# Registering it as the current loop keeps any get_event_loop() lookups made
# elsewhere pointing at this same loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
async def publish_news():
    """Scrape the news site and post every returned article to the webhook.

    A ``NewsScraper`` bound to the module-level ``loop`` is created, its
    ``news_articles()`` result is iterated, and one JSON embed payload per
    article is POSTed to ``WEBHOOK_URL`` through ``la_news.client``. The
    status of each response is printed.

    Each article is read through the keys ``title``, ``preview_text``,
    ``article_link``, ``tag`` and ``image_preview``.

    The scraper is always closed, including when scraping or posting raises,
    so its underlying client is not leaked between scheduled runs.
    """
    print('Running web scrape...')
    la_news = NewsScraper(loop=loop)
    try:
        articles = await la_news.news_articles()

        # A falsy result (None or an empty collection) means nothing to post.
        for article in articles or ():
            # NOTE(review): the apostrophe escaping below is kept from the
            # original; json.dumps already produces valid JSON, so confirm
            # whether the receiving service actually needs it.
            payload = {
                "content": None,
                "embeds": [
                    {
                        "title": article['title'].replace("'", "\\'"),
                        "description": article['preview_text'].replace("'", "\\'"),
                        "url": article['article_link'].replace("'", "\\'"),
                        "color": 5814783,  # 0x58B9FF
                        "footer": {
                            "text": article['tag']
                        },
                        # Naive UTC time in str(datetime) form.
                        "timestamp": f"{dt.utcnow()}",
                        "image": {
                            "url": article['image_preview']
                        },
                        # Thumbnail is disabled: the commit notes say Guilded
                        # does not crop it properly yet.
                        #"thumbnail": {
                        #    "url": "https://images.ctfassets.net/umhrp0op95v1/S3yKwaVAOi8Bgqg4n4scf"
                        #           "/adae769671b271b88f97d31721432986/LA_LOGO.png "
                        #}
                    }
                ]
            }
            resp = await la_news.client.post(
                url=WEBHOOK_URL,
                data=json.dumps(payload).encode('UTF-8'),
                headers={'Content-Type': 'application/json'},
            )
            print(resp.status)
    finally:
        # Release the scraper's resources even if a request above failed.
        await la_news.close()
def run_async(coroutine):
    """Run an async callable to completion on the module-level event loop.

    ``coroutine`` is called with no arguments to obtain the awaitable, which
    is then driven by ``loop.run_until_complete``. This lets synchronous
    code schedule async jobs. Nothing is returned.
    """
    loop.run_until_complete(coroutine())
# Register the scrape as an hourly job. Jobs only fire from run_pending(),
# so the loop below polls for due work.
schedule.every().hour.do(run_async, publish_news)

while True:
    print('Checking schedule...')
    schedule.run_pending()
    # Poll every five minutes.
    time.sleep(5 * 60)