Added the first implementation of the website scraper. The "thumbnail" option is disabled in the embeds that are sent because thumbnails are not yet handled properly on Guilded (the cropping is wonky). This script should also work with any Discord webhook URL, but that hasn't been tested. There is currently no way to pre-populate the database with hashes other than doing it manually beforehand; this is something I will probably look at adding in the future.
63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
from datetime import datetime as dt
|
|
from dotenv import load_dotenv
|
|
from news import NewsScraper
|
|
from os.path import join, dirname
|
|
import schedule
|
|
import os
|
|
import asyncio
|
|
import json
|
|
import time
|
|
|
|
# Load settings from the .env file that sits next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# Webhook endpoint that articles are posted to; None when the variable is unset.
WEBHOOK_URL = os.environ.get('WEBHOOK_URL')

# Create and install the event loop explicitly: asyncio.get_event_loop() is
# deprecated when no loop is current and raises RuntimeError on Python 3.14+.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
|
|
|
|
|
|
async def publish_news():
    """Scrape the news site and post every returned article to the webhook.

    A ``NewsScraper`` bound to the module-level event loop is asked for
    articles; each one is wrapped in a single-embed JSON payload and POSTed
    to ``WEBHOOK_URL`` through the scraper's HTTP client. The HTTP status of
    every request is printed.

    The scraper is closed in a ``finally`` clause so it is released even when
    scraping or posting raises; the exception itself still propagates.
    """
    print('Running web scrape...')
    la_news = NewsScraper(loop=loop)
    try:
        articles = await la_news.news_articles()

        # A falsy result (empty or None) means there is nothing to publish.
        if articles:
            for article in articles:
                payload = {
                    "content": None,
                    "embeds": [
                        {
                            # NOTE(review): json.dumps already escapes the
                            # payload; these replaces put a literal backslash
                            # before every apostrophe. Confirm the webhook
                            # renderer actually needs that.
                            "title": article['title'].replace("'", "\\'"),
                            "description": article['preview_text'].replace("'", "\\'"),
                            "url": article['article_link'].replace("'", "\\'"),
                            "color": 5814783,
                            "footer": {
                                "text": article['tag']
                            },
                            # str() form of the naive UTC time at send time.
                            "timestamp": f"{dt.utcnow()}",
                            "image": {
                                "url": article['image_preview']
                            },
                            # Thumbnail intentionally left disabled.
                            #"thumbnail": {
                            #    "url": "https://images.ctfassets.net/umhrp0op95v1/S3yKwaVAOi8Bgqg4n4scf"
                            #           "/adae769671b271b88f97d31721432986/LA_LOGO.png "
                            #}
                        }
                    ]
                }
                resp = await la_news.client.post(
                    url=WEBHOOK_URL,
                    data=json.dumps(payload).encode('UTF-8'),
                    headers={'Content-Type': 'application/json'},
                )
                print(resp.status)
    finally:
        # Always release the scraper, including on errors.
        await la_news.close()
|
|
|
|
|
|
def run_async(coroutine):
    """Invoke *coroutine* and block until the resulting awaitable finishes.

    Args:
        coroutine: A zero-argument coroutine function. It is called here, so
            pass the function itself, not an already-created coroutine object.
    """
    # Drive the awaitable on the module-level event loop.
    loop.run_until_complete(coroutine())
|
|
|
|
|
|
# Register the scrape-and-publish job to fire once every hour.
hourly = schedule.every().hour
hourly.do(run_async, publish_news)

# Poll the scheduler forever, waking every 300 seconds to run whatever is due.
while True:
    print('Checking schedule...')
    schedule.run_pending()
    time.sleep(300)
|