Updated script to preload hashes and updated message

The script now reads a new .env variable, PRELOAD, which specifies whether
the database should be preloaded with hashes.
The webhook message is currently configured to mention a role from a
specific server; this will be changed at a later date.
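
For reference, a sketch of the resulting .env entries; the webhook URL is an illustrative placeholder, not a real value:

    # values below are placeholders
    WEBHOOK_URL=https://discord.com/api/webhooks/<id>/<token>
    # parsed with ast.literal_eval, so the value must be a Python literal
    PRELOAD=False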
雲華
2022-01-19 16:15:02 -05:00
parent af948965b0
commit f97f72d18d
3 changed files with 45 additions and 14 deletions

Dockerfile

@@ -10,4 +10,5 @@ RUN python3 -m venv venv && \
 RUN mkdir database && \
     touch database/news.db
 CMD ["venv/bin/python", "main.py"]

main.py

@@ -8,24 +8,41 @@ import asyncio
 import json
 import time
 import logging
+import ast
 logging.basicConfig(filename="lanews.log", level=logging.DEBUG)
 dotenv_path = join(dirname(__file__), '.env')
 load_dotenv(dotenv_path)
 WEBHOOK_URL = os.environ.get('WEBHOOK_URL')
+PRELOAD = ast.literal_eval(os.environ.get('PRELOAD'))
+if WEBHOOK_URL is None:
+    raise Exception()
+if PRELOAD is None:
+    PRELOAD = True
 loop = asyncio.get_event_loop()
-async def publish_news():
+async def preload_news():
+    scraper = NewsScraper(loop=loop)
+    await fetch_news(scraper)
+    await scraper.close()
+async def fetch_news(scraper):
     logging.debug('Running web scrape...')
+    return await scraper.news_articles()
+async def publish_news():
     la_news = NewsScraper(loop=loop)
-    articles = await la_news.news_articles()
+    articles = await fetch_news(la_news)
     if bool(articles):
         for article in articles:
             payload = {
-                "content": None,
+                "content": '<@&922576289188151307>',
                 "embeds": [
                     {
                         "title": article['title'].replace("'", "\\'"),
@@ -39,15 +56,19 @@ async def publish_news():
                         "image": {
                             "url": article['image_preview']
                         },
-                        #"thumbnail": {
-                        #    "url": "https://images.ctfassets.net/umhrp0op95v1/S3yKwaVAOi8Bgqg4n4scf"
-                        #           "/adae769671b271b88f97d31721432986/LA_LOGO.png "
-                        #}
+                        "thumbnail": {
+                            "url": "https://images.ctfassets.net/umhrp0op95v1/S3yKwaVAOi8Bgqg4n4scf"
+                                   "/adae769671b271b88f97d31721432986/LA_LOGO.png "
+                        }
                     }
                 ]
             }
-            resp = await la_news.client.post(url=WEBHOOK_URL, data=json.dumps(payload).encode('UTF-8'), headers={'Content-Type': 'application/json'})
-            print(resp.status)
+            while True:
+                resp = await la_news.client.post(url=WEBHOOK_URL, data=json.dumps(payload).encode('UTF-8'), headers={'Content-Type': 'application/json'})
+                if resp.status == 204:
+                    break
+                time.sleep(15)
+            time.sleep(5)
     await la_news.close()
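
The retry loop above waits with time.sleep inside a coroutine, which blocks the event loop for the full delay. Because the script drives one coroutine at a time via run_until_complete this happens to work, but a non-blocking sketch of the same loop would await asyncio.sleep instead:

    while True:
        resp = await la_news.client.post(url=WEBHOOK_URL,
                                         data=json.dumps(payload).encode('UTF-8'),
                                         headers={'Content-Type': 'application/json'})
        if resp.status == 204:  # Discord webhooks return 204 No Content on success
            break
        await asyncio.sleep(15)  # back off without blocking the loop
    await asyncio.sleep(5)       # brief pause between consecutive posts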
@@ -56,7 +77,10 @@ def run_async(coroutine):
     loop.run_until_complete(task)
-schedule.every(5).minutes.do(run_async, publish_news)
+if PRELOAD:
+    asyncio.get_event_loop().run_until_complete(preload_news())
+schedule.every(1).seconds.do(run_async, publish_news)
 while True:
     logging.debug('Checking schedule...')
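
For context, run_async bridges the synchronous schedule library and asyncio; a sketch consistent with the visible lines (loop.create_task and schedule.run_pending are assumptions; only run_until_complete and the logging call appear in this diff):

    def run_async(coroutine):
        # schedule passes the coroutine function; wrap a fresh coroutine
        # in a task and drive it to completion on the shared loop
        task = loop.create_task(coroutine())
        loop.run_until_complete(task)

    while True:
        logging.debug('Checking schedule...')
        schedule.run_pending()  # fires publish_news via run_async when due
        time.sleep(1)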

news.py

@@ -3,6 +3,7 @@ from aiohttp import ClientSession
 from sqlite3 import Error
 import hashlib
 import sqlite3
+import time
 BASE_URL = 'https://www.playlostark.com'
@@ -28,8 +29,11 @@ class NewsScraper:
         self._md5 = hashlib.new('md5', usedforsecurity=False)
     async def _fetch_url(self, url):
-        async with self.client.get(url=url) as resp:
-            return await resp.text()
+        while True:
+            async with self.client.get(url=url) as resp:
+                if resp.status == 200:
+                    return await resp.text()
+            time.sleep(15)
     def _store_hash(self, _hash, table):
         with self.database as db:
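
The same blocking-sleep caveat applies to the _fetch_url retry above: time.sleep stalls the event loop between attempts. A non-blocking sketch (assuming news.py also imports asyncio, which this diff does not add):

    async def _fetch_url(self, url):
        while True:
            async with self.client.get(url=url) as resp:
                if resp.status == 200:
                    return await resp.text()
            await asyncio.sleep(15)  # non-blocking back-off before retrying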
@@ -72,12 +76,14 @@ class NewsScraper:
             _hash = hashlib.md5(article_meta.__str__().encode('UTF-8'), usedforsecurity=False).hexdigest()
             if self._check_hash(_hash, 'news_hashes'):
-                return articles.reverse()
+                articles.reverse()
+                return articles
             else:
                 self._store_hash(_hash, 'news_hashes')
                 articles.append(article_meta)
-        return articles.reverse()
+        articles.reverse()
+        return articles
     async def close(self):
         await self.client.close()
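
The reverse fixes in the last hunk address a classic pitfall: list.reverse() mutates in place and returns None, so the old "return articles.reverse()" always handed None back to the caller. For illustration:

    articles = ['a', 'b', 'c']
    print(articles.reverse())  # None: reverse() mutates in place
    print(articles)            # ['c', 'b', 'a']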