diff --git a/.gitignore b/.gitignore index 894a44c..455b3bc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +.rss_bot.* +.arxiv_bot.* +.vscode/* +vars_copy.sh + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/arxiv_ml_xpost_bot.py b/arxiv_ml_xpost_bot.py index 1f73ff8..6a33e1d 100644 --- a/arxiv_ml_xpost_bot.py +++ b/arxiv_ml_xpost_bot.py @@ -4,13 +4,13 @@ import requests import bs4 import html2text import time, os -import bmemcached +import shelve import re from prawcore import NotFound import datetime -from botlib import get_bot, get_memcache_client, get_logger +from botlib import get_bot, get_logger logger = get_logger() @@ -39,29 +39,30 @@ if r.read_only == False: print("Connected and running.") -def comment(cache): - for subreddit in subreddits: - try: - all_posts = subreddit.new(limit=LIMIT_CHECK) - for post in all_posts: - match = ARXIV_URL_RE.search(post.url) - if match: - arxiv_id = match.group(1) +def comment(): + with shelve.open('.arxiv_bot') as cache: + for subreddit in subreddits: + try: + all_posts = subreddit.new(limit=LIMIT_CHECK) + for post in all_posts: + match = ARXIV_URL_RE.search(post.url) + if match: + arxiv_id = match.group(1) - # crosspost - print('found', arxiv_id) - + # crosspost + print('found', arxiv_id) + - if cache.get(post.id) and cache.get(post.id) is 'T': - print ("Parsed this post already: %s"%(post.permalink)) - continue - else: - xpost(['r/researchml'], post) - cache.set(post.id, 'T') - time.sleep(10) - except Exception as error: - logger.error("Failed to scrape") - print(error) + if cache.get(post.id) and cache.get(post.id) is 'T': + print ("Parsed this post already: %s"%(post.permalink)) + continue + else: + xpost(['r/researchml'], post) + cache[post.id]='T' + time.sleep(10) + except Exception as error: + logger.error("Failed to scrape") + print(error) def xpost(subs, originalpost): # originalpost = where.submission @@ -111,8 +112,7 @@ def xpost(subs, originalpost): if __name__ == "__main__": - cache = get_memcache_client() while True: - comment(cache) + comment() time.sleep(SLEEP) diff --git a/botlib.py b/botlib.py index d3efcac..976b94a 100644 --- a/botlib.py +++ b/botlib.py @@ -4,7 +4,7 @@ import requests import bs4 import html2text import time, os -import bmemcached +import shelve import re from prawcore import NotFound @@ -17,16 +17,8 @@ def get_logger(): logger = logging.getLogger(__name__) return logger -def get_memcache_client(): - # Store IDs of comments that the bot has already replied to. - # Read local cache by default - - MEMCACHEDCLOUD_SERVERS = os.environ.get('MEMCACHEDCLOUD_SERVERS') - MEMCACHEDCLOUD_USERNAME = os.environ.get('MEMCACHEDCLOUD_USERNAME') - MEMCACHEDCLOUD_PASSWORD = os.environ.get('MEMCACHEDCLOUD_PASSWORD') - - client = bmemcached.Client((MEMCACHEDCLOUD_SERVERS,), MEMCACHEDCLOUD_USERNAME, - MEMCACHEDCLOUD_PASSWORD) +def get_memcache_client(name): + client = shelve.open(name) return client diff --git a/requirements.txt b/requirements.txt index b4fe890..e03ef2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ html2text==2018.1.9 idna==2.8 praw==6.0.0 prawcore==1.0.0 -python-binary-memcached==0.28.0 requests==2.21.0 six==1.12.0 typing==3.6.6 diff --git a/rss_poster_bot.py b/rss_poster_bot.py index f7d71e5..036cdd8 100644 --- a/rss_poster_bot.py +++ b/rss_poster_bot.py @@ -8,13 +8,13 @@ import datetime import pytz import feedparser import logging +import shelve from time import mktime -from botlib import get_bot, get_memcache_client, get_logger +from botlib import get_bot, get_logger logger = get_logger() r = get_bot() -cache = get_memcache_client() SUBREDDIT = 'researchml' # 'testingground4bots' SLEEP = 600 @@ -27,53 +27,54 @@ DESCRIPTION_FORMAT = "{}" sources = ["https://distill.pub/rss.xml", "https://www.shortscience.org/rss.xml"] def run_bot(sources): - sub = r.subreddit(SUBREDDIT) - t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MAX_AGE_DAYS) + with shelve.open('.rss_bot') as cache: + sub = r.subreddit(SUBREDDIT) + t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MAX_AGE_DAYS) - logger.info("Start bot for subreddit %s", SUBREDDIT) - while True: - try: - logger.info("check sources") - newArticles = [] - for source in sources: - d = feedparser.parse(source) - newArticles = d['entries'] - - for article in newArticles: - dt = datetime.datetime.fromtimestamp(mktime(article['published_parsed'])).replace(tzinfo=pytz.utc) - url = id = article['link'] - desc = article['summary'] - title= article['title'] + logger.info("Start bot for subreddit %s", SUBREDDIT) + while True: + try: + logger.info("check sources") + newArticles = [] + for source in sources: + d = feedparser.parse(source) + newArticles = d['entries'] + + for article in newArticles: + dt = datetime.datetime.fromtimestamp(mktime(article['published_parsed'])).replace(tzinfo=pytz.utc) + url = id = article['link'] + desc = article['summary'] + title= article['title'] - if dt < t0: - # skip older ones - logger.debug(f"skipping older article '{title}', id='{id}' td={dt-t0}") - continue - - if cache.get(id) and cache.get(id) is 'T': - logger.info(f"skipping done article '{title}', id='{id}'") - continue - else: - logger.info('posting %s', id) - try: - submission = sub.submit('[S] ' + title, url=url, resubmit=True, send_replies=False) - if POST_DESCRIPTION and desc is not None: - submission.reply(DESCRIPTION_FORMAT.format(desc)) - except praw.exceptions.PRAWException as e: - logger.exception("could not submit %s", e) + if dt < t0: + # skip older ones + logger.debug(f"skipping older article '{title}', id='{id}' td={dt-t0}") + continue + + if cache.get(id) and cache.get(id) is 'T': + logger.info(f"skipping done article '{title}', id='{id}'") + continue else: - cache.set(id, 'T') - logger.info("submit article %s", article) + logger.info('posting %s', id) + try: + submission = sub.submit('[S] ' + title, url=url, resubmit=True, send_replies=False) + if POST_DESCRIPTION and desc is not None: + submission.reply(DESCRIPTION_FORMAT.format(desc)) + except praw.exceptions.PRAWException as e: + logger.exception("could not submit %s", e) + else: + cache[id] ='T' + logger.info("submit article %s", article) - # Allows the bot to exit on ^C, all other exceptions are ignored - except KeyboardInterrupt: - return 0 - break - except Exception as e: - logger.error("Exception %s", e, exc_info=True) + # Allows the bot to exit on ^C, all other exceptions are ignored + except KeyboardInterrupt: + return 0 + break + except Exception as e: + logger.error("Exception %s", e, exc_info=True) - logger.info("sleep for %s s", SLEEP) - time.sleep(SLEEP) + logger.info("sleep for %s s", SLEEP) + time.sleep(SLEEP) # write_config_done(done)