From 71908d185a20ab6a2c7b350c5d7204558a9ba75f Mon Sep 17 00:00:00 2001
From: wassname <wassname@users.noreply.github.com>
Date: Wed, 12 Feb 2020 07:28:13 +0800
Subject: [PATCH] working well except for reposts due to non-persistent cache

---
 arxiv_ml_xpost_bot.py | 42 +++++++-----------------------------------
 rss_poster_bot.py     |  5 +++--
 2 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/arxiv_ml_xpost_bot.py b/arxiv_ml_xpost_bot.py
index b1a2fad..1f73ff8 100644
--- a/arxiv_ml_xpost_bot.py
+++ b/arxiv_ml_xpost_bot.py
@@ -28,42 +28,21 @@ subreddits = [
     # r.subreddit('reinforcementlearning')
     # r.subreddit('LanguageTechnology')
 ]
-target_subreddit = r.subreddit('mlresearch')
-target_subreddit = r.subreddit('testingground4bots')
+# target_subreddit = r.subreddit('mlresearch')
+# target_subreddit = r.subreddit('testingground4bots')
+
+SLEEP = 600
+LIMIT_CHECK=20
 
 
 if r.read_only == False:
     print("Connected and running.")
-# alreadydone = set()
-
-
-# def scrape_arxiv(arxiv_id):
-#     url = 'https://arxiv.org/abs/{}'.format(arxiv_id)
-#     r = requests.get(url)
-#     soup = bs4.BeautifulSoup(r.text)
-#     abstract = soup.select('.abstract')[0]
-#     abstract = html2text.html2text(abstract.decode()).replace('\n', ' ')
-
-#     authors = soup.select('.authors')[0]
-#     authors = html2text.html2text(authors.decode()).replace('\n', ' ')
-#     authors = authors.replace('(/', '(http://arxiv.org/')
-
-#     title = soup.select('.title')[0]
-#     title =  html2text.html2text(title.decode()).replace('\n', ' ')[2:]
-
-#     abs_link = u'[Landing Page]({})'.format(url)
-#     pdf_link = u'[PDF Link](https://arxiv.org/pdf/{})'.format(arxiv_id)
-#     web_link = u'[Read as web page on arXiv Vanity](https://www.arxiv-vanity.com/papers/{}/)'.format(arxiv_id)
-#     links = u'{} | {} | {}'.format(pdf_link, abs_link, web_link)
-#     response = '\n\n'.join([title, authors, abstract, links]) 
-#     return response
 
 
 def comment(cache):
-    # print(time.asctime(), "searching")
     for subreddit in subreddits:
         try:
-            all_posts = subreddit.new(limit=100)
+            all_posts = subreddit.new(limit=LIMIT_CHECK)
             for post in all_posts:
                 match = ARXIV_URL_RE.search(post.url)
                 if match:
@@ -76,16 +55,9 @@ def comment(cache):
                     if cache.get(post.id) and cache.get(post.id) is 'T':
                         print ("Parsed this post already: %s"%(post.permalink))
                         continue
-                    # for comment in post.comments:
-                    #     if str(comment.author) == 'arXiv_abstract_bot':
-                    #         break
                     else:
                         xpost(['r/researchml'], post)
-                        # response = scrape_arxiv(arxiv_id)
-                        # post.reply(response)
                         cache.set(post.id, 'T')
-                    #     print "Parsed post: %s"%(post.permalink)
-                    #     print(arxiv_id, response)
                         time.sleep(10)
         except Exception as error:
             logger.error("Failed to scrape")
@@ -143,4 +115,4 @@ if __name__ == "__main__":
 
     while True:
         comment(cache)
-        time.sleep(30)
+        time.sleep(SLEEP)
diff --git a/rss_poster_bot.py b/rss_poster_bot.py
index 79d3c51..f7d71e5 100644
--- a/rss_poster_bot.py
+++ b/rss_poster_bot.py
@@ -17,7 +17,8 @@ r = get_bot()
 cache = get_memcache_client()
 
 SUBREDDIT = 'researchml'  # 'testingground4bots'
-SLEEP = 60
+SLEEP = 600
+MAX_AGE_DAYS = 1
 POST_DESCRIPTION = True
 DESCRIPTION_FORMAT = "{}"
 
@@ -27,7 +28,7 @@ sources = ["https://distill.pub/rss.xml", "https://www.shortscience.org/rss.xml"
 
 def run_bot(sources):
     sub = r.subreddit(SUBREDDIT)
-    t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=10) 
+    t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MAX_AGE_DAYS) 
 
     logger.info("Start bot for subreddit %s", SUBREDDIT)
     while True: