mirror of
https://github.com/wassname/arXiv_abstract_bot.git
synced 2026-06-27 16:44:23 +08:00
add min age and vote threshold
This commit is contained in:
+23
-8
@@ -18,18 +18,28 @@ r = get_bot()
|
||||
|
||||
SUBREDDIT = 'researchml' # 'testingground4bots'
|
||||
SLEEP = 600
|
||||
MAX_AGE_DAYS = 1
|
||||
MIN_AGE_DAYS = 2
|
||||
MAX_AGE_DAYS = 30
|
||||
POST_DESCRIPTION = True
|
||||
DESCRIPTION_FORMAT = "{}"
|
||||
|
||||
# main procedure
|
||||
|
||||
sources = ["https://distill.pub/rss.xml", "https://www.shortscience.org/rss.xml"]
|
||||
sources = ["https://www.shortscience.org/rss.xml", "https://distill.pub/rss.xml", ]
|
||||
# TODO: it might be nice to check the shortscience vote count ('shortscience:votes' in the feed, 'shortscience_votes' in the parsed entry)
|
||||
|
||||
# Note: this is what the parsed feed entries look like (one example from distill.pub, one from shortscience.org)
|
||||
# {'title': 'Visualizing Neural Networks with the Grand Tour', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'https://distill.pub/rss.xml', 'value': 'Visualizing Neural Networks with the Grand Tour'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://distill.pub/2020/grand-tour'}], 'link': 'https://distill.pub/2020/grand-tour', 'summary': 'By focusing on linear dimensionality reduction, we show how to visualize many dynamic phenomena in neural networks.', 'summary_detail': {'type': 'text/html', 'language': None, 'base': 'https://distill.pub/rss.xml', 'value': 'By focusing on linear dimensionality reduction, we show how to visualize many dynamic phenomena in neural networks.'}, 'id': 'https://distill.pub/2020/grand-tour', 'guidislink': False, 'published': 'Mon, 16 Mar 2020 20:0:0 Z', 'published_parsed': time.struct_time(tm_year=2020, tm_mon=3, tm_mday=16, tm_hour=20, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=76, tm_isdst=0)}
|
||||
|
||||
# {'shortscience_arxivid': '1708.09259', 'shortscience_bibtexkey': 'journals/corr/1708.09259', 'shortscience_votes': '2', 'title': 'Efficient Convolutional Network Learning using Parametric Log based Dual-Tree Wavelet ScatterNet', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'https://www.shortscience.org/rss.xml', 'value': 'Efficient Convolutional Network Learning using Parametric Log based Dual-Tree Wavelet ScatterNet'}, 'authors': [{'name': 'hanoch kremer'}], 'author': 'hanoch kremer', 'author_detail': {'name': 'hanoch kremer'}, 'summary': "ScatterNets incorporates geometric knowledge of images to produce discriminative and invariant (translation and rotation) features i.e. edge information. The same outcome as CNN's first layers hold. So why not replace that first layer/s with an equivalent, fixed, structure and let the optimizer find the best weights for the CNN with its leading-edge removed.\nThe main motivations of the idea of replacing the first convolutional, ReLU and pooling layers of the CNN with a two-layer parametric log-b...", 'summary_detail': {'type': 'text/html', 'language': None, 'base': 'https://www.shortscience.org/rss.xml', 'value': "ScatterNets incorporates geometric knowledge of images to produce discriminative and invariant (translation and rotation) features i.e. edge information. The same outcome as CNN's first layers hold. 
So why not replace that first layer/s with an equivalent, fixed, structure and let the optimizer find the best weights for the CNN with its leading-edge removed.\nThe main motivations of the idea of replacing the first convolutional, ReLU and pooling layers of the CNN with a two-layer parametric log-b..."}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'http://www.shortscience.org/paper?bibtexKey=journals/corr/1708.09259#hanochkremer'}], 'link': 'http://www.shortscience.org/paper?bibtexKey=journals/corr/1708.09259#hanochkremer', 'id': 'http://www.shortscience.org/paper?bibtexKey=journals/corr/1708.09259#hanochkremer', 'guidislink': False, 'published': 'Thu, 09 Apr 2020 12:05:38 +0000', 'published_parsed': time.struct_time(tm_year=2020, tm_mon=4, tm_mday=9, tm_hour=12, tm_min=5, tm_sec=38, tm_wday=3, tm_yday=100, tm_isdst=0)}
|
||||
|
||||
|
||||
|
||||
def run_bot(sources):
|
||||
with shelve.open('.rss_bot') as cache:
|
||||
sub = r.subreddit(SUBREDDIT)
|
||||
t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MAX_AGE_DAYS)
|
||||
t0 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MAX_AGE_DAYS)
|
||||
t1 = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) - datetime.timedelta(days=MIN_AGE_DAYS)
|
||||
|
||||
logger.info("Start bot for subreddit %s", SUBREDDIT)
|
||||
while True:
|
||||
@@ -44,10 +54,15 @@ def run_bot(sources):
|
||||
dt = datetime.datetime.fromtimestamp(mktime(article['published_parsed'])).replace(tzinfo=pytz.utc)
|
||||
url = id = article['link']
|
||||
desc = article['summary']
|
||||
title= article['title']
|
||||
title = article['title']
|
||||
votes = int(article.get('shortscience_votes', 99))
|
||||
|
||||
if dt < t0:
|
||||
# skip older ones
|
||||
if votes < 2:
|
||||
logger.debug(f"skipping low votes article '{title}', id='{id}' td={dt-t0}, votes={votes}")
|
||||
continue
|
||||
|
||||
if (dt > t0) and (dt < t1):
|
||||
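# Skip articles that are too old and ones that are too new (waiting before posting means we miss buggy entries that are removed from the RSS feed within a day).
# NOTE(review): with t0 = now - MAX_AGE_DAYS and t1 = now - MIN_AGE_DAYS, the condition above is true for articles *inside* the allowed age window; confirm whether it should be `(dt < t0) or (dt > t1)`.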
|
||||
logger.debug(f"skipping older article '{title}', id='{id}' td={dt-t0}")
|
||||
continue
|
||||
|
||||
@@ -70,8 +85,8 @@ def run_bot(sources):
|
||||
except KeyboardInterrupt:
|
||||
return 0
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error("Exception %s", e, exc_info=True)
|
||||
# except Exception as e:
|
||||
# logger.error("Exception %s", e, exc_info=True)
|
||||
|
||||
logger.info("sleep for %s s", SLEEP)
|
||||
time.sleep(SLEEP)
|
||||
|
||||
Reference in New Issue
Block a user