mirror of
https://github.com/wassname/arXiv_abstract_bot.git
synced 2026-06-27 16:59:36 +08:00
Only crosspost posts that are more than 1 hour old and have more than 10 points
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
.arxiv_bot.*
|
.arxiv_bot.*
|
||||||
.vscode/*
|
.vscode/*
|
||||||
vars_copy.sh
|
vars_copy.sh
|
||||||
|
vars_*.sh
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
+62
-55
@@ -17,7 +17,7 @@ logger = get_logger()
|
|||||||
# from https://github.com/arxiv-vanity/arxiv-vanity/blob/master/arxiv_vanity/scraper/arxiv_ids.py
|
# from https://github.com/arxiv-vanity/arxiv-vanity/blob/master/arxiv_vanity/scraper/arxiv_ids.py
|
||||||
ARXIV_ID_PATTERN = r'([a-z\-]+(?:\.[A-Z]{2})?/\d{7}|\d+\.\d+)(v\d+)?'
|
ARXIV_ID_PATTERN = r'([a-z\-]+(?:\.[A-Z]{2})?/\d{7}|\d+\.\d+)(v\d+)?'
|
||||||
ARXIV_URL_RE = re.compile(r'arxiv.org/[^\/]+/({})(\.pdf)?'.format(ARXIV_ID_PATTERN), re.I)
|
ARXIV_URL_RE = re.compile(r'arxiv.org/[^\/]+/({})(\.pdf)?'.format(ARXIV_ID_PATTERN), re.I)
|
||||||
# OPENREVIEW_URL_RE = re.compile(r'openreview.net/./', re.I)
|
OPENREVIEW_URL_RE = re.compile(r'openreview.net/', re.I)
|
||||||
# OPENREVIEW_URL_RE = re.compile(r'distill.pub/./', re.I)
|
# OPENREVIEW_URL_RE = re.compile(r'distill.pub/./', re.I)
|
||||||
|
|
||||||
r = get_bot()
|
r = get_bot()
|
||||||
@@ -25,14 +25,16 @@ r = get_bot()
|
|||||||
# source subreddits
|
# source subreddits
|
||||||
subreddits = [
|
subreddits = [
|
||||||
r.subreddit('machinelearning'),
|
r.subreddit('machinelearning'),
|
||||||
# r.subreddit('reinforcementlearning')
|
r.subreddit('reinforcementlearning')
|
||||||
# r.subreddit('LanguageTechnology')
|
r.subreddit('LanguageTechnology')
|
||||||
]
|
]
|
||||||
# target_subreddit = r.subreddit('mlresearch')
|
# target_subreddit = r.subreddit('mlresearch')
|
||||||
# target_subreddit = r.subreddit('testingground4bots')
|
# target_subreddit = r.subreddit('testingground4bots')
|
||||||
|
|
||||||
SLEEP = 600
|
SLEEP = 600
|
||||||
LIMIT_CHECK=20
|
LIMIT_CHECK = 20
|
||||||
|
MIN_SECONDS = 60 * 60
|
||||||
|
MIN_SCORE = 10
|
||||||
|
|
||||||
|
|
||||||
if r.read_only == False:
|
if r.read_only == False:
|
||||||
@@ -45,68 +47,73 @@ def comment():
|
|||||||
try:
|
try:
|
||||||
all_posts = subreddit.new(limit=LIMIT_CHECK)
|
all_posts = subreddit.new(limit=LIMIT_CHECK)
|
||||||
for post in all_posts:
|
for post in all_posts:
|
||||||
match = ARXIV_URL_RE.search(post.url)
|
match = ARXIV_URL_RE.search(post.url) or OPENREVIEW_URL_RE.search(post.url)
|
||||||
if match:
|
|
||||||
arxiv_id = match.group(1)
|
ts = time.time() - post.created_utc
|
||||||
|
if match and (post.score > MIN_SCORE) and (ts > MIN_SECONDS):
|
||||||
|
# arxiv_id = match.group(1)
|
||||||
|
|
||||||
# crosspost
|
# crosspost
|
||||||
print('found', arxiv_id)
|
# print('found', arxiv_id)
|
||||||
|
|
||||||
|
|
||||||
if cache.get(post.id) and cache.get(post.id) is 'T':
|
if cache.get(post.id) and cache.get(post.id) is 'T':
|
||||||
print ("Parsed this post already: %s"%(post.permalink))
|
print("Parsed this post already: %s. %s" % (post.permalink, post.id))
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
xpost(['r/researchml'], post)
|
print("posting", post, post.id, post.score, ts)
|
||||||
|
post.crosspost('researchml')
|
||||||
|
# xpost(['r/researchml'], post)
|
||||||
cache[post.id]='T'
|
cache[post.id]='T'
|
||||||
time.sleep(10)
|
time.sleep(60)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error("Failed to scrape")
|
logger.error("Failed to scrape")
|
||||||
print(error)
|
print(error)
|
||||||
|
|
||||||
def xpost(subs, originalpost):
|
# def xpost(subs, originalpost):
|
||||||
# originalpost = where.submission
|
# # TODO native xport
|
||||||
newtitle = "(X-Post r/" + str(originalpost.subreddit.display_name) + ") " + originalpost.title
|
# # originalpost = where.submission
|
||||||
print("New post: " + str(newtitle))
|
# newtitle = "(X-Post r/" + str(originalpost.subreddit.display_name) + ") " + originalpost.title
|
||||||
link = "https://www.reddit.com" + str(originalpost.permalink)
|
# print("New post: " + str(newtitle))
|
||||||
workedsubs = []
|
# link = "https://www.reddit.com" + str(originalpost.permalink)
|
||||||
failedsubs = []
|
# workedsubs = []
|
||||||
wasError = False
|
# failedsubs = []
|
||||||
for workingsub in subs:
|
# wasError = False
|
||||||
exists = True
|
# for workingsub in subs:
|
||||||
try:
|
# exists = True
|
||||||
r.subreddits.search_by_name(workingsub[2:], exact=True)
|
# try:
|
||||||
except NotFound:
|
# r.subreddits.search_by_name(workingsub[2:], exact=True)
|
||||||
logging.error("Failed to post")
|
# except NotFound:
|
||||||
exists = False
|
# logging.error("Failed to post")
|
||||||
if exists == True:
|
# exists = False
|
||||||
subreddit = r.subreddit(workingsub[2:])
|
# if exists == True:
|
||||||
try:
|
# subreddit = r.subreddit(workingsub[2:])
|
||||||
subreddit.submit(newtitle, url=link, resubmit=True, send_replies=False)
|
# try:
|
||||||
workedsubs.append(str(workingsub))
|
# submission.crosspost(subreddit)
|
||||||
print("Posting: " + str(newtitle) + " to " + str(workingsub))
|
# subreddit.submit(newtitle, url=link, resubmit=True, send_replies=False)
|
||||||
except praw.exceptions.APIException:
|
# workedsubs.append(str(workingsub))
|
||||||
logging.error("Failed to post")
|
# print("Posting: " + str(newtitle) + " to " + str(workingsub))
|
||||||
wasError = True
|
# except praw.exceptions.APIException:
|
||||||
break
|
# logging.error("Failed to post")
|
||||||
else:
|
# wasError = True
|
||||||
failedsubs.append(str(workingsub))
|
# break
|
||||||
if not wasError:
|
# else:
|
||||||
print(workedsubs,failedsubs)
|
# failedsubs.append(str(workingsub))
|
||||||
# reply(workedsubs,failedsubs,where)
|
# if not wasError:
|
||||||
pass
|
# print(workedsubs,failedsubs)
|
||||||
else:
|
# # reply(workedsubs,failedsubs,where)
|
||||||
response = ""
|
# pass
|
||||||
if workedsubs:
|
# else:
|
||||||
response = "I was able to crosspost in "
|
# response = ""
|
||||||
for i in workedsubs:
|
# if workedsubs:
|
||||||
response = response + str(i) + " and "
|
# response = "I was able to crosspost in "
|
||||||
response = response[:-5] + ", but I was rate-limited on the others."
|
# for i in workedsubs:
|
||||||
print(response)
|
# response = response + str(i) + " and "
|
||||||
else:
|
# response = response[:-5] + ", but I was rate-limited on the others."
|
||||||
response = "Sorry, I was rate-limited, and I couldn't post."
|
# print(response)
|
||||||
print(response)
|
# else:
|
||||||
# where.reply(str(response) + " Make sure to give me karma to prevent that in the future.")
|
# response = "Sorry, I was rate-limited, and I couldn't post."
|
||||||
|
# print(response)
|
||||||
|
# # where.reply(str(response) + " Make sure to give me karma to prevent that in the future.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user