diff --git a/data_scraper/cboe.py b/data_scraper/cboe.py index 67df983..9796b79 100644 --- a/data_scraper/cboe.py +++ b/data_scraper/cboe.py @@ -71,9 +71,6 @@ def fetch_data(symbols=None): retry_failure(failed,done) send_report(done, failed, __name__) - - - ##if a symbol failes to scrape try again exponentialy @tenacity.retry(wait=tenacity.wait_exponential(multiplier=300), stop = tenacity.stop_after_attempt(10), retry=tenacity.retry_if_exception_type(IOError)) def retry_failure(failed, done): @@ -101,8 +98,6 @@ def retry_failure(failed, done): done+=1 failed.remove(symbol) - - def aggregate_monthly_data(symbols=None): """Aggregate daily snapshots into monthly files and validate data""" symbols = symbols or _get_all_listed_symbols() @@ -132,7 +127,7 @@ def aggregate_monthly_data(symbols=None): daily_files = [ os.path.join(daily_dir, name) for name in file_names ] - + try: symbol_df = concatenate_files(daily_files) except Exception: @@ -174,10 +169,8 @@ def aggregate_monthly_data(symbols=None): for file in daily_files: utils.remove_file(file, logger) - send_report(done, failed, __name__, op="aggregate") - def _get_all_listed_symbols(): """Returns array of all listed symbols. http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv @@ -188,7 +181,6 @@ def _get_all_listed_symbols(): symbols_df = pd.read_csv(symbols_file, skiprows=1) return symbols_df["Stock Symbol"].array - def concatenate_files(files): """Returns a dataframe of the concatenated data from `files`.""" df_generator = (pd.read_csv(file) for file in sorted(files)) diff --git a/data_scraper/notifications.py b/data_scraper/notifications.py index 31ecce2..3b5bd60 100644 --- a/data_scraper/notifications.py +++ b/data_scraper/notifications.py @@ -66,6 +66,5 @@ def send_report(done, failed, scraper, op="scrape"): msg= msg_success + '\n' + msg_fail slack_notification(msg, scraper, status=Status.Warning) - def _symbol_str(count): return str(count) + " symbol" if count == 1 else str(count) + " symbols" diff --git a/data_scraper/test/test_cboe.py b/data_scraper/test/test_cboe.py index 6c93b63..204353a 100644 --- a/data_scraper/test/test_cboe.py +++ b/data_scraper/test/test_cboe.py @@ -62,7 +62,6 @@ class TestCBOE(unittest.TestCase): cboe.fetch_data(["SPX"]) self.assertTrue(mocked_notification.called) - @patch("data_scraper.cboe.utils.remove_file", return_value=None) @patch("data_scraper.cboe.send_report", return_value=None) def test_data_aggregation(self, mocked_report, mocked_remove): @@ -109,6 +108,5 @@ class TestCBOE(unittest.TestCase): if os.path.exists(file_path): shutil.rmtree(file_path) - if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/data_scraper/tiingo.py b/data_scraper/tiingo.py index 263c6fb..b7e8a5f 100644 --- a/data_scraper/tiingo.py +++ b/data_scraper/tiingo.py @@ -109,13 +109,10 @@ def _save_data(symbol, symbol_df): merged_df.to_csv(file_path, index=False) logger.debug("Saved symbol data as %s", file_path) - def _merge(symbol, symbol_df): """Merge `symbol_df` with previous data file.""" - save_data_path = utils.get_save_data_path() symbol_dir = os.path.join(save_data_path, "tiingo", symbol) - files = os.listdir(symbol_dir) if len(files) == 0: return symbol_df diff --git a/data_scraper/validation.py b/data_scraper/validation.py index 369fb0c..6bfc92f 100644 --- a/data_scraper/validation.py +++ b/data_scraper/validation.py @@ -9,13 +9,11 @@ from .notifications import slack_notification logger = logging.getLogger(__name__) - def file_hash_matches_data(file_path, data): file_hash = file_md5(file_path) data_md5 = hashlib.md5(data.encode()).hexdigest() return file_hash == data_md5 - def file_md5(file, chunk_size=4096): md5 = hashlib.md5() with open(file, "rb") as f: @@ -24,7 +22,6 @@ def file_md5(file, chunk_size=4096): return md5.hexdigest() - def validate_dates_in_month(symbol, date_range): """Compares `date_range` (month) with NYSE trading calendar. Returns `True` if there are no missing days. @@ -46,7 +43,6 @@ def validate_dates_in_month(symbol, date_range): missing_days) return missing_days.empty - def validate_historical_dates(symbol, date_range): """Compares `date_range` (any time range) with trading calendar. Returns `True` if there are no missing days. @@ -67,11 +63,9 @@ def validate_historical_dates(symbol, date_range): return missing_days.empty - def validate_columns(expected, received): """Verify that the `received` columns scraped are equal to `expected`""" valid = all(expected == received) - if not valid: expected_cols = ", ".join(expected) received_cols = ", ".join(received) @@ -80,13 +74,10 @@ def validate_columns(expected, received): Received: {}""".format(expected_cols, received_cols) logger.error(msg) slack_notification(msg, __name__) - return valid - def validate_aggregate_file(aggregate_file, daily_files): """Compares `aggregate_file` with the data from `daily_files`.""" aggregate_df = pd.read_csv(aggregate_file) recreated_df = cboe.concatenate_files(daily_files) - - return aggregate_df.equals(recreated_df) + return aggregate_df.equals(recreated_df) \ No newline at end of file