eliminating blank space

This commit is contained in:
Catalina Syddall
2019-07-23 17:56:52 -03:00
parent a86e99e641
commit 91f4b90406
5 changed files with 3 additions and 26 deletions
+1 -9
View File
@@ -71,9 +71,6 @@ def fetch_data(symbols=None):
retry_failure(failed,done)
send_report(done, failed, __name__)
## If a symbol fails to scrape, retry with exponentially increasing waits
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=300), stop = tenacity.stop_after_attempt(10), retry=tenacity.retry_if_exception_type(IOError))
def retry_failure(failed, done):
@@ -101,8 +98,6 @@ def retry_failure(failed, done):
done+=1
failed.remove(symbol)
def aggregate_monthly_data(symbols=None):
"""Aggregate daily snapshots into monthly files and validate data"""
symbols = symbols or _get_all_listed_symbols()
@@ -132,7 +127,7 @@ def aggregate_monthly_data(symbols=None):
daily_files = [
os.path.join(daily_dir, name) for name in file_names
]
try:
symbol_df = concatenate_files(daily_files)
except Exception:
@@ -174,10 +169,8 @@ def aggregate_monthly_data(symbols=None):
for file in daily_files:
utils.remove_file(file, logger)
send_report(done, failed, __name__, op="aggregate")
def _get_all_listed_symbols():
"""Returns array of all listed symbols.
http://www.cboe.com/publish/scheduledtask/mktdata/cboesymboldir2.csv
@@ -188,7 +181,6 @@ def _get_all_listed_symbols():
symbols_df = pd.read_csv(symbols_file, skiprows=1)
return symbols_df["Stock Symbol"].array
def concatenate_files(files):
"""Returns a dataframe of the concatenated data from `files`."""
df_generator = (pd.read_csv(file) for file in sorted(files))
-1
View File
@@ -66,6 +66,5 @@ def send_report(done, failed, scraper, op="scrape"):
msg= msg_success + '\n' + msg_fail
slack_notification(msg, scraper, status=Status.Warning)
def _symbol_str(count):
return str(count) + " symbol" if count == 1 else str(count) + " symbols"
+1 -3
View File
@@ -62,7 +62,6 @@ class TestCBOE(unittest.TestCase):
cboe.fetch_data(["SPX"])
self.assertTrue(mocked_notification.called)
@patch("data_scraper.cboe.utils.remove_file", return_value=None)
@patch("data_scraper.cboe.send_report", return_value=None)
def test_data_aggregation(self, mocked_report, mocked_remove):
@@ -109,6 +108,5 @@ class TestCBOE(unittest.TestCase):
if os.path.exists(file_path):
shutil.rmtree(file_path)
if __name__ == "__main__":
unittest.main()
unittest.main()
-3
View File
@@ -109,13 +109,10 @@ def _save_data(symbol, symbol_df):
merged_df.to_csv(file_path, index=False)
logger.debug("Saved symbol data as %s", file_path)
def _merge(symbol, symbol_df):
"""Merge `symbol_df` with previous data file."""
save_data_path = utils.get_save_data_path()
symbol_dir = os.path.join(save_data_path, "tiingo", symbol)
files = os.listdir(symbol_dir)
if len(files) == 0:
return symbol_df
+1 -10
View File
@@ -9,13 +9,11 @@ from .notifications import slack_notification
logger = logging.getLogger(__name__)
def file_hash_matches_data(file_path, data):
    """Tell whether the file at `file_path` has the same MD5 digest as `data`.

    The file digest comes from `file_md5`; `data` is encoded with its
    `.encode()` method before being hashed.
    """
    on_disk_digest = file_md5(file_path)
    expected_digest = hashlib.md5(data.encode()).hexdigest()
    return on_disk_digest == expected_digest
def file_md5(file, chunk_size=4096):
md5 = hashlib.md5()
with open(file, "rb") as f:
@@ -24,7 +22,6 @@ def file_md5(file, chunk_size=4096):
return md5.hexdigest()
def validate_dates_in_month(symbol, date_range):
"""Compares `date_range` (month) with NYSE trading calendar.
Returns `True` if there are no missing days.
@@ -46,7 +43,6 @@ def validate_dates_in_month(symbol, date_range):
missing_days)
return missing_days.empty
def validate_historical_dates(symbol, date_range):
"""Compares `date_range` (any time range) with trading calendar.
Returns `True` if there are no missing days.
@@ -67,11 +63,9 @@ def validate_historical_dates(symbol, date_range):
return missing_days.empty
def validate_columns(expected, received):
"""Verify that the `received` columns scraped are equal to `expected`"""
valid = all(expected == received)
if not valid:
expected_cols = ", ".join(expected)
received_cols = ", ".join(received)
@@ -80,13 +74,10 @@ def validate_columns(expected, received):
Received: {}""".format(expected_cols, received_cols)
logger.error(msg)
slack_notification(msg, __name__)
return valid
def validate_aggregate_file(aggregate_file, daily_files):
    """Check `aggregate_file` against the data rebuilt from `daily_files`.

    Returns `True` when the dataframe read from `aggregate_file` equals the
    dataframe produced by `cboe.concatenate_files(daily_files)`.
    """
    # The aggregate is read first, then the daily files are concatenated.
    return pd.read_csv(aggregate_file).equals(
        cboe.concatenate_files(daily_files)
    )
return aggregate_df.equals(recreated_df)