Adjust code to PEP 8 + minor improvement (replace bare `except:` with `except AssertionError:`)

pull/1628/head
Ricardo Prins 2023-07-20 22:44:36 -06:00
parent 07a4594455
commit a2bde88c36
1 changed file with 59 additions and 58 deletions

View File

@@ -59,7 +59,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.hour, dt1.hour)
except:
except AssertionError:
print("Ticker = ", tkr)
raise
@@ -82,7 +82,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0, dt1)
except:
except AssertionError:
print("Ticker = ", tkr)
raise
@@ -106,7 +106,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.week, dt1.week)
except:
except AssertionError:
print("Ticker={}: Last two rows within same week:".format(tkr))
print(df.iloc[df.shape[0] - 2:])
raise
@@ -172,18 +172,19 @@ class TestPriceHistory(unittest.TestCase):
start_d = _dt.date(2022, 1, 1)
end_d = _dt.date(2023, 1, 1)
tkr_div_dates = {}
tkr_div_dates['BHP.AX'] = [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)] # Yahoo claims 23-Feb but wrong because DST
tkr_div_dates['IMP.JO'] = [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)]
tkr_div_dates['BP.L'] = [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12), _dt.date(2022, 2, 17)]
tkr_div_dates['INTC'] = [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5), _dt.date(2022, 2, 4)]
tkr_div_dates = {'BHP.AX': [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)],
'IMP.JO': [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)],
'BP.L': [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12),
_dt.date(2022, 2, 17)],
'INTC': [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5),
_dt.date(2022, 2, 4)]}
for tkr,dates in tkr_div_dates.items():
for tkr, dates in tkr_div_dates.items():
df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
df_divs = df[df['Dividends']!=0].sort_index(ascending=False)
df_divs = df[df['Dividends'] != 0].sort_index(ascending=False)
try:
self.assertTrue((df_divs.index.date == dates).all())
except:
except AssertionError:
print(f'- ticker = {tkr}')
print('- response:') ; print(df_divs.index.date)
print('- answer:') ; print(dates)
@@ -201,7 +202,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -216,7 +217,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -289,7 +290,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -304,7 +305,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -322,7 +323,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -337,7 +338,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -349,15 +350,15 @@ class TestPriceHistory(unittest.TestCase):
dfm = yf.Ticker("ABBV").history(period="max", interval="1mo")
dfd = yf.Ticker("ABBV").history(period="max", interval="1d")
dfd = dfd[dfd.index > dfm.index[0]]
dfm_divs = dfm[dfm['Dividends']!=0]
dfd_divs = dfd[dfd['Dividends']!=0]
dfm_divs = dfm[dfm['Dividends'] != 0]
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
dfm = yf.Ticker("F").history(period="50mo",interval="1mo")
dfm = yf.Ticker("F").history(period="50mo", interval="1mo")
dfd = yf.Ticker("F").history(period="50mo", interval="1d")
dfd = dfd[dfd.index > dfm.index[0]]
dfm_divs = dfm[dfm['Dividends']!=0]
dfd_divs = dfd[dfd['Dividends']!=0]
dfm_divs = dfm[dfm['Dividends'] != 0]
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
def test_tz_dst_ambiguous(self):
@@ -388,7 +389,7 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
except AssertionError:
print("Weekly data not aligned to Monday")
raise
@@ -440,18 +441,18 @@ class TestPriceHistory(unittest.TestCase):
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9)
time_close = _dt.time(17,30)
time_close = _dt.time(17, 30)
special_day = _dt.date(2022, 12, 23)
time_early_close = _dt.time(13, 2)
dat = yf.Ticker(tkr, session=self.session)
# Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1,5), (4,14), (5,25), (6,23), (11,4), (12,23), (12,30)]]
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]]
# Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
# Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
# But need to handle in this test.
expected_incorrect_half_days = [_dt.date(2022,4,13)]
expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
half_days = sorted(half_days+expected_incorrect_half_days)
# Run
@@ -468,7 +469,7 @@ class TestPriceHistory(unittest.TestCase):
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
unexpected_early_close_dates = [d for d in early_close_dates if not d in half_days]
unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
self.assertEqual(len(unexpected_early_close_dates), 0)
self.assertEqual(len(early_close_dates), len(half_days))
self.assertTrue(_np.equal(early_close_dates, half_days).all())
@@ -484,7 +485,7 @@ class TestPriceHistory(unittest.TestCase):
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(10)
time_close = _dt.time(16,12)
time_close = _dt.time(16, 12)
# No early closes in 2022
dat = yf.Ticker(tkr, session=self.session)
@@ -521,6 +522,7 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, end=end, interval=interval)
class TestPriceRepair(unittest.TestCase):
session = None
@@ -566,10 +568,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [475, 473.5, 472, 473.5],
"Adj Close": [475, 473.5, 472, 473.5],
"Volume": [2295613, 2245604, 3000287, 2635611]},
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
@@ -587,12 +589,11 @@ class TestPriceRepair(unittest.TestCase):
for c in data_cols:
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
except AssertionError:
print(df[c])
print(df_repaired[c])
raise
# Second test - all differences should be either ~1x or ~100x
ratio = df_bad[data_cols].values / df[data_cols].values
ratio = ratio.round(2)
@@ -621,10 +622,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [410, 409.5, 402, 399],
"Adj Close": [398.02, 397.53, 390.25, 387.34],
"Volume": [3232600, 3773900, 10835000, 4257900]},
index=_pd.to_datetime([_dt.date(2020, 3, 30),
_dt.date(2020, 3, 23),
_dt.date(2020, 3, 16),
_dt.date(2020, 3, 9)]))
index=_pd.to_datetime([_dt.date(2020, 3, 30),
_dt.date(2020, 3, 23),
_dt.date(2020, 3, 16),
_dt.date(2020, 3, 9)]))
df = df.sort_index()
# Simulate data missing split-adjustment:
df[data_cols] *= 100.0
@@ -645,7 +646,7 @@ class TestPriceRepair(unittest.TestCase):
for c in data_cols:
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
except AssertionError:
print("Mismatch in column", c)
print("- df_repaired:")
print(df_repaired[c])
@@ -679,10 +680,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [475.5, 475.5, 474.5, 475],
"Adj Close": [475.5, 475.5, 474.5, 475],
"Volume": [436414, 485947, 358067, 287620]},
index=_pd.to_datetime([_dt.date(2022, 11, 1),
_dt.date(2022, 10, 31),
_dt.date(2022, 10, 28),
_dt.date(2022, 10, 27)]))
index=_pd.to_datetime([_dt.date(2022, 11, 1),
_dt.date(2022, 10, 31),
_dt.date(2022, 10, 28),
_dt.date(2022, 10, 27)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
@@ -722,7 +723,7 @@ class TestPriceRepair(unittest.TestCase):
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
_dp = os.path.dirname(__file__)
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-100x-error.csv"), index_col="Date")
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.', '-')+"-100x-error.csv"), index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index)
df_bad = df_bad.sort_index()
@@ -737,7 +738,7 @@ class TestPriceRepair(unittest.TestCase):
for c in data_cols:
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
except AssertionError:
print(df_repaired[c])
print(df[c])
print(f"TEST FAIL on column '{c}")
@@ -768,9 +769,9 @@ class TestPriceRepair(unittest.TestCase):
"Close": [103.03, 102.05, 102.08],
"Adj Close": [102.03, 102.05, 102.08],
"Volume": [560, 137, 117]},
index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
_dt.datetime(2022, 10, 31),
_dt.datetime(2022, 10, 30)]))
index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
_dt.datetime(2022, 10, 31),
_dt.datetime(2022, 10, 30)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
@@ -799,11 +800,11 @@ class TestPriceRepair(unittest.TestCase):
"Adj Close": [28.12, 28.93, 28.57, 29.83, 29.70],
"Volume": [36e6, 51e6, 49e6, 58e6, 62e6],
"Dividends": [0, 0, 0.365, 0, 0]},
index=_pd.to_datetime([_dt.datetime(2023, 2, 8),
_dt.datetime(2023, 2, 7),
_dt.datetime(2023, 2, 6),
_dt.datetime(2023, 2, 3),
_dt.datetime(2023, 2, 2)]))
index=_pd.to_datetime([_dt.datetime(2023, 2, 8),
_dt.datetime(2023, 2, 7),
_dt.datetime(2023, 2, 6),
_dt.datetime(2023, 2, 3),
_dt.datetime(2023, 2, 2)]))
df = df.sort_index()
df.index.name = "Date"
dat = yf.Ticker(tkr, session=self.session)
@@ -844,7 +845,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
except:
except AssertionError:
print("COLUMN", c)
print("- repaired_df")
print(repaired_df)
@@ -864,12 +865,12 @@ class TestPriceRepair(unittest.TestCase):
tz_exchange = dat.fast_info["timezone"]
_dp = os.path.dirname(__file__)
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-bad-stock-split.csv"), index_col="Date")
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.', '-')+"-bad-stock-split.csv"), index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index)
repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)
correct_df = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-bad-stock-split-fixed.csv"), index_col="Date")
correct_df = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.', '-')+"-bad-stock-split-fixed.csv"), index_col="Date")
correct_df.index = _pd.to_datetime(correct_df.index)
repaired_df = repaired_df.sort_index()
@@ -877,7 +878,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close", "Adj Close", "Volume"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=5e-6).all())
except:
except AssertionError:
print(f"tkr={tkr} COLUMN={c}")
print("- repaired_df")
print(repaired_df)
@@ -909,7 +910,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close", "Adj Close", "Volume"]:
try:
self.assertTrue((repaired_df[c].to_numpy() == df_good[c].to_numpy()).all())
except:
except AssertionError:
print(f"tkr={tkr} interval={interval} COLUMN={c}")
df_dbg = df_good[[c]].join(repaired_df[[c]], lsuffix='.good', rsuffix='.repaired')
f_diff = repaired_df[c].to_numpy() != df_good[c].to_numpy()