Merge branch 'dev' into feature/price-repair-improvements

pull/1633/head
ValueRaider 2023-07-22 20:34:56 +01:00 committed by GitHub
commit 4a1e1c4447
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 55 additions and 54 deletions

View File

@@ -59,7 +59,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.hour, dt1.hour)
except:
except AssertionError:
print("Ticker = ", tkr)
raise
@@ -82,7 +82,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0, dt1)
except:
except AssertionError:
print("Ticker = ", tkr)
raise
@@ -106,7 +106,7 @@ class TestPriceHistory(unittest.TestCase):
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.week, dt1.week)
except:
except AssertionError:
print("Ticker={}: Last two rows within same week:".format(tkr))
print(df.iloc[df.shape[0] - 2:])
raise
@@ -172,18 +172,19 @@ class TestPriceHistory(unittest.TestCase):
start_d = _dt.date(2022, 1, 1)
end_d = _dt.date(2023, 1, 1)
tkr_div_dates = {}
tkr_div_dates['BHP.AX'] = [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)] # Yahoo claims 23-Feb but wrong because DST
tkr_div_dates['IMP.JO'] = [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)]
tkr_div_dates['BP.L'] = [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12), _dt.date(2022, 2, 17)]
tkr_div_dates['INTC'] = [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5), _dt.date(2022, 2, 4)]
tkr_div_dates = {'BHP.AX': [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)], # Yahoo claims 23-Feb but wrong because DST
'IMP.JO': [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)],
'BP.L': [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12),
_dt.date(2022, 2, 17)],
'INTC': [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5),
_dt.date(2022, 2, 4)]}
for tkr,dates in tkr_div_dates.items():
for tkr, dates in tkr_div_dates.items():
df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
df_divs = df[df['Dividends']!=0].sort_index(ascending=False)
df_divs = df[df['Dividends'] != 0].sort_index(ascending=False)
try:
self.assertTrue((df_divs.index.date == dates).all())
except:
except AssertionError:
print(f'- ticker = {tkr}')
print('- response:') ; print(df_divs.index.date)
print('- answer:') ; print(dates)
@@ -201,7 +202,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -216,7 +217,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -289,7 +290,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -304,7 +305,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -322,7 +323,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
@@ -337,7 +338,7 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
except AssertionError:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
@@ -349,15 +350,15 @@ class TestPriceHistory(unittest.TestCase):
dfm = yf.Ticker("ABBV").history(period="max", interval="1mo")
dfd = yf.Ticker("ABBV").history(period="max", interval="1d")
dfd = dfd[dfd.index > dfm.index[0]]
dfm_divs = dfm[dfm['Dividends']!=0]
dfd_divs = dfd[dfd['Dividends']!=0]
dfm_divs = dfm[dfm['Dividends'] != 0]
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
dfm = yf.Ticker("F").history(period="50mo",interval="1mo")
dfm = yf.Ticker("F").history(period="50mo", interval="1mo")
dfd = yf.Ticker("F").history(period="50mo", interval="1d")
dfd = dfd[dfd.index > dfm.index[0]]
dfm_divs = dfm[dfm['Dividends']!=0]
dfd_divs = dfd[dfd['Dividends']!=0]
dfm_divs = dfm[dfm['Dividends'] != 0]
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
def test_tz_dst_ambiguous(self):
@@ -388,7 +389,7 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
except AssertionError:
print("Weekly data not aligned to Monday")
raise
@@ -440,18 +441,18 @@ class TestPriceHistory(unittest.TestCase):
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9)
time_close = _dt.time(17,30)
time_close = _dt.time(17, 30)
special_day = _dt.date(2022, 12, 23)
time_early_close = _dt.time(13, 2)
dat = yf.Ticker(tkr, session=self.session)
# Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1,5), (4,14), (5,25), (6,23), (11,4), (12,23), (12,30)]]
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]]
# Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
# Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
# But need to handle in this test.
expected_incorrect_half_days = [_dt.date(2022,4,13)]
expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
half_days = sorted(half_days+expected_incorrect_half_days)
# Run
@@ -468,7 +469,7 @@ class TestPriceHistory(unittest.TestCase):
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
unexpected_early_close_dates = [d for d in early_close_dates if not d in half_days]
unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
self.assertEqual(len(unexpected_early_close_dates), 0)
self.assertEqual(len(early_close_dates), len(half_days))
self.assertTrue(_np.equal(early_close_dates, half_days).all())
@@ -484,7 +485,7 @@ class TestPriceHistory(unittest.TestCase):
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(10)
time_close = _dt.time(16,12)
time_close = _dt.time(16, 12)
# No early closes in 2022
dat = yf.Ticker(tkr, session=self.session)
@@ -521,6 +522,7 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, end=end, interval=interval)
class TestPriceRepair(unittest.TestCase):
session = None
@@ -566,10 +568,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [475, 473.5, 472, 473.5],
"Adj Close": [470.1, 468.6, 467.1, 468.6],
"Volume": [2295613, 2245604, 3000287, 2635611]},
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
@@ -587,12 +589,11 @@ class TestPriceRepair(unittest.TestCase):
for c in data_cols:
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
except AssertionError:
print(df[c])
print(df_repaired[c])
raise
# Second test - all differences should be either ~1x or ~100x
ratio = df_bad[data_cols].values / df[data_cols].values
ratio = ratio.round(2)
@@ -621,10 +622,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [410, 409.5, 402, 399],
"Adj Close": [393.91, 393.43, 386.22, 383.34],
"Volume": [3232600, 3773900, 10835000, 4257900]},
index=_pd.to_datetime([_dt.date(2020, 3, 30),
_dt.date(2020, 3, 23),
_dt.date(2020, 3, 16),
_dt.date(2020, 3, 9)]))
index=_pd.to_datetime([_dt.date(2020, 3, 30),
_dt.date(2020, 3, 23),
_dt.date(2020, 3, 16),
_dt.date(2020, 3, 9)]))
df = df.sort_index()
# Simulate data missing split-adjustment:
df[data_cols] *= 100.0
@@ -645,7 +646,7 @@ class TestPriceRepair(unittest.TestCase):
for c in data_cols:
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
except AssertionError:
print("Mismatch in column", c)
print("- df_repaired:")
print(df_repaired[c])
@@ -679,10 +680,10 @@ class TestPriceRepair(unittest.TestCase):
"Close": [475.5, 475.5, 474.5, 475],
"Adj Close": [475.5, 475.5, 474.5, 475],
"Volume": [436414, 485947, 358067, 287620]},
index=_pd.to_datetime([_dt.date(2022, 11, 1),
_dt.date(2022, 10, 31),
_dt.date(2022, 10, 28),
_dt.date(2022, 10, 27)]))
index=_pd.to_datetime([_dt.date(2022, 11, 1),
_dt.date(2022, 10, 31),
_dt.date(2022, 10, 28),
_dt.date(2022, 10, 27)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
@@ -777,9 +778,9 @@ class TestPriceRepair(unittest.TestCase):
"Close": [103.03, 102.05, 102.08],
"Adj Close": [102.03, 102.05, 102.08],
"Volume": [560, 137, 117]},
index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
_dt.datetime(2022, 10, 31),
_dt.datetime(2022, 10, 30)]))
index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
_dt.datetime(2022, 10, 31),
_dt.datetime(2022, 10, 30)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
@@ -808,11 +809,11 @@ class TestPriceRepair(unittest.TestCase):
"Adj Close": [28.12, 28.93, 28.57, 29.83, 29.70],
"Volume": [36e6, 51e6, 49e6, 58e6, 62e6],
"Dividends": [0, 0, 0.365, 0, 0]},
index=_pd.to_datetime([_dt.datetime(2023, 2, 8),
_dt.datetime(2023, 2, 7),
_dt.datetime(2023, 2, 6),
_dt.datetime(2023, 2, 3),
_dt.datetime(2023, 2, 2)]))
index=_pd.to_datetime([_dt.datetime(2023, 2, 8),
_dt.datetime(2023, 2, 7),
_dt.datetime(2023, 2, 6),
_dt.datetime(2023, 2, 3),
_dt.datetime(2023, 2, 2)]))
df = df.sort_index()
df.index.name = "Date"
dat = yf.Ticker(tkr, session=self.session)
@@ -853,7 +854,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
except:
except AssertionError:
print("COLUMN", c)
print("- repaired_df")
print(repaired_df)
@@ -922,7 +923,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close", "Adj Close", "Volume"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=5e-6).all())
except:
except AssertionError:
print(f"tkr={tkr} COLUMN={c}")
# print("- repaired_df")
# print(repaired_df)
@@ -953,7 +954,7 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close", "Adj Close", "Volume"]:
try:
self.assertTrue((repaired_df[c].to_numpy() == df_good[c].to_numpy()).all())
except:
except AssertionError:
print(f"tkr={tkr} interval={interval} COLUMN={c}")
df_dbg = df_good[[c]].join(repaired_df[[c]], lsuffix='.good', rsuffix='.repaired')
f_diff = repaired_df[c].to_numpy() != df_good[c].to_numpy()