1006 lines
45 KiB
Python
1006 lines
45 KiB
Python
from .context import yfinance as yf
|
|
from .context import session_gbl
|
|
|
|
import unittest
|
|
|
|
import os
|
|
import datetime as _dt
|
|
import pytz as _tz
|
|
import numpy as _np
|
|
import pandas as _pd
|
|
|
|
|
|
class TestPriceHistory(unittest.TestCase):
    """Checks on the price tables returned by ``Ticker.history()`` and ``yf.download()``.

    Every test fetches data through ``yf``, so outcomes depend on what is
    returned at run time; several tests skip themselves when the current
    date/time makes the scenario they target impossible.
    """

    # Session shared by all tests in this class; assigned in setUpClass.
    session = None

    @classmethod
    def setUpClass(cls):
        """Use the module-wide session for every request made by this class."""
        cls.session = session_gbl

    @classmethod
    def tearDownClass(cls):
        """Close the shared session if one was assigned."""
        if cls.session is not None:
            cls.session.close()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _now_in_tz(tz_name):
        """Return the current instant as an aware datetime in timezone *tz_name*."""
        # Start from an aware UTC "now" (datetime.utcnow() is deprecated) and
        # convert, rather than labelling a naive clock reading with a zone.
        return _dt.datetime.now(_dt.timezone.utc).astimezone(_tz.timezone(tz_name))

    def _assert_same_index(self, df1, df2, label1, label2):
        """Assert both frames share an index; on failure print what each lacks."""
        try:
            self.assertTrue(df1.index.equals(df2.index))
        except AssertionError:
            missing_from_df1 = df2.index.difference(df1.index)
            missing_from_df2 = df1.index.difference(df2.index)
            print("{} missing these dates: {}".format(label1, missing_from_df1))
            print("{} missing these dates: {}".format(label2, missing_from_df2))
            raise

    def _check_events_index(self, interval):
        """Check that dividend/split events do not disturb the index at *interval*.

        First QQQ and GDX are fetched with actions and must share an index;
        then each ticker is fetched with and without actions and the two
        indexes must match.
        """
        # Reproduce issue #521
        tkr1 = "QQQ"
        tkr2 = "GDX"
        start_d = "2014-12-29"
        end_d = "2020-11-29"
        df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval=interval, actions=True)
        df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval=interval, actions=True)
        self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
        self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
        self._assert_same_index(df1, df2, tkr1, tkr2)

        # Test that index same with and without events:
        for tkr in [tkr1, tkr2]:
            df1 = yf.Ticker(tkr, session=self.session).history(
                start=start_d, end=end_d, interval=interval, actions=True)
            df2 = yf.Ticker(tkr, session=self.session).history(
                start=start_d, end=end_d, interval=interval, actions=False)
            self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
            self._assert_same_index(df1, df2,
                                    "{}-with-events".format(tkr),
                                    "{}-without-events".format(tkr))

    def _check_intraday_dividends(self, tkrs):
        """Check 15m data carries the same dividend as daily data.

        For each ticker with a non-zero dividend in the daily data of the
        last 59 days, fetch 15m data for the day of the most recent dividend
        and assert the dividend is present and equals the daily series.

        Returns True if at least one ticker was actually checked.
        """
        test_run = False
        for tkr in tkrs:
            start_d = _dt.date.today() - _dt.timedelta(days=59)
            end_d = None
            df_daily = yf.Ticker(tkr, session=self.session).history(
                start=start_d, end=end_d, interval="1d", actions=True)
            df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
            if df_daily_divs.shape[0] == 0:
                continue

            last_div_date = df_daily_divs.index[-1]
            start_d = last_div_date.date()
            end_d = last_div_date.date() + _dt.timedelta(days=1)
            df_intraday = yf.Ticker(tkr, session=self.session).history(
                start=start_d, end=end_d, interval="15m", actions=True)
            self.assertTrue((df_intraday["Dividends"] != 0.0).any())

            df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0]
            # Drop the time-of-day so the intraday index is comparable to the daily one.
            df_intraday_divs.index = df_intraday_divs.index.floor('D')
            self.assertTrue(df_daily_divs.equals(df_intraday_divs))

            test_run = True
        return test_run

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_daily_index(self):
        """Rows of 1d/1wk/1mo history must all be stamped at time 00:00."""
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
        intervals = ["1d", "1wk", "1mo"]
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)

            for interval in intervals:
                df = dat.history(period="5y", interval=interval)

                f = df.index.time == _dt.time(0)
                self.assertTrue(f.all())

    def test_download(self):
        """``yf.download`` must return midnight-stamped rows and every requested ticker."""
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
        intervals = ["1d", "1wk", "1mo"]
        for interval in intervals:
            df = yf.download(tkrs, period="5y", interval=interval)

            f = df.index.time == _dt.time(0)
            self.assertTrue(f.all())

            # Second column level holds the ticker symbols.
            df_tkrs = df.columns.levels[1]
            self.assertEqual(sorted(tkrs), sorted(df_tkrs))

    def test_duplicatingHourly(self):
        """The last two 1h rows must not fall in the same hour."""
        tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            tz = dat._get_ticker_tz(proxy=None, timeout=None)

            dt = self._now_in_tz(tz)
            start_d = dt.date() - _dt.timedelta(days=7)
            df = dat.history(start=start_d, interval="1h")

            dt0 = df.index[-2]
            dt1 = df.index[-1]
            try:
                self.assertNotEqual(dt0.hour, dt1.hour)
            except AssertionError:
                print("Ticker = ", tkr)
                raise

    def test_duplicatingDaily(self):
        """The last two 1d rows must differ; only checked from 17:00 exchange time."""
        tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
        test_run = False
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            tz = dat._get_ticker_tz(proxy=None, timeout=None)

            dt = self._now_in_tz(tz)
            if dt.time() < _dt.time(17, 0):
                continue
            test_run = True

            df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")

            dt0 = df.index[-2]
            dt1 = df.index[-1]
            try:
                self.assertNotEqual(dt0, dt1)
            except AssertionError:
                print("Ticker = ", tkr)
                raise

        if not test_run:
            self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")

    def test_duplicatingWeekly(self):
        """The last two 1wk rows must be in different weeks; checked Tuesday-Friday only."""
        tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
        test_run = False
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            tz = dat._get_ticker_tz(proxy=None, timeout=None)

            # Real current time at the exchange. Attaching the exchange zone to
            # the machine's naive local clock gives the wrong day whenever the
            # machine and the exchange are in different timezones.
            dt = self._now_in_tz(tz)
            if dt.date().weekday() not in [1, 2, 3, 4]:
                continue
            test_run = True

            df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
            dt0 = df.index[-2]
            dt1 = df.index[-1]
            try:
                self.assertNotEqual(dt0.week, dt1.week)
            except AssertionError:
                print("Ticker={}: Last two rows within same week:".format(tkr))
                print(df.iloc[df.shape[0] - 2:])
                raise

        if not test_run:
            self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")

    def test_dailyWithEvents(self):
        """2022 dividend dates in 1d history must match the expected dates per ticker."""
        start_d = _dt.date(2022, 1, 1)
        end_d = _dt.date(2023, 1, 1)

        tkr_div_dates = {'BHP.AX': [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)],  # Yahoo claims 23-Feb but wrong because DST
                         'IMP.JO': [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)],
                         'BP.L': [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12),
                                  _dt.date(2022, 2, 17)],
                         'INTC': [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5),
                                  _dt.date(2022, 2, 4)]}

        for tkr, dates in tkr_div_dates.items():
            df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
            # Newest first, matching the order of the expected lists.
            df_divs = df[df['Dividends'] != 0].sort_index(ascending=False)
            try:
                # List comparison raises AssertionError on a length mismatch
                # too, so the diagnostics below are always printed on failure.
                self.assertEqual(list(df_divs.index.date), dates)
            except AssertionError:
                print(f'- ticker = {tkr}')
                print('- response:')
                print(df_divs.index.date)
                print('- answer:')
                print(dates)
                raise

    def test_dailyWithEvents_bugs(self):
        """Daily event/index consistency, plus merging of an out-of-range dividend."""
        self._check_events_index("1d")

        # Reproduce issue #1634 - 1d dividend out-of-range, should be prepended to prices
        div_dt = _pd.Timestamp(2022, 7, 21).tz_localize("America/New_York")
        df_dividends = _pd.DataFrame(data={"Dividends": [1.0]}, index=[div_dt])
        df_prices = _pd.DataFrame(
            data={c: [1.0] for c in yf.const.price_colnames} | {'Volume': 0},
            index=[div_dt + _dt.timedelta(days=1)])
        df_merged = yf.utils.safe_merge_dfs(df_prices, df_dividends, '1d')
        self.assertEqual(df_merged.shape[0], 2)
        self.assertTrue(df_merged[df_prices.columns].iloc[1:].equals(df_prices))
        self.assertEqual(df_merged.index[0], div_dt)

    def test_intraDayWithEvents(self):
        """Recent dividends must appear in 15m data exactly as in daily data."""
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
        if not self._check_intraday_dividends(tkrs):
            self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")

    def test_intraDayWithEvents_tase(self):
        """Same as test_intraDayWithEvents, for TASE tickers."""
        # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
        tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
        if not self._check_intraday_dividends(tase_tkrs):
            self.skipTest("Skipping test_intraDayWithEvents_tase() because no tickers had a dividend in last 60 days")

    def test_weeklyWithEvents(self):
        """Weekly event/index consistency for QQQ and GDX."""
        self._check_events_index("1wk")

    def test_monthlyWithEvents(self):
        """Monthly event/index consistency for QQQ and GDX."""
        self._check_events_index("1mo")

    def test_monthlyWithEvents2(self):
        """Monthly data must contain as many dividend rows as the matching daily data."""
        # Simply check no exception from internal merge
        for tkr, period in (("ABBV", "max"), ("F", "50mo")):
            dfm = yf.Ticker(tkr).history(period=period, interval="1mo")
            dfd = yf.Ticker(tkr).history(period=period, interval="1d")
            # Only compare daily rows after the first monthly row.
            dfd = dfd[dfd.index > dfm.index[0]]
            dfm_divs = dfm[dfm['Dividends'] != 0]
            dfd_divs = dfd[dfd['Dividends'] != 0]
            self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])

    def test_tz_dst_ambiguous(self):
        """Fetching across 2002-10-06..09 for ESLT.TA must not raise AmbiguousTimeError."""
        # Reproduce issue #1100
        try:
            yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
        except _tz.exceptions.AmbiguousTimeError:
            self.fail("Ambiguous DST issue not resolved")

    def test_dst_fix(self):
        """Daily rows must fall on weekdays and weekly rows on Mondays for AGRO3.SA."""
        # Daily intervals should start at time 00:00. But for some combinations of date and timezone,
        # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
        # The clue is (a) minutes=0 and (b) hour near 0.
        # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.

        # The correction is successful if no days are weekend, and weekly data begins Monday

        tkr = "AGRO3.SA"
        dat = yf.Ticker(tkr, session=self.session)
        start = "2021-01-11"
        end = "2022-11-05"

        interval = "1d"
        df = dat.history(start=start, end=end, interval=interval)
        self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())

        interval = "1wk"
        df = dat.history(start=start, end=end, interval=interval)
        try:
            self.assertTrue((df.index.weekday == 0).all())
        except AssertionError:
            print("Weekly data not aligned to Monday")
            raise

    def test_prune_post_intraday_us(self):
        """With prepost=False, AMZN 1h data must end early only on 2022-11-25."""
        # Half-day after USA Thanksgiving (2022-11-25). Yahoo normally
        # returns an interval starting when regular trading closes,
        # even if prepost=False.

        # Setup
        tkr = "AMZN"
        interval = "1h"
        interval_td = _dt.timedelta(hours=1)
        time_open = _dt.time(9, 30)
        time_close = _dt.time(16)
        special_day = _dt.date(2022, 11, 25)
        time_early_close = _dt.time(13)
        dat = yf.Ticker(tkr, session=self.session)

        # Run
        start_d = special_day - _dt.timedelta(days=7)
        end_d = special_day + _dt.timedelta(days=7)
        df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
        tg_last_dt = df.loc[str(special_day)].index[-1]
        self.assertTrue(tg_last_dt.time() < time_early_close)

        # Test no other afternoons (or mornings) were pruned
        start_d = _dt.date(special_day.year, 1, 1)
        end_d = _dt.date(special_day.year + 1, 1, 1)
        df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
        last_dts = _pd.Series(df.index).groupby(df.index.date).last()
        # A day closed early if its last interval ends before the normal close.
        f_early_close = (last_dts + interval_td).dt.time < time_close
        early_close_dates = last_dts.index[f_early_close].values
        self.assertEqual(len(early_close_dates), 1)
        self.assertEqual(early_close_dates[0], special_day)

        first_dts = _pd.Series(df.index).groupby(df.index.date).first()
        f_late_open = first_dts.dt.time > time_open
        late_open_dates = first_dts.index[f_late_open]
        self.assertEqual(len(late_open_dates), 0)

    def test_prune_post_intraday_omx(self):
        """With prepost=False, AEC.ST 1h data must end early only on the listed half-days."""
        # Half-day before Sweden Christmas. Yahoo normally
        # returns an interval starting when regular trading closes,
        # even if prepost=False.
        # If prepost=False, test that yfinance is removing prepost intervals.

        # Setup
        tkr = "AEC.ST"
        interval = "1h"
        interval_td = _dt.timedelta(hours=1)
        time_open = _dt.time(9)
        time_close = _dt.time(17, 30)
        special_day = _dt.date(2022, 12, 23)
        time_early_close = _dt.time(13, 2)
        dat = yf.Ticker(tkr, session=self.session)

        # Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
        half_days = [_dt.date(special_day.year, x[0], x[1])
                     for x in [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]]

        # Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
        # Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
        # But need to handle in this test.
        expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
        half_days = sorted(half_days + expected_incorrect_half_days)

        # Run
        start_d = special_day - _dt.timedelta(days=7)
        end_d = special_day + _dt.timedelta(days=7)
        df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
        tg_last_dt = df.loc[str(special_day)].index[-1]
        self.assertTrue(tg_last_dt.time() < time_early_close)

        # Test no other afternoons (or mornings) were pruned
        start_d = _dt.date(special_day.year, 1, 1)
        end_d = _dt.date(special_day.year + 1, 1, 1)
        df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
        last_dts = _pd.Series(df.index).groupby(df.index.date).last()
        f_early_close = (last_dts + interval_td).dt.time < time_close
        early_close_dates = last_dts.index[f_early_close].values
        unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
        self.assertEqual(len(unexpected_early_close_dates), 0)
        self.assertEqual(len(early_close_dates), len(half_days))
        self.assertTrue(_np.equal(early_close_dates, half_days).all())

        first_dts = _pd.Series(df.index).groupby(df.index.date).first()
        f_late_open = first_dts.dt.time > time_open
        late_open_dates = first_dts.index[f_late_open]
        self.assertEqual(len(late_open_dates), 0)

    def test_prune_post_intraday_asx(self):
        """With prepost=False, BHP.AX 1h data for 2022 must show no early close or late open."""
        # Setup
        tkr = "BHP.AX"
        interval = "1h"
        interval_td = _dt.timedelta(hours=1)
        time_open = _dt.time(10)
        time_close = _dt.time(16, 12)
        # No early closes in 2022
        dat = yf.Ticker(tkr, session=self.session)

        # Test no afternoons (or mornings) were pruned
        start_d = _dt.date(2022, 1, 1)
        end_d = _dt.date(2022 + 1, 1, 1)
        df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
        last_dts = _pd.Series(df.index).groupby(df.index.date).last()
        f_early_close = (last_dts + interval_td).dt.time < time_close
        early_close_dates = last_dts.index[f_early_close].values
        self.assertEqual(len(early_close_dates), 0)

        first_dts = _pd.Series(df.index).groupby(df.index.date).first()
        f_late_open = first_dts.dt.time > time_open
        late_open_dates = first_dts.index[f_late_open]
        self.assertEqual(len(late_open_dates), 0)

    def test_weekly_2rows_fix(self):
        """Weekly AMZN rows requested from a Monday start must all fall on Mondays."""
        tkr = "AMZN"
        start = _dt.date.today() - _dt.timedelta(days=14)
        # Step back to the Monday of that week.
        start -= _dt.timedelta(days=start.weekday())

        dat = yf.Ticker(tkr)
        df = dat.history(start=start, interval="1wk")
        self.assertTrue((df.index.weekday == 0).all())

    def test_aggregate_capital_gains(self):
        """Fetching 3mo FXAIX history must complete without raising."""
        # Setup
        tkr = "FXAIX"
        dat = yf.Ticker(tkr, session=self.session)
        start = "2017-12-31"
        end = "2019-12-31"
        interval = "3mo"

        # No assertion on the result: the test fails only if this call raises.
        dat.history(start=start, end=end, interval=interval)
|
|
|
|
|
|
class TestPriceRepair(unittest.TestCase):
|
|
session = None
|
|
|
|
@classmethod
def setUpClass(cls):
    """Bind the module-wide session to the class so every test reuses it."""
    shared_session = session_gbl
    cls.session = shared_session
|
|
|
|
@classmethod
def tearDownClass(cls):
    """Close the class-level session once all tests have run, if there is one."""
    active = cls.session
    if active is None:
        return
    active.close()
|
|
|
|
def test_reconstruct_2m(self):
    """Request 60 days of repaired 2m data for several tickers.

    There is no assertion on the returned data: the test fails only if a
    ``history(..., repair=True)`` call raises.
    """
    # 2m repair requires 1m data.
    # Yahoo restricts 1m fetches to 7 days max within last 30 days.
    # Need to test that '_reconstruct_intervals_batch()' can handle this.

    tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]

    # Timestamp.utcnow() is deprecated in recent pandas; now("UTC") gives the
    # same timezone-aware value.
    # Round time for 'requests_cache' reuse
    end_dt = _pd.Timestamp.now("UTC").ceil("1h")
    start_dt = end_dt - _dt.timedelta(days=60)

    for tkr in tkrs:
        dat = yf.Ticker(tkr, session=self.session)
        dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
|
|
|
|
def test_repair_100x_random_weekly(self):
    """Isolated 100x errors in weekly PNL.L prices must be undone by the repair.

    Three single values of a known-good table are multiplied by 100, the
    table is passed to ``_fix_unit_random_mixups`` and the result must match
    the good table to within 1% in every price column.
    """
    # Setup:
    symbol = "PNL.L"
    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]

    price_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    week_starts = _pd.to_datetime([_dt.date(2022, 10, 24),
                                   _dt.date(2022, 10, 17),
                                   _dt.date(2022, 10, 10),
                                   _dt.date(2022, 10, 3)])
    truth = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
                                "High": [476, 476.5, 477, 480],
                                "Low": [470.5, 470, 465.5, 468.26],
                                "Close": [475, 473.5, 472, 473.5],
                                "Adj Close": [470.1, 468.6, 467.1, 468.6],
                                "Volume": [2295613, 2245604, 3000287, 2635611]},
                          index=week_starts)
    truth = truth.sort_index()
    truth.index.name = "Date"

    # Inject the 100x errors while the index is still timezone-naive.
    corrupted = truth.copy()
    for day, col in (("2022-10-24", "Close"), ("2022-10-17", "Low"), ("2022-10-03", "Open")):
        corrupted.loc[day, col] *= 100
    truth.index = truth.index.tz_localize(exchange_tz)
    corrupted.index = corrupted.index.tz_localize(exchange_tz)

    # Run test
    repaired = ticker._fix_unit_random_mixups(corrupted, "1wk", exchange_tz, prepost=False)

    # First test - no errors left
    for col in price_cols:
        try:
            self.assertTrue(_np.isclose(repaired[col], truth[col], rtol=1e-2).all())
        except AssertionError:
            print(truth[col])
            print(repaired[col])
            raise

    # Second test - all differences should be either ~1x or ~100x
    scale = (corrupted[price_cols].values / truth[price_cols].values).round(2)
    # - round near-100 ratio to 100:
    near_100 = scale > 90
    scale[near_100] = (scale[near_100] / 10).round().astype(int) * 10  # round ratio to nearest 10
    # - now test
    self.assertTrue(((scale == 100) | (scale == 1)).all())

    self.assertIn("Repaired?", repaired.columns)
    self.assertFalse(repaired["Repaired?"].isna().any())
|
|
|
|
def test_repair_100x_random_weekly_preSplit(self):
    """Isolated 100x errors must be repaired in weekly data lacking split-adjustment.

    The reference table is first scaled (prices x100, volume x0.01) to mimic
    data that was not split-adjusted, then three values are corrupted by a
    further factor of 100 and the repair output is compared to the reference.
    """
    # PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.

    symbol = "PNL.L"
    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]

    price_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    week_starts = _pd.to_datetime([_dt.date(2020, 3, 30),
                                   _dt.date(2020, 3, 23),
                                   _dt.date(2020, 3, 16),
                                   _dt.date(2020, 3, 9)])
    truth = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
                                "High": [421, 425, 419, 420.5],
                                "Low": [400, 380.5, 376.5, 396],
                                "Close": [410, 409.5, 402, 399],
                                "Adj Close": [393.91, 393.43, 386.22, 383.34],
                                "Volume": [3232600, 3773900, 10835000, 4257900]},
                          index=week_starts)
    truth = truth.sort_index()
    # Simulate data missing split-adjustment:
    truth[price_cols] *= 100.0
    truth["Volume"] *= 0.01
    #
    truth.index.name = "Date"
    # Create 100x errors:
    corrupted = truth.copy()
    for day, col in (("2020-03-30", "Close"), ("2020-03-23", "Low"), ("2020-03-09", "Open")):
        corrupted.loc[day, col] *= 100
    truth.index = truth.index.tz_localize(exchange_tz)
    corrupted.index = corrupted.index.tz_localize(exchange_tz)

    repaired = ticker._fix_unit_random_mixups(corrupted, "1wk", exchange_tz, prepost=False)

    # First test - no errors left
    for col in price_cols:
        try:
            self.assertTrue(_np.isclose(repaired[col], truth[col], rtol=1e-2).all())
        except AssertionError:
            print("Mismatch in column", col)
            print("- df_repaired:")
            print(repaired[col])
            print("- answer:")
            print(truth[col])
            raise

    # Second test - all differences should be either ~1x or ~100x
    scale = (corrupted[price_cols].values / truth[price_cols].values).round(2)
    # - round near-100 ratio to 100:
    near_100 = scale > 90
    scale[near_100] = (scale[near_100] / 10).round().astype(int) * 10  # round ratio to nearest 10
    # - now test
    self.assertTrue(((scale == 100) | (scale == 1)).all())

    self.assertIn("Repaired?", repaired.columns)
    self.assertFalse(repaired["Repaired?"].isna().any())
|
|
|
|
def test_repair_100x_random_daily(self):
    """Isolated 100x errors in daily PNL.L prices must be undone by the repair.

    Same procedure as the weekly variant, but with a four-day table and the
    "1d" interval passed to ``_fix_unit_random_mixups``.
    """
    symbol = "PNL.L"
    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]

    price_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    days = _pd.to_datetime([_dt.date(2022, 11, 1),
                            _dt.date(2022, 10, 31),
                            _dt.date(2022, 10, 28),
                            _dt.date(2022, 10, 27)])
    truth = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
                                "High": [478, 477.5, 477, 475],
                                "Low": [474.02, 474, 473, 470.75],
                                "Close": [475.5, 475.5, 474.5, 475],
                                "Adj Close": [475.5, 475.5, 474.5, 475],
                                "Volume": [436414, 485947, 358067, 287620]},
                          index=days)
    truth = truth.sort_index()
    truth.index.name = "Date"

    # Inject the 100x errors while the index is still timezone-naive.
    corrupted = truth.copy()
    for day, col in (("2022-11-01", "Close"), ("2022-10-31", "Low"), ("2022-10-27", "Open")):
        corrupted.loc[day, col] *= 100
    truth.index = truth.index.tz_localize(exchange_tz)
    corrupted.index = corrupted.index.tz_localize(exchange_tz)

    repaired = ticker._fix_unit_random_mixups(corrupted, "1d", exchange_tz, prepost=False)

    # First test - no errors left
    for col in price_cols:
        self.assertTrue(_np.isclose(repaired[col], truth[col], rtol=1e-2).all())

    # Second test - all differences should be either ~1x or ~100x
    scale = (corrupted[price_cols].values / truth[price_cols].values).round(2)
    # - round near-100 ratio to 100:
    near_100 = scale > 90
    scale[near_100] = (scale[near_100] / 10).round().astype(int) * 10  # round ratio to nearest 10
    # - now test
    self.assertTrue(((scale == 100) | (scale == 1)).all())

    self.assertIn("Repaired?", repaired.columns)
    self.assertFalse(repaired["Repaired?"].isna().any())
|
|
|
|
def test_repair_100x_block_daily(self):
    """Block-wise 100x errors loaded from CSV fixtures must be repaired.

    For each ticker/interval pair, a "-100x-error.csv" fixture is passed to
    ``_fix_unit_switch`` and the output is compared (within 1%) against the
    matching "-100x-error-fixed.csv" fixture. Pairs whose error fixture does
    not exist on disk are skipped.
    """
    # Some 100x errors are not sporadic.
    # Sometimes Yahoo suddenly shifts from cents->$ from some recent date.

    def _load_fixture(path, tz):
        # Read a price CSV indexed by "Date", convert to exchange time, sort.
        frame = _pd.read_csv(path, index_col="Date")
        frame.index = _pd.to_datetime(frame.index, utc=True).tz_convert(tz)
        return frame.sort_index()

    tkrs = ['AET.L', 'SSW.JO']
    data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    data_dir = os.path.join(os.path.dirname(__file__), "data")

    for tkr in tkrs:
        for interval in ['1d', '1wk']:
            dat = yf.Ticker(tkr, session=self.session)
            tz_exchange = dat.fast_info["timezone"]

            fp_stem = tkr.replace('.', '-') + '-' + interval + "-100x-error"
            fp_bad = os.path.join(data_dir, fp_stem + ".csv")
            if not os.path.isfile(fp_bad):
                continue
            df_bad = _load_fixture(fp_bad, tz_exchange)
            df = _load_fixture(os.path.join(data_dir, fp_stem + "-fixed.csv"), tz_exchange)

            df_repaired = dat._fix_unit_switch(df_bad, interval, tz_exchange)
            df_repaired = df_repaired.sort_index()

            # First test - no errors left
            for c in data_cols:
                try:
                    self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
                except AssertionError:
                    # Only assertion failures get the diagnostic dump; other
                    # exceptions (and KeyboardInterrupt) propagate untouched.
                    print("- repaired:")
                    print(df_repaired[c])
                    print("- correct:")
                    print(df[c])
                    print(f"TEST FAIL on column '{c}' (tkr={tkr} interval={interval})")
                    raise

            # Second test - all differences should be either ~1x or ~100x
            ratio = df_bad[data_cols].values / df[data_cols].values
            ratio = ratio.round(2)
            # - round near-100 ratio to 100:
            f = ratio > 90
            ratio[f] = (ratio[f] / 10).round().astype(int) * 10  # round ratio to nearest 10
            # - now test
            f_100 = (ratio == 100) | (ratio == 0.01)
            f_1 = ratio == 1
            self.assertTrue((f_100 | f_1).all())

            self.assertIn("Repaired?", df_repaired.columns)
            self.assertFalse(df_repaired["Repaired?"].isna().any())
|
|
|
|
def test_repair_zeroes_daily(self):
    """Zero Open/High/Low values in a daily BBIL.L row must be filled in by the repair.

    The row for 2022-11-01 has zeros in Open, High and Low; after
    ``_fix_zeroes`` those cells must equal the expected reference values.
    """
    symbol = "BBIL.L"
    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]

    days = _pd.to_datetime([_dt.datetime(2022, 11, 1),
                            _dt.datetime(2022, 10, 31),
                            _dt.datetime(2022, 10, 30)])
    broken = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
                                 "High": [0, 102.1, 102.11],
                                 "Low": [0, 102.04, 102.04],
                                 "Close": [103.03, 102.05, 102.08],
                                 "Adj Close": [102.03, 102.05, 102.08],
                                 "Volume": [560, 137, 117]},
                           index=days)
    broken = broken.sort_index()
    broken.index.name = "Date"
    broken.index = broken.index.tz_localize(exchange_tz)

    fixed = ticker._fix_zeroes(broken, "1d", exchange_tz, prepost=False)

    # Expected table: the input with the three zero cells replaced.
    expected = broken.copy()
    replacements = {"Open": 102.080002, "Low": 102.032501, "High": 102.080002}
    for col, value in replacements.items():
        expected.loc["2022-11-01", col] = value
    for col in ["Open", "Low", "High", "Close"]:
        self.assertTrue(_np.isclose(fixed[col], expected[col], rtol=1e-8).all())

    self.assertIn("Repaired?", fixed.columns)
    self.assertFalse(fixed["Repaired?"].isna().any())
|
|
|
|
def test_repair_zeroes_daily_adjClose(self):
    # Check that _fix_zeroes() rebuilds a zeroed 'Adj Close' correctly,
    # particularly when a dividend occurred within 1 day.
    symbol = "INTC"

    # Rows are entered newest-first; sort_index() puts them in date order.
    sessions = _pd.to_datetime([_dt.datetime(2023, 2, 8),
                                _dt.datetime(2023, 2, 7),
                                _dt.datetime(2023, 2, 6),
                                _dt.datetime(2023, 2, 3),
                                _dt.datetime(2023, 2, 2)])
    table = {
        "Open": [28.95, 28.65, 29.55, 29.62, 29.25],
        "High": [29.12, 29.27, 29.65, 31.17, 30.30],
        "Low": [28.21, 28.43, 28.61, 29.53, 28.80],
        "Close": [28.24, 29.05, 28.69, 30.32, 30.19],
        "Adj Close": [28.12, 28.93, 28.57, 29.83, 29.70],
        "Volume": [36e6, 51e6, 49e6, 58e6, 62e6],
        "Dividends": [0, 0, 0.365, 0, 0],
    }
    prices = _pd.DataFrame(data=table, index=sessions).sort_index()
    prices.index.name = "Date"

    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]
    prices.index = prices.index.tz_localize(exchange_tz)

    tolerance = 5e-3
    # Slide a 3-row window over the table and zero each row of it in turn.
    for first_row in range(3):
        window = prices.iloc[first_row:first_row + 3]
        for zeroed_row in range(3):
            damaged = window.copy()
            damaged.loc[damaged.index[zeroed_row], "Adj Close"] = 0.0

            fixed = ticker._fix_zeroes(damaged, "1d", exchange_tz, prepost=False)
            for column in ("Close", "Adj Close"):
                matches = _np.isclose(fixed[column], window[column], rtol=tolerance)
                self.assertTrue(matches.all())
            self.assertTrue("Repaired?" in fixed.columns)
            self.assertFalse(fixed["Repaired?"].isna().any())
|
|
|
|
def test_repair_zeroes_hourly(self):
    # Download hourly prices, blank out one row (NaN prices, zero volume),
    # then check that _fix_zeroes() brings the prices back to the values
    # that were downloaded.
    symbol = "INTC"
    ticker = yf.Ticker(symbol, session=self.session)
    exchange_tz = ticker.fast_info["timezone"]

    reference = ticker.history(period="1wk", interval="1h", auto_adjust=False, repair=True)

    damaged = reference.copy()
    target = reference.index[10]
    for price_col in ("Open", "High", "Low", "Close", "Adj Close"):
        damaged.loc[target, price_col] = _np.nan
    damaged.loc[target, "Volume"] = 0

    fixed = ticker._fix_zeroes(damaged, "1h", exchange_tz, prepost=False)

    for column in ("Open", "Low", "High", "Close"):
        all_close = _np.isclose(fixed[column], reference[column], rtol=1e-7).all()
        try:
            self.assertTrue(all_close)
        except AssertionError:
            # Dump both sides and their difference to help diagnose the failure.
            print("COLUMN", column)
            print("- repaired_df")
            print(fixed)
            print("- correct_df[c]:")
            print(reference[column])
            print("- diff:")
            print(fixed[column] - reference[column])
            raise

    # The repair must flag every row, with no missing flags.
    self.assertTrue("Repaired?" in fixed.columns)
    self.assertFalse(fixed["Repaired?"].isna().any())
|
|
|
|
def test_repair_bad_stock_split(self):
    # Exercises _fix_bad_stock_split() in three scenarios:
    #   1. tickers whose downloaded data needs no repair -> output == input
    #   2. tickers with recorded bad data (CSV fixtures in ./data) -> output
    #      must match the recorded '-fixed' fixture
    #   3. a volatile ticker that must NOT be repaired -> output == input
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    price_cols = ["Open", "Low", "High", "Close", "Adj Close", "Volume"]

    def assert_repair_changes_nothing(tkr, interval, start, end):
        # Download prices for tkr and require that the repair leaves every
        # price/volume column exactly as downloaded.
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        df_good = dat.history(start=start, end=end, interval=interval, auto_adjust=False)

        repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)

        # Expect no change from repair
        df_good = df_good.sort_index()
        repaired_df = repaired_df.sort_index()
        for c in price_cols:
            try:
                self.assertTrue((repaired_df[c].to_numpy() == df_good[c].to_numpy()).all())
            except AssertionError:
                # Show the differing rows plus their immediate neighbours.
                print(f"tkr={tkr} interval={interval} COLUMN={c}")
                df_dbg = df_good[[c]].join(repaired_df[[c]], lsuffix='.good', rsuffix='.repaired')
                f_diff = repaired_df[c].to_numpy() != df_good[c].to_numpy()
                print(df_dbg[f_diff | _np.roll(f_diff, 1) | _np.roll(f_diff, -1)])
                raise

    # Stocks that split in 2022 but no problems in Yahoo data,
    # so repair should change nothing
    good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA']
    good_tkrs += ['AEI', 'CHRA', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI']
    good_tkrs += ['BOL.ST', 'TUI1.DE']
    intervals = ['1d', '1wk', '1mo', '3mo']
    for tkr in good_tkrs:
        for interval in intervals:
            assert_repair_changes_nothing(tkr, interval, start='2020-01-01', end=_dt.date.today())

    bad_tkrs = ['4063.T', 'ALPHA.PA', 'AV.L', 'CNE.L', 'MOB.ST', 'SPM.MI']
    bad_tkrs.append('LA.V')  # special case - stock split error is 3 years ago! why not fixed?
    for tkr in bad_tkrs:
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        # Prefer the daily fixture; fall back to the weekly one if absent.
        fp_stem = tkr.replace('.', '-')
        interval = '1d'
        fp = os.path.join(data_dir, f"{fp_stem}-{interval}-bad-stock-split.csv")
        if not os.path.isfile(fp):
            interval = '1wk'
            fp = os.path.join(data_dir, f"{fp_stem}-{interval}-bad-stock-split.csv")
        df_bad = _pd.read_csv(fp, index_col="Date")
        df_bad.index = _pd.to_datetime(df_bad.index, utc=True)

        # NOTE(review): "1d" is passed even when the weekly fixture was
        # loaded above. Confirm this is intentional before changing it -
        # the '-fixed' fixtures may have been generated with this argument.
        repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)

        fp = os.path.join(data_dir, f"{fp_stem}-{interval}-bad-stock-split-fixed.csv")
        correct_df = _pd.read_csv(fp, index_col="Date")
        # Parse with utc=True, the same way as the bad-data index above.
        correct_df.index = _pd.to_datetime(correct_df.index, utc=True)

        repaired_df = repaired_df.sort_index()
        correct_df = correct_df.sort_index()
        for c in price_cols:
            try:
                self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=5e-6).all())
            except AssertionError:
                print(f"tkr={tkr} COLUMN={c}")
                raise

    # Had very high price volatility in Jan-2021 around split date that could
    # be mistaken for missing stock split adjustment. And old logic did think
    # column 'High' required fixing - wrong!
    sketchy_tkrs = ['FIZZ']
    intervals = ['1wk']
    for tkr in sketchy_tkrs:
        for interval in intervals:
            assert_repair_changes_nothing(tkr, interval, start='2020-11-30', end='2021-04-01')
|
|
|
|
def test_repair_missing_div_adjust(self):
    # Run _fix_missing_div_adjust() on the recorded
    # '<ticker>-1d-missing-div-adjust.csv' fixture and require the result to
    # match the recorded '<ticker>-1d-missing-div-adjust-fixed.csv' fixture.
    tkr = '8TRA.DE'

    dat = yf.Ticker(tkr, session=self.session)
    tz_exchange = dat.fast_info["timezone"]

    # Both fixtures live in ./data and share the same file-name prefix.
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    fp_prefix = os.path.join(data_dir, tkr.replace('.', '-') + "-1d-missing-div-adjust")

    df_bad = _pd.read_csv(fp_prefix + ".csv", index_col="Date")
    df_bad.index = _pd.to_datetime(df_bad.index)

    repaired_df = dat._fix_missing_div_adjust(df_bad, "1d", tz_exchange)

    correct_df = _pd.read_csv(fp_prefix + "-fixed.csv", index_col="Date")
    correct_df.index = _pd.to_datetime(correct_df.index)

    repaired_df = repaired_df.sort_index()
    correct_df = correct_df.sort_index()
    for c in ["Open", "Low", "High", "Close", "Adj Close", "Volume"]:
        try:
            self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=5e-6).all())
        except AssertionError:
            # Only a failed comparison should trigger the diagnostic dump;
            # a bare 'except' would also intercept KeyboardInterrupt etc.
            print(f"tkr={tkr} COLUMN={c}")
            print("- repaired_df")
            print(repaired_df)
            print("- correct_df[c]:")
            print(correct_df[c])
            print("- diff:")
            print(repaired_df[c] - correct_df[c])
            raise
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|