Compare commits

...

5 Commits

Author SHA1 Message Date
ValueRaider 462783a2a1
Merge branch 'dev' into feature/fix-prices-div-adjust 2023-04-11 21:12:24 +01:00
ValueRaider 9f1661ee5b
Merge branch 'dev' into feature/fix-prices-div-adjust 2023-02-22 22:40:07 +00:00
Value Raider 79ea20e77c Print notice for changing 'div-adjust' default 2023-02-20 13:02:36 +00:00
Value Raider cef150129a Improve 'deprecated' warnings with 'print_once()' 2023-02-20 12:19:26 +00:00
Value Raider 3e8bbc3c62 Fix multiday div-adjust ; Tidy adjust args
Fix multiday div-adjustment, Yahoo's 'Adj Close' is bad ; Rename 'auto_adjust'->'div_adjust', drop 'back_adjust'
2023-02-19 21:18:51 +00:00
3 changed files with 122 additions and 52 deletions

View File

@ -92,7 +92,9 @@ class TickerBase:
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
auto_adjust=None, back_adjust=None, # deprecated
div_adjust=False,
repair=False, keepna=False,
proxy=None, rounding=False, timeout=10,
debug=True, raise_errors=False) -> pd.DataFrame:
"""
@ -114,10 +116,8 @@ class TickerBase:
prepost : bool
Include Pre and Post market data in results?
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is True
back_adjust: bool
Back-adjusted data to mimic true historical prices
div_adjust: bool
Dividend-adjust all OHLC data? Default is False
repair: bool or "silent"
Detect currency unit 100x mixups and attempt repair.
If True, fix & print summary. If "silent", just fix.
@ -142,6 +142,25 @@ class TickerBase:
exceptions instead of printing to console.
"""
utils.print_once("NOTICE: yfinance.Ticker::history(): Be aware that dividend-adjustment is now default disabled, default used to be enabled")
# Handle deprecated arguments first
if auto_adjust is not None:
utils.print_once("WARNING: yfinance.Ticker::history(): 'auto_adjust' is deprecated, switch to 'div_adjust' instead")
div_adjust = auto_adjust
auto_adjust = None
elif back_adjust is not None:
utils.print_once("WARNING: yfinance.Ticker::history(): 'back_adjust' is deprecated, switch to 'div_adjust' instead")
back_adjust = None
if start is not None or end is not None:
period = None
if div_adjust and interval in ["1wk", "1mo", "3mo"]:
hist_args = locals() # function arguments
df = self._get_div_adjusted_multiday_prices(hist_args)
return df
if raise_errors:
debug = True
@ -383,15 +402,11 @@ class TickerBase:
# Auto/back adjust
try:
if auto_adjust:
df = utils.auto_adjust(df)
elif back_adjust:
df = utils.back_adjust(df)
if div_adjust:
df = utils.adjust_with_Yahoo_adj_close(df)
except Exception as e:
if auto_adjust:
err_msg = "auto_adjust failed with %s" % e
else:
err_msg = "back_adjust failed with %s" % e
if div_adjust:
err_msg = "div_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
@ -583,7 +598,7 @@ class TickerBase:
fetch_start = max(min_dt.date(), fetch_start)
logger.debug(f"Fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
r = "silent" if silent else True
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, div_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
if df_fine is None or df_fine.empty:
if not silent:
logger.warning(f"Cannot reconstruct {interval} block starting {start_d}, too old, Yahoo is rejecting request for finer-grain data")
@ -988,6 +1003,71 @@ class TickerBase:
print("-------------")
return None
def _get_div_adjusted_multiday_prices(self, hist_args):
    """Build dividend-adjusted 1wk/1mo/3mo prices by aggregating daily prices.

    Not possible to correctly div-adjust multi-day intervals using only the
    'Adj Close' returned by Yahoo, so this fetches 1d prices with
    ``div_adjust=True`` through ``self.history()`` and aggregates them into
    the requested larger interval.

    Parameters
    ----------
    hist_args : dict
        Keyword arguments of the originating ``history()`` call. Must contain
        'interval'. A 'self' entry (present when the caller passes its
        ``locals()``) is ignored. The dict is not modified.

    Returns
    -------
    pd.DataFrame
        One row per interval with Open/High/Low/Close/Volume, plus
        'Dividends' and 'Stock Splits' when the daily data carries them.
        If ``self.history()`` yields None or an empty frame, that result is
        returned unchanged.

    Raises
    ------
    ValueError
        If 'interval' is not one of "1wk", "1mo", "3mo".
    """
    interval = hist_args["interval"]
    period_aliases = {"1wk": "W", "1mo": "M", "3mo": "Q"}
    if interval not in period_aliases:
        raise ValueError(
            "Multi-day div-adjustment only supports intervals %s, not %r"
            % (sorted(period_aliases), interval))
    pd_period = period_aliases[interval]

    # Work on a private copy so the caller's dict is left untouched;
    # 'self' is dropped because history() is invoked as a bound method.
    args = {k: v for k, v in hist_args.items() if k != "self"}

    # Remember this now: 'period' is removed from args further down.
    period_given = args.get("period") is not None

    df_unadj = None
    if period_given:
        # Yahoo sets period start differently depending on interval, need to fetch to know.
        args["div_adjust"] = False  # avoid re-entering this code path
        df_unadj = self.history(**args)
        if df_unadj is None or df_unadj.empty:
            # Nothing to anchor the daily fetch on.
            return df_unadj
        args["start"] = df_unadj.index[0].date()
        del args["period"]

    args["div_adjust"] = True
    args["interval"] = "1d"
    df_daily = self.history(**args)
    if df_daily is None or df_daily.empty:
        return df_daily

    offset_td = None
    if interval == "3mo":
        # Not quarterly. How Yahoo aggregates depends on if period set:
        # - period set => aggregate backwards => anchor on the last month.
        # - else => start set => aggregate forwards => anchor on the start month.
        # So like quarterly but offset: shift the dates so the anchor month
        # lands on a calendar-quarter boundary, then undo the shift afterwards.
        # NOTE(review): the shift puts the anchor month at the *start* of a
        # quarter in both cases - confirm this matches Yahoo when period is set.
        anchor = df_daily.index[-1] if period_given else df_daily.index[0]
        offset_td = pd.tseries.offsets.DateOffset(months=anchor.month - 1)
        df_daily.index = df_daily.index - offset_td

    agg_spec = {
        "Open": ("Open", "first"),
        "High": ("High", "max"),
        "Low": ("Low", "min"),
        "Close": ("Close", "last"),
        "Volume": ("Volume", "sum"),
    }
    # Action columns may be missing from the daily data (presumably when
    # actions are disabled - verify against history()), so they are optional.
    if "Dividends" in df_daily.columns:
        agg_spec["Dividends"] = ("Dividends", "sum")
    has_splits = "Stock Splits" in df_daily.columns
    if has_splits:
        # Splits compound multiplicatively; 0 means "no split", so map it
        # to the neutral element 1 before taking the product.
        df_daily.loc[df_daily["Stock Splits"] == 0, "Stock Splits"] = 1
        agg_spec["Stock Splits"] = ("Stock Splits", "prod")

    buckets = df_daily.index.tz_localize(None).to_period(pd_period)
    df = df_daily.groupby(buckets).agg(**agg_spec)
    if has_splits:
        # Back to the "0 == no split" convention.
        df.loc[df["Stock Splits"] == 1, "Stock Splits"] = 0
    df.index = df.index.start_time.tz_localize(df_daily.index.tz)

    if offset_td is not None:
        # Reverse the offset
        df.index = df.index + offset_td

    if df_unadj is not None:
        # Copy over 'Volume', because fetching daily -> aggregating can differ
        # slightly from the volumes Yahoo returns for the larger interval.
        df = df.drop("Volume", axis=1).join(df_unadj[["Volume"]], validate="1:1")

    column_order = ["Open", "High", "Low", "Close", "Volume", "Dividends", "Stock Splits"]
    return df[[c for c in column_order if c in df.columns]]
def get_recommendations(self, proxy=None, as_dict=False):
self._quote.proxy = proxy
data = self._quote.recommendations

View File

@ -32,7 +32,10 @@ from . import shared
logger = logging.getLogger(__name__)
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
group_by='column',
auto_adjust=None, back_adjust=None,
div_adjust=False,
repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
"""Download yahoo tickers
@ -58,8 +61,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
prepost : bool
Include Pre and Post market data in results?
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
div_adjust: bool
Dividend-adjust all OHLC data? Default is False
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
@ -84,6 +87,15 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
seconds. (Can also be a fraction of a second e.g. 0.01)
"""
# Handle deprecated arguments first
if auto_adjust is not None:
print("WARNING: 'auto_adjust' is deprecated, switch to 'div_adjust' instead")
div_adjust = auto_adjust
auto_adjust = None
if back_adjust is not None:
print("WARNING: 'back_adjust' is deprecated, it was nonsense")
back_adjust = None
if ignore_tz is None:
# Set default value depending on interval
if interval[1:] in ['m', 'h']:
@ -125,8 +137,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
for i, ticker in enumerate(tickers):
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
actions=actions, div_adjust=div_adjust,
repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@ -137,8 +149,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
for i, ticker in enumerate(tickers):
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
actions=actions, div_adjust=div_adjust,
repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
@ -211,12 +223,12 @@ def _realign_dfs():
@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
div_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=10):
try:
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
data = _download_one(ticker, start, end, div_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
except Exception as e:
@ -230,15 +242,15 @@ def _download_one_threaded(ticker, start=None, end=None,
def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
div_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
actions=actions,
div_adjust=div_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)

View File

@ -343,29 +343,7 @@ def _interval_to_timedelta(interval):
return _pd.Timedelta(interval)
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
df["Adj High"] = df["High"] / ratio
df["Adj Low"] = df["Low"] / ratio
df.drop(
["Open", "High", "Low", "Close"],
axis=1, inplace=True)
df.rename(columns={
"Adj Open": "Open", "Adj High": "High",
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)
return df[[c for c in col_order if c in df.columns]]
def back_adjust(data):
""" back-adjusted data to mimic true historical prices """
def adjust_with_Yahoo_adj_close(data):
col_order = data.columns
df = data.copy()
ratio = df["Adj Close"] / df["Close"]
@ -374,12 +352,12 @@ def back_adjust(data):
df["Adj Low"] = df["Low"] * ratio
df.drop(
["Open", "High", "Low", "Adj Close"],
["Open", "High", "Low", "Close"],
axis=1, inplace=True)
df.rename(columns={
"Adj Open": "Open", "Adj High": "High",
"Adj Low": "Low"
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)
return df[[c for c in col_order if c in df.columns]]
@ -511,7 +489,7 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
elif interval == "1mo":
last_rows_same_interval = dt1.month == dt2.month
elif interval == "3mo":
last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
last_rows_same_interval = (dt1 - _dateutil.relativedelta.relativedelta(months=3)) < dt2
else:
last_rows_same_interval = (dt1-dt2) < _pd.Timedelta(interval)