Compare commits

...

8 Commits

Author SHA1 Message Date
ValueRaider efd278a3e0 Relax requests_cache purging - allow empty earnings calendar table 2023-02-07 20:38:51 +00:00
ValueRaider 4d8ca3777a Refactor check_Yahoo_response() to work with latest decryption 2023-02-07 20:37:06 +00:00
ValueRaider 14c6136699 Merge branch 'dev' into feature/session-prune-v2 2023-02-07 13:38:32 +00:00
ValueRaider 8b9faf15b3 Move requests_cache pruning into hook, enable-by-default 2022-12-22 13:59:49 +00:00
ValueRaider 71362f2252 Tests: rename 'dat' -> 'ticker' 2022-12-21 21:56:23 +00:00
ValueRaider 287cb0786e Simplify midnight calculation 2022-12-21 21:51:23 +00:00
ValueRaider 0840b602b4 Default disable requests_cache pruning ; Add a prune check ; Improve doc 2022-12-21 21:38:08 +00:00
ValueRaider 6c0b4ddb7b Rebase 'session-prune-v2' to 'dev' 2022-12-21 21:34:18 +00:00
6 changed files with 492 additions and 158 deletions

View File

@ -154,19 +154,6 @@ msft.option_chain(..., proxy="PROXY_SERVER")
...
```
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To initialize multiple `Ticker` objects, use
```python
@ -180,6 +167,29 @@ tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Caching
Heavy users will quickly hit Yahoo's rate limits for free use.
A caching `requests` session helps, by answering repeated web requests from a local cache.
To use one, pass a `session=` argument to the `Ticker` constructor:
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
# session.headers['User-agent'] = 'my-program/1.0' # <- Optional
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To keep the cache healthy, `yfinance` prunes from the cache any responses that failed to parse.
To disable this behaviour, call `yfinance.disable_prune_session_cache()`.
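Pruning is enabled by default; both toggles are exported at the top level:
```python
import yfinance as yf

yf.disable_prune_session_cache()  # keep every response, even ones yfinance failed to parse
yf.enable_prune_session_cache()   # restore the default pruning behaviour
```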
Add expiration to the session to prune old data:
```python
import datetime
session = requests_cache.CachedSession('yfinance.cache', expire_after=datetime.timedelta(minutes=60))
```
More info here: https://requests-cache.readthedocs.io/en/stable/user_guide/expiration.html
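Recent versions of `requests_cache` also let you inspect what has been cached, e.g. to confirm pruning is working. A minimal sketch, assuming the `session` object from above:
```python
# Print the url of every cached response
for response in session.cache.filter():
    print(response.url)

# Or check for one specific url
print(session.cache.contains(url='https://query2.finance.yahoo.com/v7/finance/options/MSFT'))
```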
### Fetching data for multiple tickers
```python

View File

@ -26,10 +26,10 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
for interval in intervals:
df = dat.history(period="5y", interval=interval)
df = ticker.history(period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
@ -37,13 +37,14 @@ class TestPriceHistory(unittest.TestCase):
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
start_d = dt.date() - _dt.timedelta(days=7)
df = dat.history(start=start_d, interval="1h")
df = ticker.history(start=start_d, interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
@ -57,8 +58,8 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
@ -66,7 +67,7 @@ class TestPriceHistory(unittest.TestCase):
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
dt0 = df.index[-2]
dt1 = df.index[-1]
@ -83,15 +84,15 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
@ -246,16 +247,16 @@ class TestPriceHistory(unittest.TestCase):
# The correction is successful if no days fall on a weekend and weekly data begins on a Monday
tkr = "AGRO3.SA"
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
start = "2021-01-11"
end = "2022-11-05"
interval = "1d"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())
interval = "1wk"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
@ -377,8 +378,8 @@ class TestPriceHistory(unittest.TestCase):
start = _dt.date.today() - _dt.timedelta(days=14)
start -= _dt.timedelta(days=start.weekday())
dat = yf.Ticker(tkr)
df = dat.history(start=start, interval="1wk")
ticker = yf.Ticker(tkr)
df = ticker.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
class TestPriceRepair(unittest.TestCase):
@ -417,7 +418,7 @@ class TestPriceRepair(unittest.TestCase):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@ -441,7 +442,7 @@ class TestPriceRepair(unittest.TestCase):
# Run test
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -468,8 +469,9 @@ class TestPriceRepair(unittest.TestCase):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@ -496,7 +498,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -523,8 +525,8 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@ -546,7 +548,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -565,8 +567,9 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
"High": [0, 102.1, 102.11],
@ -581,7 +584,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
repaired_df = ticker._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@ -592,10 +595,10 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
correct_df = ticker.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
@ -606,7 +609,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
repaired_df = ticker._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
for c in ["Open", "Low", "High", "Close"]:
try:

View File

@ -44,8 +44,8 @@ class TestTicker(unittest.TestCase):
yf.utils.get_tz_cache().store(tkr, None)
# Test:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
self.assertIsNotNone(tz)
@ -53,82 +53,245 @@ class TestTicker(unittest.TestCase):
# Check yfinance doesn't die when a ticker is delisted
tkr = "AM2Z.TA"
dat = yf.Ticker(tkr, session=self.session)
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
ticker = yf.Ticker(tkr, session=self.session)
ticker.history(period="1wk")
ticker.history(start="2022-01-01")
ticker.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.get_shares_full()
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_goodTicker(self):
# Check that yfinance works when the full API is called on the same Ticker instance
tkr = "IBM"
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.get_shares_full()
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
ticker.history(period="1wk")
ticker.history(start="2022-01-01")
ticker.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
def test_session_pruning_goodTkr(self):
tkr = "IBM"
url = "https://finance.yahoo.com/quote/"+tkr
ticker = yf.Ticker(tkr, session=self.session)
# All requests should succeed, so all urls should be in cache
yf.enable_prune_session_cache()
expected_urls = []
ticker.history(period="1wk")
ticker.dividends
ticker.splits
ticker.actions
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
ticker.info
ticker.isin
ticker.calendar
ticker.recommendations
ticker.recommendations_summary
ticker.sustainability
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.earnings_trend
ticker.earnings_forecasts
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
ticker.shares
ticker.earnings
ticker.quarterly_earnings
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
ticker.income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
ticker.quarterly_income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
ticker.balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
ticker.quarterly_balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
ticker.cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.quarterly_cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.options
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
ticker.news
expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
ticker.earnings_dates
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
for url in expected_urls:
if url.endswith("..."):
# This url is ridiculously long, so just search for a partial match
url2 = url.replace("...", "")
in_cache = False
# for surl in self.session.cache.urls:
for response in self.session.cache.filter():
surl = response.url
if surl.startswith(url2):
in_cache = True
break
self.assertTrue(in_cache, "This url missing from requests_cache: "+url)
else:
self.assertTrue(self.session.cache.contains(url=url), "This url missing from requests_cache: "+url)
def test_session_pruning_badTkr(self):
# Ideally we would test a valid ticker after triggering a Yahoo block,
# but that's not practical. As a proxy, use an invalid ticker
tkr = "XYZ-X"
url = "https://finance.yahoo.com/quote/"+tkr
ticker = yf.Ticker(tkr, session=self.session)
# All requests should fail, so none of these urls should be in cache
yf.enable_prune_session_cache()
expected_urls = []
ticker.history(period="1wk")
ticker.dividends
ticker.splits
ticker.actions
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
ticker.info
ticker.isin
ticker.calendar
ticker.recommendations
ticker.recommendations_summary
ticker.sustainability
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.earnings_trend
ticker.earnings_forecasts
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
ticker.shares
ticker.earnings
ticker.quarterly_earnings
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
ticker.income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
ticker.quarterly_income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
ticker.balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
ticker.quarterly_balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
ticker.cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.quarterly_cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.options
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
# Skip news, don't care if in cache
# ticker.news
# expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
df = ticker.earnings_dates
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
for url in expected_urls:
if url.endswith("..."):
# This url is ridiculously long, so just search for a partial match
url2 = url.replace("...", "")
in_cache = False
# for surl in self.session.cache.urls:
for response in self.session.cache.filter():
surl = response.url
if surl.startswith(url2):
in_cache = True
break
self.assertFalse(in_cache, "This url wrongly in requests_cache: "+url)
else:
self.assertFalse(self.session.cache.contains(url=url), "This url wrongly in requests_cache: "+url)
class TestTickerHistory(unittest.TestCase):
session = None

View File

@ -24,6 +24,7 @@ from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location
from .data import enable_prune_session_cache, disable_prune_session_cache
__version__ = version.version
__author__ = "Ran Aroussi"
@ -44,3 +45,4 @@ def pdr_override():
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
__all__ += ['enable_prune_session_cache', 'disable_prune_session_cache']

View File

@ -595,7 +595,8 @@ class TickerBase:
return utils.empty_df()
if end is None:
end = int(_time.time())
# Default end = next midnight in the exchange timezone
midnight = _pd.Timestamp.utcnow().tz_convert(tz).ceil("D")
end = int(midnight.timestamp())
else:
end = utils._parse_user_dt(end, tz)
if start is None:
@ -651,7 +652,10 @@ class TickerBase:
"Our engineers are working quickly to resolve "
"the issue. Thank you for your patience.")
data = data.json()
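# The response hook (see data.py) may have already parsed this response
# and attached the JSON as 'yf_json'; reuse it to avoid re-parsing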
if "yf_json" in dir(data):
data = data.yf_json
else:
data = data.json()
except Exception:
pass
@ -1816,22 +1820,25 @@ class TickerBase:
url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format(
_ROOT_URL_, self.ticker, page_offset, page_size)
data = self._data.cache_get(url=url, proxy=proxy).text
response = self._data.cache_get(url=url, proxy=proxy)
data = response.text
if "Will be right back" in data:
raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
"Our engineers are working quickly to resolve "
"the issue. Thank you for your patience.")
try:
data = _pd.read_html(data)[0]
except ValueError:
if page_offset == 0:
# Should not fail on first page
if "Showing Earnings for:" in data:
# Actually YF was successful, problem is company doesn't have earnings history
dates = utils.empty_earnings_dates_df()
break
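# The response hook may have already parsed the HTML tables and
# attached them as 'yf_html_pd'; reuse them to avoid re-parsing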
if "yf_html_pd" in dir(response):
data = response.yf_html_pd[0]
else:
try:
data = _pd.read_html(data)[0]
except ValueError:
if page_offset == 0:
# Should not fail on first page
if "Showing Earnings for:" in data:
# YF actually succeeded; the company just has no earnings history
dates = utils.empty_earnings_dates_df()
break
if dates is None:
dates = data
else:

View File

@ -14,6 +14,7 @@ else:
import requests as requests
import re
import pandas as _pd
from bs4 import BeautifulSoup
from frozendict import frozendict
@ -25,6 +26,8 @@ except ImportError:
cache_maxsize = 64
prune_session_cache = True
def lru_cache_freezeargs(func):
"""
@ -168,6 +171,13 @@ def decrypt_cryptojs_aes_stores(data, keys=None):
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
def enable_prune_session_cache():
global prune_session_cache
prune_session_cache = True
def disable_prune_session_cache():
global prune_session_cache
prune_session_cache = False
class TickerData:
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
@ -179,6 +189,18 @@ class TickerData:
self.ticker = ticker
self._session = session or requests
def _check_requests_cache_hook(self):
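# Install (or remove) the response-checking hook on caching sessions,
# according to the global prune_session_cache flag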
try:
c = self._session.cache
except AttributeError:
# Not a caching session
return
global prune_session_cache
if not prune_session_cache:
self._session.hooks["response"] = []
elif self._check_Yahoo_response not in self._session.hooks["response"]:
self._session.hooks["response"].append(self._check_Yahoo_response)
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
proxy = self._get_proxy(proxy)
response = self._session.get(
@ -262,30 +284,7 @@ class TickerData:
return []
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
'''
get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
'''
if sub_page:
ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
response = self.get(url=ticker_url, proxy=proxy)
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Fetch failed, probably because Yahoo spam triggered
return {}
data = json.loads(json_str)
def _gather_keys_from_response(self, response):
# Gather decryption keys:
soup = BeautifulSoup(response.content, "html.parser")
keys = self._get_decryption_keys_from_yahoo_js(soup)
@ -305,19 +304,169 @@ class TickerData:
keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
response_gh = self.cache_get(keys_url)
keys += response_gh.text.splitlines()
return keys
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
def _check_Yahoo_response(self, r, *args, **kwargs):
# Parse the data returned by Yahoo to determine if it is corrupt/incomplete.
# If bad, set 'status_code' to 204 ("No Content"), which stops the response
# entering the requests_cache.
# Because this involves parsing, the parsed output is attached to the response
# object with a "yf_" prefix and reused elsewhere.
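# Note: requests_cache only stores responses with allowable status codes
# (200 by default), so a 204 never enters the cache.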
if not "yahoo.com/" in r.url:
# Only check Yahoo responses
return
attrs = dir(r)
r_from_cache = "from_cache" in attrs and r.from_cache
if "yf_data" in attrs or "yf_json" in attrs or "yf_html_pd" in attrs:
# Have already parsed this response, successfully
return
if "Will be right back" in r.text:
# Simple check, no parsing needed
r.status_code = 204
return r
parse_failed = False
r_modified = False
if "/ws/fundamentals-timeseries" in r.url:
# Timeseries
try:
data = r.json()
r.yf_json = data
r_modified = True
data["timeseries"]["result"]
except:
parse_failed = True
elif "/finance/chart/" in r.url:
# Prices
try:
data = r.json()
r.yf_json = data
r_modified = True
if data["chart"]["error"] is not None:
parse_failed = True
except Exception:
parse_failed = True
elif "/finance/options/" in r.url:
# Options
if not "expirationDates" in r.text:
# Parse will fail
parse_failed = True
elif "/finance/search?" in r.url:
# News, can't be bothered to check
return
elif "/calendar/earnings?" in r.url:
try:
dfs = _pd.read_html(r.text)
except ValueError as e:
if "No tables found" in str(e):
# Maybe this ticker doesn't have any earnings dates
pass
else:
parse_failed = True
except Exception as e:
parse_failed = True
else:
r.yf_html_pd = dfs
r_modified = True
elif "root.App.main" in r.text:
# JSON data stores
try:
json_str = r.text.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
parse_failed = True
if not parse_failed:
data = json.loads(json_str)
keys = self._gather_keys_from_response(r)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
# raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
print(f"{self.ticker}: Failed to decrypt/extract data stores from web request")
parse_failed = True
if "yf_data" not in attrs:
# if not parse_failed and "yf_data" not in attrs:
r.yf_data = stores
r_modified = True
if stores is not None and "QuoteSummaryStore" not in stores:
parse_failed = True
else:
return
if parse_failed:
if not r_from_cache:
r.status_code = 204 # No content
r_modified = True
if r_modified:
return r
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
'''
get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
'''
if sub_page:
ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
# Ensure the cache-pruning hook is installed to intercept responses
self._check_requests_cache_hook()
response = self.get(url=ticker_url, proxy=proxy)
if "yf_data" in dir(response):
# _check_requests_cache_hook() already successfully extracted & decrypted
stores = response.yf_data
else:
# Extract JSON and decrypt
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Problem with the data, so clear it from the session cache
# self.session_cache_prune_url(ticker_url)
# Then exit
return {}
data = json.loads(json_str)
keys = self._gather_keys_from_response(response)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
# return data
new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)
return json.loads(new_data)
json_data = json.loads(new_data)
return json_data