Compare commits

...

8 Commits

Author SHA1 Message Date
ValueRaider efd278a3e0 Relax requests_cache purging - allow empty earnings calendar table 2023-02-07 20:38:51 +00:00
ValueRaider 4d8ca3777a Refactor check_Yahoo_response() to work with latest decryption 2023-02-07 20:37:06 +00:00
ValueRaider 14c6136699 Merge branch 'dev' into feature/session-prune-v2 2023-02-07 13:38:32 +00:00
ValueRaider 8b9faf15b3 Move requests_cache pruning into hook, enable-by-default 2022-12-22 13:59:49 +00:00
ValueRaider 71362f2252 Tests: rename 'dat' -> 'ticker' 2022-12-21 21:56:23 +00:00
ValueRaider 287cb0786e Simplify midnight calculation 2022-12-21 21:51:23 +00:00
ValueRaider 0840b602b4 Default disable requests_cache pruning ; Add a prune check ; Improve doc 2022-12-21 21:38:08 +00:00
ValueRaider 6c0b4ddb7b Rebase 'session-prune-v2' to 'dev' 2022-12-21 21:34:18 +00:00
6 changed files with 492 additions and 158 deletions

View File

@ -154,19 +154,6 @@ msft.option_chain(..., proxy="PROXY_SERVER")
...
```
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To initialize multiple `Ticker` objects, use
```python
@ -180,6 +167,29 @@ tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Caching
Heavy users will quickly hit Yahoo's rate limits for free use.
A caching `requests` session helps, by answering repeated web requests from a local cache.
To use one, pass a `session=` argument to the `Ticker` constructor:
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
# session.headers['User-agent'] = 'my-program/1.0' # <- Optional
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To keep the cache healthy, `yfinance` prunes from the cache any responses that failed to parse.
To disable this behaviour, call `yfinance.disable_prune_session_cache()`.
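Pruning is enabled by default; both toggles are exported at the top level:
```python
import yfinance as yf

yf.disable_prune_session_cache()  # keep every response, even ones yfinance failed to parse
yf.enable_prune_session_cache()   # restore the default pruning behaviour
```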
Add expiration to the session to prune old data:
```python
import datetime
session = requests_cache.CachedSession('yfinance.cache', expire_after=datetime.timedelta(minutes=60))
```
More info here: https://requests-cache.readthedocs.io/en/stable/user_guide/expiration.html
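Recent versions of `requests_cache` also let you inspect what has been cached, e.g. to confirm pruning is working. A minimal sketch, assuming the `session` object from above:
```python
# Print the url of every cached response
for response in session.cache.filter():
    print(response.url)

# Or check for one specific url
print(session.cache.contains(url='https://query2.finance.yahoo.com/v7/finance/options/MSFT'))
```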
### Fetching data for multiple tickers
```python

View File

@ -26,10 +26,10 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
for interval in intervals:
df = dat.history(period="5y", interval=interval)
df = ticker.history(period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
@ -37,13 +37,14 @@ class TestPriceHistory(unittest.TestCase):
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
start_d = dt.date() - _dt.timedelta(days=7)
df = dat.history(start=start_d, interval="1h")
df = ticker.history(start=start_d, interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
@ -57,8 +58,8 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
@ -66,7 +67,7 @@ class TestPriceHistory(unittest.TestCase):
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
dt0 = df.index[-2]
dt1 = df.index[-1]
@ -83,15 +84,15 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
@ -246,16 +247,16 @@ class TestPriceHistory(unittest.TestCase):
# The correction is successful if no days fall on a weekend and weekly data begins on a Monday
tkr = "AGRO3.SA"
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
start = "2021-01-11"
end = "2022-11-05"
interval = "1d"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())
interval = "1wk"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
@ -377,8 +378,8 @@ class TestPriceHistory(unittest.TestCase):
start = _dt.date.today() - _dt.timedelta(days=14)
start -= _dt.timedelta(days=start.weekday())
dat = yf.Ticker(tkr)
df = dat.history(start=start, interval="1wk")
ticker = yf.Ticker(tkr)
df = ticker.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
class TestPriceRepair(unittest.TestCase):
@ -417,7 +418,7 @@ class TestPriceRepair(unittest.TestCase):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@ -441,7 +442,7 @@ class TestPriceRepair(unittest.TestCase):
# Run test
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -468,8 +469,9 @@ class TestPriceRepair(unittest.TestCase):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@ -496,7 +498,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -523,8 +525,8 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@ -546,7 +548,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
df_repaired = ticker._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@ -565,8 +567,9 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
"High": [0, 102.1, 102.11],
@ -581,7 +584,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
repaired_df = ticker._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@ -592,10 +595,10 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
correct_df = ticker.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
@ -606,7 +609,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
repaired_df = ticker._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
for c in ["Open", "Low", "High", "Close"]:
try:

View File

@ -44,8 +44,8 @@ class TestTicker(unittest.TestCase):
yf.utils.get_tz_cache().store(tkr, None)
# Test:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
self.assertIsNotNone(tz)
@ -53,82 +53,245 @@ class TestTicker(unittest.TestCase):
# Check yfinance doesn't die when a ticker is delisted
tkr = "AM2Z.TA"
dat = yf.Ticker(tkr, session=self.session)
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
ticker = yf.Ticker(tkr, session=self.session)
ticker.history(period="1wk")
ticker.history(start="2022-01-01")
ticker.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.get_shares_full()
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_goodTicker(self):
# Check that yfinance works when the full API is called on the same Ticker instance
tkr = "IBM"
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.get_shares_full()
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
ticker.history(period="1wk")
ticker.history(start="2022-01-01")
ticker.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
def test_session_pruning_goodTkr(self):
tkr = "IBM"
url = "https://finance.yahoo.com/quote/"+tkr
ticker = yf.Ticker(tkr, session=self.session)
# All requests should succeed, so all urls should be in cache
yf.enable_prune_session_cache()
expected_urls = []
ticker.history(period="1wk")
ticker.dividends
ticker.splits
ticker.actions
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
ticker.info
ticker.isin
ticker.calendar
ticker.recommendations
ticker.recommendations_summary
ticker.sustainability
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.earnings_trend
ticker.earnings_forecasts
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
ticker.shares
ticker.earnings
ticker.quarterly_earnings
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
ticker.income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
ticker.quarterly_income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
ticker.balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
ticker.quarterly_balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
ticker.cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.quarterly_cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.options
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
ticker.news
expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
ticker.earnings_dates
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
for url in expected_urls:
if url.endswith("..."):
# This url is ridiculously long, so just search for a partial match
url2 = url.replace("...", "")
in_cache = False
# for surl in self.session.cache.urls:
for response in self.session.cache.filter():
surl = response.url
if surl.startswith(url2):
in_cache = True
break
self.assertTrue(in_cache, "This url missing from requests_cache: "+url)
else:
self.assertTrue(self.session.cache.contains(url=url), "This url missing from requests_cache: "+url)
def test_session_pruning_badTkr(self):
# Ideally we would test a valid ticker after triggering a Yahoo block,
# but that's not practical. As a proxy, use an invalid ticker
tkr = "XYZ-X"
url = "https://finance.yahoo.com/quote/"+tkr
ticker = yf.Ticker(tkr, session=self.session)
# All requests should fail, so none of these urls should be in cache
yf.enable_prune_session_cache()
expected_urls = []
ticker.history(period="1wk")
ticker.dividends
ticker.splits
ticker.actions
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
ticker.info
ticker.isin
ticker.calendar
ticker.recommendations
ticker.recommendations_summary
ticker.sustainability
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.earnings_trend
ticker.earnings_forecasts
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
ticker.shares
ticker.earnings
ticker.quarterly_earnings
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
ticker.income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
ticker.quarterly_income_stmt
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
ticker.balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
ticker.quarterly_balance_sheet
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
ticker.cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.quarterly_cashflow
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
ticker.options
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
# Skip news, don't care if in cache
# ticker.news
# expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
df = ticker.earnings_dates
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
for url in expected_urls:
if url.endswith("..."):
# This url is ridiculously long, so just search for a partial match
url2 = url.replace("...", "")
in_cache = False
# for surl in self.session.cache.urls:
for response in self.session.cache.filter():
surl = response.url
if surl.startswith(url2):
in_cache = True
break
self.assertFalse(in_cache, "This url wrongly in requests_cache: "+url)
else:
self.assertFalse(self.session.cache.contains(url=url), "This url wrongly in requests_cache: "+url)
class TestTickerHistory(unittest.TestCase):
session = None

View File

@ -24,6 +24,7 @@ from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location
from .data import enable_prune_session_cache, disable_prune_session_cache
__version__ = version.version
__author__ = "Ran Aroussi"
@ -44,3 +45,4 @@ def pdr_override():
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
__all__ += ['enable_prune_session_cache', 'disable_prune_session_cache']

View File

@ -595,7 +595,8 @@ class TickerBase:
return utils.empty_df()
if end is None:
end = int(_time.time())
# Default end = next midnight in the exchange timezone
midnight = _pd.Timestamp.utcnow().tz_convert(tz).ceil("D")
end = int(midnight.timestamp())
else:
end = utils._parse_user_dt(end, tz)
if start is None:
@ -651,7 +652,10 @@ class TickerBase:
"Our engineers are working quickly to resolve "
"the issue. Thank you for your patience.")
data = data.json()
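# The response hook (see data.py) may have already parsed this response
# and attached the JSON as 'yf_json'; reuse it to avoid re-parsing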
if "yf_json" in dir(data):
data = data.yf_json
else:
data = data.json()
except Exception:
pass
@ -1816,22 +1820,25 @@ class TickerBase:
url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format(
_ROOT_URL_, self.ticker, page_offset, page_size)
data = self._data.cache_get(url=url, proxy=proxy).text
response = self._data.cache_get(url=url, proxy=proxy)
data = response.text
if "Will be right back" in data:
raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
"Our engineers are working quickly to resolve "
"the issue. Thank you for your patience.")
try:
data = _pd.read_html(data)[0]
except ValueError:
if page_offset == 0:
# Should not fail on first page
if "Showing Earnings for:" in data:
# Actually YF was successful, problem is company doesn't have earnings history
dates = utils.empty_earnings_dates_df()
break
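# The response hook may have already parsed the HTML tables and
# attached them as 'yf_html_pd'; reuse them to avoid re-parsing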
if "yf_html_pd" in dir(response):
data = response.yf_html_pd[0]
else:
try:
data = _pd.read_html(data)[0]
except ValueError:
if page_offset == 0:
# Should not fail on first page
if "Showing Earnings for:" in data:
# YF actually succeeded; the company just has no earnings history
dates = utils.empty_earnings_dates_df()
break
if dates is None:
dates = data
else:

View File

@ -14,6 +14,7 @@ else:
import requests as requests
import re
import pandas as _pd
from bs4 import BeautifulSoup
from frozendict import frozendict
@ -25,6 +26,8 @@ except ImportError:
cache_maxsize = 64
prune_session_cache = True
def lru_cache_freezeargs(func):
"""
@ -168,6 +171,13 @@ def decrypt_cryptojs_aes_stores(data, keys=None):
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
def enable_prune_session_cache():
global prune_session_cache
prune_session_cache = True
def disable_prune_session_cache():
global prune_session_cache
prune_session_cache = False
class TickerData:
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
@ -179,6 +189,18 @@ class TickerData:
self.ticker = ticker
self._session = session or requests
def _check_requests_cache_hook(self):
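# Install (or remove) the response-checking hook on caching sessions,
# according to the global prune_session_cache flag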
try:
c = self._session.cache
except AttributeError:
# Not a caching session
return
global prune_session_cache
if not prune_session_cache:
self._session.hooks["response"] = []
elif self._check_Yahoo_response not in self._session.hooks["response"]:
self._session.hooks["response"].append(self._check_Yahoo_response)
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
proxy = self._get_proxy(proxy)
response = self._session.get(
@ -262,30 +284,7 @@ class TickerData:
return []
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
'''
get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
'''
if sub_page:
ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
response = self.get(url=ticker_url, proxy=proxy)
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Fetch failed, probably because Yahoo spam triggered
return {}
data = json.loads(json_str)
def _gather_keys_from_response(self, response):
# Gather decryption keys:
soup = BeautifulSoup(response.content, "html.parser")
keys = self._get_decryption_keys_from_yahoo_js(soup)
@ -305,19 +304,169 @@ class TickerData:
keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
response_gh = self.cache_get(keys_url)
keys += response_gh.text.splitlines()
return keys
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
def _check_Yahoo_response(self, r, *args, **kwargs):
# Parse the data returned by Yahoo to determine if it is corrupt/incomplete.
# If bad, set 'status_code' to 204 ("No Content"), which stops the response
# entering the requests_cache.
# Because this involves parsing, the parsed output is attached to the response
# object with a "yf_" prefix and reused elsewhere.
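# Note: requests_cache only stores responses with allowable status codes
# (200 by default), so a 204 never enters the cache.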
if not "yahoo.com/" in r.url:
# Only check Yahoo responses
return
attrs = dir(r)
r_from_cache = "from_cache" in attrs and r.from_cache
if "yf_data" in attrs or "yf_json" in attrs or "yf_html_pd" in attrs:
# Have already parsed this response, successfully
return
if "Will be right back" in r.text:
# Simple check, no parsing needed
r.status_code = 204
return r
parse_failed = False
r_modified = False
if "/ws/fundamentals-timeseries" in r.url:
# Timeseries
try:
data = r.json()
r.yf_json = data
r_modified = True
data["timeseries"]["result"]
except:
parse_failed = True
elif "/finance/chart/" in r.url:
# Prices
try:
data = r.json()
r.yf_json = data
r_modified = True
if data["chart"]["error"] is not None:
parse_failed = True
except Exception:
parse_failed = True
elif "/finance/options/" in r.url:
# Options
if not "expirationDates" in r.text:
# Parse will fail
parse_failed = True
elif "/finance/search?" in r.url:
# News, can't be bothered to check
return
elif "/calendar/earnings?" in r.url:
try:
dfs = _pd.read_html(r.text)
except ValueError as e:
if "No tables found" in str(e):
# Maybe this ticker doesn't have any earnings dates
pass
else:
parse_failed = True
except Exception as e:
parse_failed = True
else:
r.yf_html_pd = dfs
r_modified = True
elif "root.App.main" in r.text:
# JSON data stores
try:
json_str = r.text.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
parse_failed = True
if not parse_failed:
data = json.loads(json_str)
keys = self._gather_keys_from_response(r)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
# raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
print(f"{self.ticker}: Failed to decrypt/extract data stores from web request")
parse_failed = True
if "yf_data" not in attrs:
# if not parse_failed and "yf_data" not in attrs:
r.yf_data = stores
r_modified = True
if stores is not None and "QuoteSummaryStore" not in stores:
parse_failed = True
else:
return
if parse_failed:
if not r_from_cache:
r.status_code = 204 # No content
r_modified = True
if r_modified:
return r
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
'''
get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
'''
if sub_page:
ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
# Ensure the cache-pruning hook is installed to intercept responses
self._check_requests_cache_hook()
response = self.get(url=ticker_url, proxy=proxy)
if "yf_data" in dir(response):
# _check_requests_cache_hook() already successfully extracted & decrypted
stores = response.yf_data
else:
# Extract JSON and decrypt
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Problem with the data, so clear it from the session cache
# self.session_cache_prune_url(ticker_url)
# Then exit
return {}
data = json.loads(json_str)
keys = self._gather_keys_from_response(response)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
# return data
new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)
return json.loads(new_data)
json_data = json.loads(new_data)
return json_data