Compare commits

...

3 Commits

Author SHA1 Message Date
ValueRaider 620e29cf05 Fix valuations table construction 2023-01-21 14:36:27 +00:00
ValueRaider b759ef03ca Add Ticker.valuations 2023-01-21 14:24:46 +00:00
ValueRaider fb9c72c35e Implement scraping for 'key-statistics' via 'Ticker.stats' 2023-01-21 14:06:57 +00:00
5 changed files with 135 additions and 65 deletions

README.md

@@ -63,6 +63,10 @@ msft = yf.Ticker("MSFT")
# get stock info
msft.info

# get stock price statistics
msft.stats
msft.valuations

# get historical market data
hist = msft.history(period="max")
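
A quick sketch of what the two new accessors return, going by the implementations below; the exact keys depend on what Yahoo serves for the ticker, though `exchangeTimezoneName` is one key the scraper itself relies on:

import yfinance as yf

msft = yf.Ticker("MSFT")

stats = msft.stats   # dict: the 'key-statistics' QuoteSummaryStore
stats["quoteType"]["exchangeTimezoneName"]   # e.g. 'America/New_York'

vals = msft.valuations   # pandas DataFrame: metrics as rows,
vals.columns             # as-of dates as columns, newest first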

yfinance/base.py

@@ -40,6 +40,7 @@ from .scrapers.analysis import Analysis
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote
from .scrapers.stats import KeyStats
import json as _json
_BASE_URL_ = 'https://query2.finance.yahoo.com'
@@ -75,15 +76,9 @@ class TickerBase:
        self._analysis = Analysis(self._data)
        self._holders = Holders(self._data)
        self._quote = Quote(self._data)
        self._stats = KeyStats(self._data)
        self._fundamentals = Fundamentals(self._data)

    def stats(self, proxy=None):
        ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
        # get info and sustainability
        data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
        return data

    def history(self, period="1mo", interval="1d",
                start=None, end=None, prepost=False, actions=True,
                auto_adjust=True, back_adjust=False, repair=False, keepna=False,
@@ -895,6 +890,16 @@ class TickerBase:
        data = self._quote.info
        return data

    def get_stats(self, proxy=None) -> dict:
        self._stats.proxy = proxy
        data = self._stats.stats
        return data

    def get_valuations(self, proxy=None) -> _pd.DataFrame:
        self._stats.proxy = proxy
        data = self._stats.valuations
        return data

    def get_sustainability(self, proxy=None, as_dict=False):
        self._quote.proxy = proxy
        data = self._quote.sustainability
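
Worth noting: both getters read from the same `KeyStats` instance, and `KeyStats._scrape` sets `_already_scraped` on first call, so fetching stats and valuations in sequence should hit Yahoo only once:

import yfinance as yf

msft = yf.Ticker("MSFT")
s = msft.get_stats()        # triggers the single 'key-statistics' scrape
v = msft.get_valuations()   # served from the same KeyStats instance, no refetch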

yfinance/scrapers/quote.py

@@ -19,13 +19,11 @@ class Quote:
        self._calendar = None

        self._already_scraped = False
        self._already_scraped_complementary = False

    @property
    def info(self) -> dict:
        if self._info is None:
            self._scrape(self.proxy)
            self._scrape_complementary(self.proxy)
        return self._info
@@ -154,59 +152,3 @@ class Quote:
                'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
        except Exception:
            pass

    def _scrape_complementary(self, proxy):
        if self._already_scraped_complementary:
            return
        self._already_scraped_complementary = True

        self._scrape(proxy)
        if self._info is None:
            return

        # Complementary key-statistics. For now just want 'trailing PEG ratio'
        keys = {"trailingPegRatio"}
        if keys:
            # Simplified the original scrape code for key-statistics. Very expensive for fetching
            # just one value, best if scraping most/all:
            #
            # p = _re.compile(r'root\.App\.main = (.*);')
            # url = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self._ticker.ticker, self._ticker.ticker)
            # try:
            #     r = session.get(url, headers=utils.user_agent_headers)
            #     data = _json.loads(p.findall(r.text)[0])
            #     key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']["timeSeries"]
            #     for k in keys:
            #         if k not in key_stats or len(key_stats[k]) == 0:
            #             # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
            #             v = None
            #         else:
            #             # Select most recent (last) raw value in list:
            #             v = key_stats[k][-1]["reportedValue"]["raw"]
            #         self._info[k] = v
            # except Exception:
            #     raise
            #     pass
            #
            # For just one/few variables it is faster to query directly:
            url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(
                self._data.ticker, self._data.ticker)
            for k in keys:
                url += "&type=" + k
            # Request 6 months of data
            start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
            start = int(start.timestamp())
            end = pd.Timestamp.utcnow().ceil("D")
            end = int(end.timestamp())
            url += f"&period1={start}&period2={end}"

            json_str = self._data.cache_get(url=url, proxy=proxy).text
            json_data = json.loads(json_str)
            key_stats = json_data["timeseries"]["result"][0]
            if k not in key_stats:
                # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
                v = None
            else:
                # Select most recent (last) raw value in list:
                v = key_stats[k][-1]["reportedValue"]["raw"]
            self._info[k] = v
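
For reference, the direct-query path above reduces to a single GET against the fundamentals-timeseries endpoint. A standalone sketch, using plain requests plus a browser-style User-Agent header (an assumption; yfinance's cached session handles that itself), and assuming the response shape the code above expects:

import datetime
import requests
import pandas as pd

ticker = "MSFT"
url = ("https://query1.finance.yahoo.com/ws/fundamentals-timeseries"
       "/v1/finance/timeseries/{0}?symbol={0}&type=trailingPegRatio").format(ticker)
end = pd.Timestamp.utcnow().ceil("D")
start = end - datetime.timedelta(days=365 // 2)   # ~6 months of data
url += "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))

result = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).json()
key_stats = result["timeseries"]["result"][0]
if "trailingPegRatio" in key_stats:
    # most recent raw value, matching the selection logic above
    peg = key_stats["trailingPegRatio"][-1]["reportedValue"]["raw"]
else:
    peg = None   # Yahoo prints N/A on the website in this case
print(peg)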

yfinance/scrapers/stats.py

@@ -0,0 +1,111 @@
import datetime as _dt
import re as _re

import pandas as _pd

from yfinance import utils
from yfinance.data import TickerData


class KeyStats:
    def __init__(self, data: TickerData, proxy=None):
        self._data = data
        self.proxy = proxy

        self._stats = None
        self._valuations = None

        self._already_scraped = False

    @property
    def stats(self) -> dict:
        if self._stats is None:
            self._scrape(self.proxy)
        return self._stats

    @property
    def valuations(self) -> _pd.DataFrame:
        if self._valuations is None:
            self._scrape(self.proxy)
        return self._valuations

    def _scrape(self, proxy):
        if self._already_scraped:
            return
        self._already_scraped = True

        data = self._data.get_json_data_stores('key-statistics', proxy)

        self._stats = data['QuoteSummaryStore']
        del self._stats["defaultKeyStatistics"]  # available in Ticker.info
        del self._stats["financialData"]  # available in Ticker.info
        exchange_tz = self._stats["quoteType"]["exchangeTimezoneName"]
        try:
            c = "calendarEvents"
            for k in ["dividendDate", "exDividendDate"]:
                self._stats[c][k] = _pd.to_datetime(self._stats[c][k], unit='s', utc=True)
                if self._stats[c][k].time() == _dt.time(0):
                    # Probably not UTC but meant to be in exchange timezone
                    self._stats[c][k] = self._stats[c][k].tz_convert(None).tz_localize(exchange_tz)
        except Exception:
            pass

        ts = data['QuoteTimeSeriesStore']["timeSeries"]
        trailing_series = []
        year_series = []
        for k in ts:
            if len(ts[k]) == 0:
                # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
                continue
            if len(ts[k]) == 1:
                date = _pd.to_datetime(ts[k][0]["asOfDate"])
                v = ts[k][0]["reportedValue"]
                if isinstance(v, dict):
                    v = v["raw"]
                k = _re.sub("^trailing", "", k)
                trailing_series.append(_pd.Series([v], index=[date], name=k))
            else:
                if k == "timestamp":
                    continue
                dates = [d["asOfDate"] for d in ts[k]]
                dates = _pd.to_datetime(dates)
                has_raw = isinstance(ts[k][0]["reportedValue"], dict) and "raw" in ts[k][0]["reportedValue"]
                if has_raw:
                    values = [d["reportedValue"]["raw"] for d in ts[k]]
                else:
                    values = [d["reportedValue"] for d in ts[k]]
                k = _re.sub("^quarterly", "", k)
                year_series.append(_pd.Series(values, index=dates, name=k))

        year_table = None
        if len(year_series) > 0:
            year_table = _pd.concat(year_series, axis=1)
        trailing_table = None
        if len(trailing_series) > 0:
            trailing_table = _pd.concat(trailing_series, axis=1)

        tables = [t for t in [year_table, trailing_table] if t is not None]
        if len(tables) == 0:
            table = _pd.DataFrame()
        else:
            if len(tables) == 1:
                table = tables[0]
            else:
                table = _pd.concat(tables, axis=0)
            table = table.T
            table = table[table.columns.sort_values(ascending=False)]

        self._valuations = table
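
To make the table construction concrete, a small worked example with made-up numbers: each metric becomes a Series indexed by as-of date, the Series are concatenated column-wise, and the result is transposed and sorted so metrics are rows and the newest date is the first column:

import pandas as pd

dates = pd.to_datetime(["2022-06-30", "2022-09-30", "2022-12-31"])
year_series = [
    pd.Series([28.1, 25.4, 24.9], index=dates, name="PeRatio"),  # hypothetical values
    pd.Series([10.2, 9.8, 9.5], index=dates, name="PsRatio"),
]
table = pd.concat(year_series, axis=1)   # dates x metrics
table = table.T                          # metrics x dates
table = table[table.columns.sort_values(ascending=False)]   # newest first

#          2022-12-31  2022-09-30  2022-06-30
# PeRatio        24.9        25.4        28.1
# PsRatio         9.5         9.8        10.2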

yfinance/ticker.py

@@ -137,6 +137,14 @@ class Ticker(TickerBase):
    def info(self) -> dict:
        return self.get_info()

    @property
    def stats(self) -> dict:
        return self.get_stats()

    @property
    def valuations(self) -> _pd.DataFrame:
        return self.get_valuations()

    @property
    def calendar(self) -> _pd.DataFrame:
        return self.get_calendar()
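
Like the neighboring properties, these are thin wrappers that call the getters with default arguments; to route the request through a proxy, call the getter directly:

import yfinance as yf

msft = yf.Ticker("MSFT")
msft.stats                                       # property form, no proxy
msft.get_stats(proxy="http://10.10.1.10:3128")   # proxy address is hypothetical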