Compare commits

...

3 Commits

Author SHA1 Message Date
ValueRaider 620e29cf05 Fix valuations table construction 2023-01-21 14:36:27 +00:00
ValueRaider b759ef03ca Add Ticker.valuations 2023-01-21 14:24:46 +00:00
ValueRaider fb9c72c35e Implement scraping for 'key-statistics' via 'Ticker.stats' 2023-01-21 14:06:57 +00:00
5 changed files with 135 additions and 65 deletions

README.md

@@ -63,6 +63,10 @@ msft = yf.Ticker("MSFT")
# get stock info
msft.info

# get stock price statistics
msft.stats
msft.valuations

# get historical market data
hist = msft.history(period="max")
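
A quick sketch of what the two new accessors return, going by the implementations below; the exact keys depend on what Yahoo serves for the ticker, though `exchangeTimezoneName` is one key the scraper itself relies on:

import yfinance as yf

msft = yf.Ticker("MSFT")

stats = msft.stats   # dict: the 'key-statistics' QuoteSummaryStore
stats["quoteType"]["exchangeTimezoneName"]   # e.g. 'America/New_York'

vals = msft.valuations   # pandas DataFrame: metrics as rows,
vals.columns             # as-of dates as columns, newest first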

yfinance/base.py

@@ -40,6 +40,7 @@ from .scrapers.analysis import Analysis
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote
from .scrapers.stats import KeyStats
import json as _json
_BASE_URL_ = 'https://query2.finance.yahoo.com'
@@ -75,15 +76,9 @@ class TickerBase:
        self._analysis = Analysis(self._data)
        self._holders = Holders(self._data)
        self._quote = Quote(self._data)
        self._stats = KeyStats(self._data)
        self._fundamentals = Fundamentals(self._data)

    def stats(self, proxy=None):
        ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
        # get info and sustainability
        data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
        return data

    def history(self, period="1mo", interval="1d",
                start=None, end=None, prepost=False, actions=True,
                auto_adjust=True, back_adjust=False, repair=False, keepna=False,
@@ -895,6 +890,16 @@ class TickerBase:
        data = self._quote.info
        return data

    def get_stats(self, proxy=None) -> dict:
        self._stats.proxy = proxy
        data = self._stats.stats
        return data

    def get_valuations(self, proxy=None) -> _pd.DataFrame:
        self._stats.proxy = proxy
        data = self._stats.valuations
        return data

    def get_sustainability(self, proxy=None, as_dict=False):
        self._quote.proxy = proxy
        data = self._quote.sustainability
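
Worth noting: both getters read from the same `KeyStats` instance, and `KeyStats._scrape` sets `_already_scraped` on first call, so fetching stats and valuations in sequence should hit Yahoo only once:

import yfinance as yf

msft = yf.Ticker("MSFT")
s = msft.get_stats()        # triggers the single 'key-statistics' scrape
v = msft.get_valuations()   # served from the same KeyStats instance, no refetch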

yfinance/scrapers/quote.py

@@ -19,13 +19,11 @@ class Quote:
        self._calendar = None

        self._already_scraped = False
        self._already_scraped_complementary = False

    @property
    def info(self) -> dict:
        if self._info is None:
            self._scrape(self.proxy)
            self._scrape_complementary(self.proxy)
        return self._info
@@ -154,59 +152,3 @@ class Quote:
                'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
        except Exception:
            pass

    def _scrape_complementary(self, proxy):
        if self._already_scraped_complementary:
            return
        self._already_scraped_complementary = True

        self._scrape(proxy)
        if self._info is None:
            return

        # Complementary key-statistics. For now just want 'trailing PEG ratio'
        keys = {"trailingPegRatio"}
        if keys:
            # Simplified the original scrape code for key-statistics. Very expensive for fetching
            # just one value, best if scraping most/all:
            #
            # p = _re.compile(r'root\.App\.main = (.*);')
            # url = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self._ticker.ticker, self._ticker.ticker)
            # try:
            #     r = session.get(url, headers=utils.user_agent_headers)
            #     data = _json.loads(p.findall(r.text)[0])
            #     key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']["timeSeries"]
            #     for k in keys:
            #         if k not in key_stats or len(key_stats[k]) == 0:
            #             # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
            #             v = None
            #         else:
            #             # Select most recent (last) raw value in list:
            #             v = key_stats[k][-1]["reportedValue"]["raw"]
            #         self._info[k] = v
            # except Exception:
            #     raise
            #     pass
            #
            # For just one/few variables it is faster to query directly:
            url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(
                self._data.ticker, self._data.ticker)
            for k in keys:
                url += "&type=" + k
            # Request 6 months of data
            start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
            start = int(start.timestamp())
            end = pd.Timestamp.utcnow().ceil("D")
            end = int(end.timestamp())
            url += f"&period1={start}&period2={end}"

            json_str = self._data.cache_get(url=url, proxy=proxy).text
            json_data = json.loads(json_str)
            key_stats = json_data["timeseries"]["result"][0]
            if k not in key_stats:
                # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
                v = None
            else:
                # Select most recent (last) raw value in list:
                v = key_stats[k][-1]["reportedValue"]["raw"]
            self._info[k] = v
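
For reference, the direct-query path above reduces to a single GET against the fundamentals-timeseries endpoint. A standalone sketch, using plain requests plus a browser-style User-Agent header (an assumption; yfinance's cached session handles that itself), and assuming the response shape the code above expects:

import datetime
import requests
import pandas as pd

ticker = "MSFT"
url = ("https://query1.finance.yahoo.com/ws/fundamentals-timeseries"
       "/v1/finance/timeseries/{0}?symbol={0}&type=trailingPegRatio").format(ticker)
end = pd.Timestamp.utcnow().ceil("D")
start = end - datetime.timedelta(days=365 // 2)   # ~6 months of data
url += "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))

result = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).json()
key_stats = result["timeseries"]["result"][0]
if "trailingPegRatio" in key_stats:
    # most recent raw value, matching the selection logic above
    peg = key_stats["trailingPegRatio"][-1]["reportedValue"]["raw"]
else:
    peg = None   # Yahoo prints N/A on the website in this case
print(peg)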

yfinance/scrapers/stats.py

@@ -0,0 +1,111 @@
import datetime as _dt
import re as _re

import pandas as _pd

from yfinance import utils
from yfinance.data import TickerData


class KeyStats:
    def __init__(self, data: TickerData, proxy=None):
        self._data = data
        self.proxy = proxy

        self._stats = None
        self._valuations = None

        self._already_scraped = False

    @property
    def stats(self) -> dict:
        if self._stats is None:
            self._scrape(self.proxy)
        return self._stats

    @property
    def valuations(self) -> _pd.DataFrame:
        if self._valuations is None:
            self._scrape(self.proxy)
        return self._valuations

    def _scrape(self, proxy):
        if self._already_scraped:
            return
        self._already_scraped = True

        data = self._data.get_json_data_stores('key-statistics', proxy)

        self._stats = data['QuoteSummaryStore']
        del self._stats["defaultKeyStatistics"]  # available in Ticker.info
        del self._stats["financialData"]  # available in Ticker.info
        exchange_tz = self._stats["quoteType"]["exchangeTimezoneName"]
        try:
            c = "calendarEvents"
            for k in ["dividendDate", "exDividendDate"]:
                self._stats[c][k] = _pd.to_datetime(self._stats[c][k], unit='s', utc=True)
                if self._stats[c][k].time() == _dt.time(0):
                    # Probably not UTC but meant to be in exchange timezone
                    self._stats[c][k] = self._stats[c][k].tz_convert(None).tz_localize(exchange_tz)
        except Exception:
            pass

        ts = data['QuoteTimeSeriesStore']["timeSeries"]
        trailing_series = []
        year_series = []
        for k in ts:
            if len(ts[k]) == 0:
                # Yahoo website prints N/A, indicates Yahoo lacks the necessary data to calculate
                continue
            if len(ts[k]) == 1:
                date = _pd.to_datetime(ts[k][0]["asOfDate"])
                v = ts[k][0]["reportedValue"]
                if isinstance(v, dict):
                    v = v["raw"]
                k = _re.sub("^trailing", "", k)
                trailing_series.append(_pd.Series([v], index=[date], name=k))
            else:
                if k == "timestamp":
                    continue
                dates = [d["asOfDate"] for d in ts[k]]
                dates = _pd.to_datetime(dates)
                has_raw = isinstance(ts[k][0]["reportedValue"], dict) and "raw" in ts[k][0]["reportedValue"]
                if has_raw:
                    values = [d["reportedValue"]["raw"] for d in ts[k]]
                else:
                    values = [d["reportedValue"] for d in ts[k]]
                k = _re.sub("^quarterly", "", k)
                year_series.append(_pd.Series(values, index=dates, name=k))

        year_table = None
        if len(year_series) > 0:
            year_table = _pd.concat(year_series, axis=1)
        trailing_table = None
        if len(trailing_series) > 0:
            trailing_table = _pd.concat(trailing_series, axis=1)

        tables = [t for t in [year_table, trailing_table] if t is not None]
        if len(tables) == 0:
            table = _pd.DataFrame()
        else:
            if len(tables) == 1:
                table = tables[0]
            else:
                table = _pd.concat(tables, axis=0)
            table = table.T
            table = table[table.columns.sort_values(ascending=False)]

        self._valuations = table
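
To make the table construction concrete, a small worked example with made-up numbers: each metric becomes a Series indexed by as-of date, the Series are concatenated column-wise, and the result is transposed and sorted so metrics are rows and the newest date is the first column:

import pandas as pd

dates = pd.to_datetime(["2022-06-30", "2022-09-30", "2022-12-31"])
year_series = [
    pd.Series([28.1, 25.4, 24.9], index=dates, name="PeRatio"),  # hypothetical values
    pd.Series([10.2, 9.8, 9.5], index=dates, name="PsRatio"),
]
table = pd.concat(year_series, axis=1)   # dates x metrics
table = table.T                          # metrics x dates
table = table[table.columns.sort_values(ascending=False)]   # newest first

#          2022-12-31  2022-09-30  2022-06-30
# PeRatio        24.9        25.4        28.1
# PsRatio         9.5         9.8        10.2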

yfinance/ticker.py

@@ -137,6 +137,14 @@ class Ticker(TickerBase):
    def info(self) -> dict:
        return self.get_info()

    @property
    def stats(self) -> dict:
        return self.get_stats()

    @property
    def valuations(self) -> _pd.DataFrame:
        return self.get_valuations()

    @property
    def calendar(self) -> _pd.DataFrame:
        return self.get_calendar()
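
Like the neighboring properties, these are thin wrappers that call the getters with default arguments; to route the request through a proxy, call the getter directly:

import yfinance as yf

msft = yf.Ticker("MSFT")
msft.stats                                       # property form, no proxy
msft.get_stats(proxy="http://10.10.1.10:3128")   # proxy address is hypothetical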